TensorFlow 在 MNIST 中的应用(一)
library(tensorflow)
library(keras)
library(dplyr)
library(ggplot2)
library(reshape2)
library(tfestimators)
# 初始化数据目录
data_dir<-"mnist-data"
dir.create(data_dir,recursive=TRUE,showWarnings=FALSE)
# 下载MNIST数据集,读入R
sources <- list(
train = list(
x = "/home/wangxh/Work/tftest/Test1/Tensorflowbook.ch9/train-images-idx3-ubyte.gz",
y = "/home/wangxh/Work/tftest/Test1/Tensorflowbook.ch9/train-labels-idx1-ubyte.gz" ),
test = list(
x = "/home/wangxh/Work/tftest/Test1/Tensorflowbook.ch9/t10k-images-idx3-ubyte.gz",
y = "/home/wangxh/Work/tftest/Test1/Tensorflowbook.ch9/t10k-labels-idx1-ubyte.gz" )
)
# 读取MNIST文件(该文件是以IDX格式编码)
read_idx <- function(file) {
# 创建读取文件的二进制连接
conn <- gzfile(file, open = "rb")
#用来注册执行exit()函数前执行的终止处理程序。
on.exit(close(conn), add = TRUE)
# 以4个字节的序列形式读入‘幻数’
magic <- readBin(conn, what = "raw", n = 4, endian = "big")
ndims <- as.integer(magic[[4]])
# 读取维度(32位的整数)
dims <- readBin(conn, what = "integer", n = ndims, endian = "big")
# 其余部分作为原始向量读入
data <- readBin(conn, what = "raw", n = prod(dims), endian = "big")
# 转换为一个整数向量
converted <- as.integer(data)
# 返回1维的数组的
if (length(dims) == 1)
return(converted)
# 将3D数据打包到矩阵中
matrix(converted, nrow = dims[1], ncol = prod(dims[-1]), byrow = TRUE)
}
mnist <- rapply(sources, classes = "character", how = "list", function(url) {
# 下载URL的idx文件
target <- file.path(data_dir, basename(url))
if (!file.exists(target))
download.file(url, target)
# 读取idx格式数据
read_idx(target)
})
# 转换训练集和测试集数值归一化为0-1范围,Mnist采用的像素范围最大值是255
max(mnist$train$y)
# 255
max(mnist$test$x)
# 255
mnist$train$x <- mnist$train$x / max(mnist$train$x)
mnist$test$x <- mnist$test$x / max(mnist$test$x)
# 尝试为随机的36个图像的样本绘制图,显示像素强度
n <- 36
indices <- sample(nrow(mnist$train$x), size = n)
data <- array(mnist$train$x[indices, ], dim = c(n, 28, 28))
melted <- melt(data, varnames = c("image", "x", "y"), value.name = "intensity")
ggplot(melted, aes(x = x, y = y, fill = intensity)) +
geom_tile() +
scale_fill_continuous(name = "Pixel Intensity") +
scale_y_reverse() +
facet_wrap(~ image, nrow = sqrt(n), ncol = sqrt(n)) +
theme(
strip.background = element_blank(),
strip.text.x = element_blank(),
panel.spacing = unit(0, "lines"),
axis.text = element_blank(),
axis.ticks = element_blank()
) +
labs(
title = "MNIST Image Data",
subtitle = "Visualization of a sample of images contained in MNIST data set.",
x = NULL,
y = NULL
)
# 构造线性分类器,这部分再tensoflow1.8上可以执行,但是再2.4上面会提示AttributeError: module 'tensorflow.python.feature_column.feature_column' has no attribute 'numeric_column'错误
classifier <- linear_classifier(
feature_columns = feature_columns(
column_numeric("x", shape = shape(784L))
),
n_classes = 10L #10位数字
)
# 构造输入函数生成器
mnist_input_fn <- function(data, ...) {
input_fn(
data,
response = "y",
features = "x",
batch_size = 128,
...
)
}
# 训练分类器
train(classifier, input_fn = mnist_input_fn(mnist$train), steps = 200)
# 在测试数据集上评估分类器
evaluate(classifier, input_fn = mnist_input_fn(mnist$test), steps = 200)
# A tibble: 1 x 4
# accuracy average_loss loss global_step
# <dbl> <dbl> <dbl> <dbl>
# 1 0.905 0.345 43.6 200
# 使用我们的分类器来预测测试数据集子集的标签
predictions <- predict(classifier, input_fn = mnist_input_fn(mnist$test))
n <- 20
indices <- sample(nrow(mnist$test$x), n)
classes <- vapply(indices, function(i) {
predictions$classes[[i]]
}, character(1))
data <- array(mnist$test$x[indices, ], dim = c(n, 28, 28))
melted <- melt(data, varnames = c("image", "x", "y"), value.name = "intensity")
melted$class <- classes
image_labels <- setNames(
sprintf("Predicted: %s\nActual: %s", classes, mnist$test$y[indices]),
1:n
)
ggplot(melted, aes(x = x, y = y, fill = intensity)) +
geom_tile() +
scale_y_reverse() +
facet_wrap(~ image, ncol = 5, labeller = labeller(image = image_labels)) +
theme(
panel.spacing = unit(0, "lines"),
axis.text = element_blank(),
axis.ticks = element_blank()
) +
labs(
title = "MNIST Image Data",
subtitle = "Visualization of a sample of images contained in MNIST data set.",
x = NULL,
y = NULL
)
Written on November 15, 2020