|
59、RNN实战案例:手写数字图片识别(分类)_笔记
- # -*- coding: utf-8 -*-
- __author__ = u'东方耀 微信:dfy_88888'
- __date__ = '2019/4/14 9:16'
- __product__ = 'PyCharm'
- __filename__ = 'rnn_mnist_demo'
- """
- 59、RNN实战案例:手写数字图片识别(分类)
- tf开发 三个模块:
- 1、数据提供者 api: next_batch(batch_size)
- 2、构建数据流图 模型结构 input LSTM fc output metric(loss accuracy) train_op
- 3、执行训练的代码 sess.run()
- """
- import tensorflow as tf
- from tensorflow.examples.tutorials.mnist import input_data
- import numpy as np
- import math
- import matplotlib.pyplot as plt
tf.logging.set_verbosity(tf.logging.INFO)
# Step 1: load the data.
# MNIST loader; one_hot=True yields labels as 10-dim one-hot vectors.
mnist = input_data.read_data_sets('mnist_data/', one_hot=True)
# Number of training samples: 55000
print(mnist.train.num_examples)
# images: feature matrix, 784 features per sample (28*28 pixels)
print(mnist.train.images.shape)
# labels: 10 target classes (digits 0-9), one-hot encoded
print(mnist.train.labels.shape)
print('测试集shape:{}'.format(mnist.test.images.shape))
print('验证集shape:{}'.format(mnist.validation.images.shape))
# The MNIST dataset has three parts: train (55k, mnist.train),
# test (10k, mnist.test) and validation (5k, mnist.validation).
# Each image is 28*28*1 grayscale, i.e. a 784-dim feature vector.
# Step 2: hyper-parameters and the learning-rate schedule settings.
learn_rate_base = 0.01  # initial learning rate for the decay schedule
# Sequence length: the 28 image rows are fed as 28 time steps t1..t28.
num_timesteps = 28
# Input size at each time step (28 pixel values per row).
input_size = 28
# One forward pass of the LSTM over the 28 steps consumes exactly one image.
# (Neighbouring pixels/rows are correlated — context an RNN can exploit.)
num_lstm_layers = 2
num_lstm_nodes = [128, 256]  # hidden units per LSTM layer
# Gradient clipping threshold: 1.0 is the upper bound on the global gradient
# norm; larger gradients are scaled down (truncated).
clip_lstm_grads = 1.0
# Log training metrics every `display_step` steps.
display_step = 10
# Input dimensionality: 784 = 28*28.
input_dim = mnist.train.images.shape[1]
# Output dimensionality: 10 classes.
class_num = mnist.train.labels.shape[1]
tf.logging.info('input_dim : %d' % input_dim)
tf.logging.info('class_num : %d' % class_num)
# Inspect sample data: 10 images with their labels (left commented out).
# train_images, train_labels = mnist.train.next_batch(10)
# print(train_images.shape)
# print(train_labels.shape)
# train_image = train_images[0]
# train_label = train_labels[0]
# print('实际值:', train_label)
#
# train_image = np.array(train_image)
# train_image = train_image.reshape(28, 28)
# fig = plt.figure()
# plt.imshow(train_image, cmap='binary')
# plt.show()
def learn_rate_func(epoch, base=None):
    """Step-decay learning-rate schedule.

    The rate starts at ``base`` and is multiplied by 0.9 once every
    10 epochs, with a hard lower bound of 0.001.

    :param epoch: current training step / epoch number (non-negative int)
    :param base: optional base learning rate; when None it falls back to
        the module-level ``learn_rate_base`` (keeps the original call
        signature backward compatible while making the schedule reusable)
    :return: the learning rate for this epoch (float)
    """
    if base is None:
        base = learn_rate_base
    # int(epoch / 10) == number of completed decay periods for epoch >= 0.
    return max(0.001, base * (0.9 ** int(epoch / 10)))
#
# Step 3: build the model — placeholders for the input data.
# NOTE(review): 'X_origianl' is a typo for 'X_original', but it is the graph
# tensor name, so it is kept unchanged here.
_X = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='X_origianl')
y = tf.placeholder(tf.int32, [None, class_num], name='y')
# batch_size is a scalar int32 placeholder so that different batch sizes
# can be fed during training and testing.
batch_size = tf.placeholder(tf.int32, [])
# Dropout keep probability (fraction of units kept).
keep_prob = tf.placeholder(tf.float32, [])
learn_rate = tf.placeholder(tf.float32, [])
# Step 4: build the RNN/LSTM network (translate the architecture into code).
# 1. Reshape the input.
# X shape: [batch_size, num_timesteps, input_size]
X = tf.reshape(_X, [-1, num_timesteps, input_size])
# LSTM layers.
# Uniform initializer with range derived from
# 1 / sqrt(input size + last hidden size) / 3.
scale = 1.0 / math.sqrt(input_size + num_lstm_nodes[-1]) / 3.0
lstm_initializer = tf.random_uniform_initializer(minval=-scale, maxval=scale)
with tf.variable_scope(name_or_scope='lstm_nn', initializer=lstm_initializer):
    cells = []
    for i in range(num_lstm_layers):
        cell = tf.nn.rnn_cell.LSTMCell(num_units=num_lstm_nodes[i], name='lstm_cell')
        cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=keep_prob)
        cells.append(cell)
    # Stack the per-layer cells into a single MultiRNNCell so the
    # multi-layer RNN can be driven like a single-layer cell.
    cell = tf.nn.rnn_cell.MultiRNNCell(cells, state_is_tuple=True)
    # Initial hidden state: all zeros.
    initial_state = cell.zero_state(batch_size=batch_size, dtype=tf.float32)
    # Run the cell over the input sequence; rnn_outputs holds every step's
    # output: [batch_size, num_timesteps, num_lstm_nodes[-1]].
    rnn_outputs, middle_hidden_state = tf.nn.dynamic_rnn(cell=cell, inputs=X, initial_state=initial_state)
    print('rnn_outputs shape:', rnn_outputs.shape)
    # Keep only the last time step: [batch_size, num_lstm_nodes[-1]].
    last = rnn_outputs[:, -1, :]
    print('last shape: ', last.shape)
# Fully-connected output layer.
# fc_initializer = tf.uniform_unit_scaling_initializer(factor=1.0, dtype=tf.float32)
fc_initializer = tf.random_uniform_initializer(-1.0, 1.0, seed=10)
with tf.variable_scope(name_or_scope='fc', initializer=fc_initializer):
    logits = tf.layers.dense(last, class_num, name='fc')
#
# Step 5: the model's loss function.
# Step 6: the gradient-descent optimizer (Adam, a momentum-style method).
# Step 7: the model accuracy.
# metrics layer (loss and accuracy).
# This layer has no trainable variables, so no initializer is needed and a
# plain name_scope suffices.
with tf.name_scope(name='metrics'):
    # sparse_softmax_cross_entropy_with_logits accepts integer class labels;
    # labels here are one-hot, so the dense v2 variant is used instead.
    # softmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=outputs, logits=logits)
    # Cross-entropy loss (v2 variant).
    softmax_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits)
    loss = tf.reduce_mean(softmax_loss)
    # argmax example: [0, 2, 1, 5, 8, 2] ---> 4
    y_pred = tf.argmax(tf.nn.softmax(logits), axis=1, output_type=tf.int32)
    y_true = tf.argmax(y, axis=1, output_type=tf.int32)
    correct_pred = tf.equal(y_pred, y_true)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, dtype=tf.float32))
# train_op layer.
with tf.name_scope(name='train_op'):
    # Gradients of the trainable variables must be clipped by clip_lstm_grads.
    # Collect all trainable variables.
    # train_op = tf.train.AdamOptimizer(hps.learning_rate).minimize(loss, global_step=global_step)
    train_vars = tf.trainable_variables()
    for var in train_vars:
        tf.logging.info('train variable name : %s' % var.name)
    # tf.gradients(loss, train_vars): gradient of the loss w.r.t. each variable.
    # grads holds the clipped gradients.
    grads, _ = tf.clip_by_global_norm(t_list=tf.gradients(loss, train_vars), clip_norm=clip_lstm_grads)
    # Plain GD variants: BGD / SGD / MBGD; Adam adds momentum ("velocity").
    # Apply the clipped gradients to all variables.
    optimizer = tf.train.AdamOptimizer(learn_rate)
    train_op = optimizer.apply_gradients(grads_and_vars=zip(grads, train_vars), name='train_op')
# Step 8: execution phase inside a session (model training and iteration).
init_op = tf.global_variables_initializer()
num_train_steps = 500
batch_size_train = 100
batch_size_test = 3000
keep_prob_train = 0.8  # dropout keep probability during training
keep_prob_test = 1.0   # no dropout at test time
with tf.Session() as sess:
    sess.run(init_op)
    for step in range(num_train_steps):
        batch_xs, batch_labels = mnist.train.next_batch(batch_size_train)
        loss_val, accuracy_val, _ = sess.run(fetches=[loss, accuracy, train_op], feed_dict={
            _X: batch_xs,
            y: batch_labels,
            keep_prob: keep_prob_train,
            batch_size: batch_size_train,
            learn_rate: learn_rate_func(step)
        })
        if step % display_step == 0:
            tf.logging.info('Step: %5d, loss: %3.3f, accuracy: %3.3f' % (step, loss_val, accuracy_val))
    # Evaluate on the first 3000 test images.
    accuracy_test = sess.run(fetches=accuracy, feed_dict={
        _X: mnist.test.images[:3000],
        y: mnist.test.labels[:3000],
        keep_prob: keep_prob_test,
        batch_size: batch_size_test,
    })
    print('测试集准确率:{}'.format(accuracy_test))
pass
|
|