|
02、数据集与模型train代码(基于Keras)实现
使用数据集:mnist
# -*- coding: utf-8 -*-
__author__ = u'东方耀 微信:dfy_88888'
__date__ = '2019/11/6 14:30'
__product__ = 'PyCharm'
__filename__ = 'train_model'
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, ZeroPadding2D
from keras.models import Model
from keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint
# from keras.datasets import mnist
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data

# (x_train, y_train), (x_test, y_test) = mnist.load_data()
# The TF1 reader already returns float32 pixels scaled into [0, 1],
# so no extra /255 normalization is needed here.
mnist = input_data.read_data_sets(train_dir='mnist_data', one_hot=False)
x_train = mnist.train.images  # (55000, 784), float32 in [0, 1]
x_test = mnist.test.images    # (10000, 784), float32 in [0, 1]

# Turn the flat 784-vectors into NHWC image tensors for the conv layers.
x_train = x_train.reshape((-1, 28, 28, 1))
x_test = x_test.reshape((-1, 28, 28, 1))

# Corrupt the images with additive Gaussian noise, then clamp the result
# back into the valid [0, 1] pixel range.
noise_factor = 0.5
x_train_noisy = np.clip(
    x_train + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_train.shape), 0., 1.)
x_test_noisy = np.clip(
    x_test + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_test.shape), 0., 1.)
def plot_learning_curve(history, ylim=(0, 0.5)):
    """Plot every per-epoch metric recorded in a Keras ``History``.

    Parameters
    ----------
    history : keras.callbacks.History
        Object whose ``history`` attribute is a dict mapping metric
        names to lists of per-epoch values.  All lists must share the
        same length, otherwise ``pd.DataFrame`` raises
        ``ValueError: arrays must all be same length`` (Keras records
        one entry per epoch per metric, so this holds).
    ylim : tuple of (float, float), optional
        Y-axis limits for the plot.  Defaults to ``(0, 0.5)``, matching
        the previous hard-coded behavior; pass a wider range if the
        loss curves get clipped.
    """
    df_history = pd.DataFrame(data=history.history)
    print(df_history)
    print(df_history.columns)
    # The DataFrame index (the epoch number) becomes the x-axis.
    df_history.plot(figsize=(8, 5))
    plt.grid(True)
    plt.ylim(*ylim)
    plt.show()
def train_model():
    """Build, train, and evaluate a denoising convolutional autoencoder.

    Reads the (noisy input, clean target) MNIST arrays from module-level
    globals, checkpoints the best weights by validation loss, and plots
    the training curves when training finishes.
    """
    # ---- encoder: 28x28x1 -> 4x4x8, i.e. a 128-dimensional code ----
    input_img = Input(shape=(28, 28, 1))
    net = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
    net = MaxPooling2D((2, 2), padding='same')(net)
    net = Conv2D(8, (3, 3), activation='relu', padding='same')(net)
    net = MaxPooling2D((2, 2), padding='same')(net)
    net = Conv2D(8, (3, 3), activation='relu', padding='same')(net)
    encoded = MaxPooling2D((2, 2), padding='same', name='encoder')(net)

    # ---- decoder: 4x4x8 back up to 28x28x1 ----
    net = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded)
    net = UpSampling2D((2, 2))(net)
    net = Conv2D(8, (3, 3), activation='relu', padding='same')(net)
    net = UpSampling2D((2, 2))(net)
    # 'valid' padding shrinks 16x16 -> 14x14 so the last upsampling
    # lands exactly on the 28x28 input size.
    net = Conv2D(16, (3, 3), activation='relu', padding='valid')(net)
    net = UpSampling2D((2, 2))(net)
    # Sigmoid keeps the reconstruction in [0, 1], the input pixel range.
    decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(net)

    autoencoder = Model(inputs=input_img, outputs=decoded)
    autoencoder.summary()
    # binary_crossentropy treats each pixel as an independent Bernoulli
    # target in [0, 1]; mse/mae are tracked as additional metrics.
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy', metrics=['mse', 'mae'])

    callbacks = [
        TensorBoard(log_dir='./train_log'),
        EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=5),
        ModelCheckpoint(filepath='./autoencoder_dfy.h5', monitor='val_loss', save_best_only=True),
    ]

    # Input is the noisy image, target is the clean one: a denoising
    # autoencoder trains itself without any labels.
    history = autoencoder.fit(x_train_noisy, x_train,
                              epochs=55,
                              batch_size=256,
                              shuffle=True,
                              validation_data=(x_test_noisy, x_test),
                              validation_freq=1,
                              callbacks=callbacks)

    print(history.history.keys())

    # Side-by-side curves: reconstruction mse (left) and loss (right).
    # NOTE(review): assumes this Keras version records the metric under
    # the key 'mse' (not 'mean_squared_error') — verify against the
    # printed history keys above.
    fig, (ax1, ax2) = plt.subplots(1, 2)
    for axis, metric in ((ax1, 'mse'), (ax2, 'loss')):
        axis.set_title('model ' + metric)
        axis.plot(history.history[metric])
        axis.plot(history.history['val_' + metric])
        axis.set_xlabel('epoch')
        axis.set_ylabel(metric)
        axis.legend(['train', 'validation'], loc='upper left')
    plt.show()

    plot_learning_curve(history)
- train_model()
复制代码
东方老师AI官网:http://www.ai111.vip
有任何问题可联系东方老师微信:dfy_88888
【微信二维码图片】
|
|