TensorFlow image style transfer — V1 algorithm implementation
# -*- coding: utf-8 -*-
__author__ = 'dongfangyao'
__date__ = '2019/2/4 下午3:33'
__product__ = 'PyCharm'
__filename__ = '10_image_style_conversion'
"""
TensorFlow image style transfer, V1 algorithm.

V1 drawback: the output image itself is the trainable variable, so it must be
randomly re-initialized and optimized with gradient descent from scratch for
every new picture — inefficient.
V1 trains the image itself.
V2 trains a network instead (Image Transform Net).
V3 redefines the style loss: it drops Gram-matrix similarity in favour of a
best-match small-patch comparison (patch-level units).
"""
import os
import numpy as np
import tensorflow as tf
import time
from PIL import Image
import matplotlib.pyplot as plt

# Per-channel means (B, G, R order) of the ImageNet training set, as used by
# the pretrained VGG release; subtracted from the input before the network.
VGG_MEAN = [103.939, 116.779, 123.68]
class VGGNet:
    """Build the VGG16 graph and load pretrained parameters from a data dict.

    ``data_dict`` maps layer names (e.g. 'conv1_1', 'fc6') to a two-element
    list ``[weights, biases]`` as stored in the released ``vgg16.npy`` file.
    All weights are frozen as graph constants, so only the input image can
    be optimized.
    """

    def __init__(self, data_dict):
        self.data_dict = data_dict

    def get_conv_kernel(self, name):
        # Index 0 holds the convolution kernel, index 1 the bias.
        return tf.constant(self.data_dict[name][0], name='conv')

    def get_fc_weight(self, name):
        # Fully-connected weight matrix for layer ``name``.
        return tf.constant(self.data_dict[name][0], name='fc')

    def get_bias(self, name):
        return tf.constant(self.data_dict[name][1], name='bias')

    def conv_layer(self, inputs, name):
        """Convolution (stride 1, SAME padding) + bias + ReLU."""
        # name_scope: 1) avoids op-name collisions, 2) groups ops neatly in
        # TensorBoard visualizations.
        with tf.name_scope(name):
            conv_w = self.get_conv_kernel(name)
            conv_b = self.get_bias(name)
            # tf.layers.conv2d() would create fresh trainable variables and
            # cannot reuse the pretrained kernels, so use the low-level op.
            result = tf.nn.conv2d(inputs, conv_w, [1, 1, 1, 1], padding='SAME')
            result = tf.nn.bias_add(result, conv_b)
            result = tf.nn.relu(result)
            return result

    def pooling_layer(self, inputs, name):
        """2x2 max pooling with stride 2 (halves height and width)."""
        result = tf.nn.max_pool(inputs, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME', name=name)
        return result

    def fc_layer(self, inputs, name, activation=tf.nn.relu):
        """Fully-connected layer; pass ``activation=None`` for raw logits."""
        with tf.name_scope(name):
            fc_w = self.get_fc_weight(name)
            fc_b = self.get_bias(name)
            result = tf.nn.bias_add(tf.matmul(inputs, fc_w), fc_b)
            if activation is None:
                return result
            else:
                return activation(result)

    def flatten_op(self, inputs, name):
        """Flatten [N, H, W, C] -> [N, H*W*C]."""
        with tf.name_scope(name):
            x_shape = inputs.get_shape().as_list()
            dim = 1
            for d in x_shape[1:]:
                dim *= d
            inputs = tf.reshape(inputs, shape=[-1, dim])
            return inputs

    def build(self, input_rgb):
        """Build the VGG16 forward (feature-extraction) graph.

        Args:
            input_rgb: tensor of shape [1, 224, 224, 3] in RGB channel order.

        Side effects:
            Stores every conv/pool activation on ``self`` (conv1_1 ... pool5)
            so callers can pick feature layers for the style/content losses.
        """
        start_time = time.time()
        print('building start...')
        r, g, b = tf.split(value=input_rgb, num_or_size_splits=3, axis=3)
        # VGG expects BGR channel order (like OpenCV, reversed from RGB) with
        # the per-channel ImageNet mean subtracted.
        x_bgr = tf.concat(values=[
            b - VGG_MEAN[0],
            g - VGG_MEAN[1],
            r - VGG_MEAN[2]
        ], axis=3)
        assert x_bgr.get_shape().as_list()[1:] == [224, 224, 3]
        # stage 1
        self.conv1_1 = self.conv_layer(x_bgr, 'conv1_1')
        self.conv1_2 = self.conv_layer(self.conv1_1, 'conv1_2')
        self.pool1 = self.pooling_layer(self.conv1_2, 'pool1')
        # stage 2
        self.conv2_1 = self.conv_layer(self.pool1, 'conv2_1')
        self.conv2_2 = self.conv_layer(self.conv2_1, 'conv2_2')
        self.pool2 = self.pooling_layer(self.conv2_2, 'pool2')
        # stage 3
        self.conv3_1 = self.conv_layer(self.pool2, 'conv3_1')
        self.conv3_2 = self.conv_layer(self.conv3_1, 'conv3_2')
        self.conv3_3 = self.conv_layer(self.conv3_2, 'conv3_3')
        self.pool3 = self.pooling_layer(self.conv3_3, 'pool3')
        # stage 4
        self.conv4_1 = self.conv_layer(self.pool3, 'conv4_1')
        self.conv4_2 = self.conv_layer(self.conv4_1, 'conv4_2')
        self.conv4_3 = self.conv_layer(self.conv4_2, 'conv4_3')
        self.pool4 = self.pooling_layer(self.conv4_3, 'pool4')
        # stage 5
        self.conv5_1 = self.conv_layer(self.pool4, 'conv5_1')
        self.conv5_2 = self.conv_layer(self.conv5_1, 'conv5_2')
        self.conv5_3 = self.conv_layer(self.conv5_2, 'conv5_3')
        self.pool5 = self.pooling_layer(self.conv5_3, 'pool5')
        # stage 6 (fully-connected head fc6/fc7/fc8 + softmax) is not needed
        # for style transfer, so it is intentionally not built here.
        print('building finished 耗时:%4ds' % (time.time() - start_time))
# Path to the pretrained VGG16 weights (npy dict of layer-name -> [w, b]).
vgg16_npy_path = './vgg16.npy'
# Standalone sanity check of the VGG16 forward pass:
# vgg16_data = np.load(vgg16_npy_path, encoding='latin1')
# data_dict = vgg16_data.item()
#
# print(data_dict.keys())
#
# vgg16_for_result = VGGNet(data_dict)
#
# image_rgb = tf.placeholder(dtype=tf.float32, shape=[1, 224, 224, 3], name='image_rgb')
#
# vgg16_for_result.build(image_rgb)

# Both input images are expected to be 224 * 224.
content_img_path = './img/content.jpeg'
style_img_path = './img/style.jpeg'

num_steps = 100      # gradient-descent iterations on the output image
learning_rate = 10   # large LR is workable here: the "parameters" are pixels
lambda_c = 0.1       # weight of the content loss
lambda_s = 500       # weight of the style loss

output_dir = './img/output_img'
if not os.path.exists(output_dir):
    os.mkdir(output_dir)
def initial_image(shape, mean, stddev):
    """Create the trainable output image as a truncated-normal tf.Variable.

    Args:
        shape: image tensor shape, e.g. [1, 224, 224, 3].
        mean: mean of the initial pixel noise.
        stddev: standard deviation of the initial pixel noise.

    Returns:
        A tf.Variable holding the randomly initialized image.
    """
    # Renamed local: the original shadowed the function's own name.
    noise = tf.truncated_normal(shape=shape, mean=mean, stddev=stddev, dtype=tf.float32)
    return tf.Variable(noise)


# The image being optimized: noise centered on mid-gray (127.5).
initial_image_result = initial_image([1, 224, 224, 3], mean=127.5, stddev=20)
def read_img(image_name):
    """Read an image file into a float32 batch of shape [1, H, W, C].

    The leading batch axis lets the array feed a [1, 224, 224, 3] placeholder.
    """
    pixels = np.array(Image.open(image_name))          # [H, W, C] uint8
    batch = np.asarray([pixels], dtype=np.float32)     # [1, H, W, C] float32
    print(batch.shape)
    return batch
# plt.imshow(read_img(style_img_path)[0])
# plt.show()

content_img_arr_val = read_img(content_img_path)
style_img_arr_val = read_img(style_img_path)

content_img = tf.placeholder(dtype=tf.float32, shape=[1, 224, 224, 3], name='content_img')
style_img = tf.placeholder(dtype=tf.float32, shape=[1, 224, 224, 3], name='style_img')

# Three images go through VGG16 to extract features: the optimized result,
# the content image and the style image.
vgg16_data = np.load(vgg16_npy_path, encoding='latin1')
data_dict = vgg16_data.item()
print(data_dict.keys())

vgg16_for_initial_result = VGGNet(data_dict)
vgg16_for_content_img = VGGNet(data_dict)
vgg16_for_style_img = VGGNet(data_dict)

vgg16_for_content_img.build(content_img)
vgg16_for_style_img.build(style_img)
vgg16_for_initial_result.build(initial_image_result)

# Choose which conv layers to take CNN features from.
# Content features: lower layers preserve finer detail.
content_features = [
    vgg16_for_content_img.conv1_2,
    vgg16_for_content_img.conv2_2,
    # vgg16_for_content_img.conv3_3,
    # vgg16_for_content_img.conv4_3,
    # vgg16_for_content_img.conv5_3
]
# The result's content features must use the same layers as content_features.
result_content_features = [
    vgg16_for_initial_result.conv1_2,
    vgg16_for_initial_result.conv2_2,
    # vgg16_for_initial_result.conv3_3,
    # vgg16_for_initial_result.conv4_3,
    # vgg16_for_initial_result.conv5_3
]
# Style features: higher layers are more abstract.
style_features = [
    # vgg16_for_style_img.conv1_2,
    # vgg16_for_style_img.conv2_2,
    # vgg16_for_style_img.conv3_3,
    vgg16_for_style_img.conv4_3,
    vgg16_for_style_img.conv5_3
]
# The result's style features must use the same layers as style_features.
result_style_features = [
    # vgg16_for_initial_result.conv1_2,
    # vgg16_for_initial_result.conv2_2,
    # vgg16_for_initial_result.conv3_3,
    vgg16_for_initial_result.conv4_3,
    vgg16_for_initial_result.conv5_3
]

# Loss computation.
content_loss = tf.zeros(shape=1, dtype=tf.float32)
# zip([1, 2], [3, 4]) ---> [(1, 3), (2, 4)]: pairs up the two feature lists.
# c and c_ (post-activation) have shape [1, height, width, channel]; the MSE
# is averaged over axes [1, 2, 3].
for c, c_ in zip(content_features, result_content_features):
    content_loss += tf.reduce_mean(tf.square(c - c_), axis=[1, 2, 3])
    pass
def gram_matrix(x):
    """Compute the Gram matrix of a conv feature map.

    Measures pairwise similarity between the k feature channels, yielding a
    k x k matrix (here [1, ch, ch] since the batch axis is kept).

    Args:
        x: feature map from a conv layer, shape [1, height, width, channels].
    """
    batch, height, width, channels = x.get_shape().as_list()
    # Collapse the spatial dims: [1, h*w, ch].
    flat = tf.reshape(x, shape=[batch, height * width, channels])
    # [ch, ch] = [ch, h*w] @ [h*w, ch]; dividing by the total element count
    # keeps the values from growing too large.
    normalizer = tf.constant(height * width * channels, tf.float32)
    # ``flat`` is 3-D, so the resulting Gram matrix is 3-D as well.
    return tf.matmul(flat, flat, adjoint_a=True) / normalizer
style_gram_matrix = [gram_matrix(feature) for feature in style_features]
result_style_gram_matrix = [gram_matrix(feature) for feature in result_style_features]

style_loss = tf.zeros(shape=1, dtype=tf.float32)
for s, s_ in zip(style_gram_matrix, result_style_gram_matrix):
    # MSE between Gram matrices. They are 3-D ([1, ch, ch]), so average over
    # axes [1, 2]; axis=[0, 1, 2, 3] would raise an error here.
    style_loss += tf.reduce_mean(tf.square(s - s_), axis=[1, 2])
    # 2-D Gram variant:
    # style_loss += tf.reduce_mean(tf.square(s - s_), axis=[0, 1])
    pass

# Total loss: weighted sum of content and style terms.
loss = content_loss * lambda_c + style_loss * lambda_s

with tf.name_scope('train'):
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

init_op = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init_op)
    for step in range(num_steps):
        loss_value, content_loss_value, style_loss_value, _ = \
            sess.run(fetches=[loss, content_loss, style_loss, train_op], feed_dict={
                content_img: content_img_arr_val,
                style_img: style_img_arr_val
            })
        print('step:%d loss_value:%8.4f content_loss_value:%8.4f style_loss_value:%8.4f'
              % (step+1, loss_value[0], content_loss_value[0], style_loss_value[0]))
        # result_image: shape [224, 224, 3] after dropping the batch axis.
        result_image = initial_image_result.eval(sess)[0]
        # np.clip: clamp pixels into [0, 255] before the uint8 cast.
        result_image = np.clip(result_image, 0, 255)
        result_image = np.asarray(result_image, dtype=np.uint8)
        img = Image.fromarray(result_image)
        result_image_path = os.path.join(output_dir, 'result-%05d.jpg' % (step + 1))
        img.save(result_image_path)
        pass