神经风格转换(Neural Style Transfer)在深度学习领域是一个很有意思的算法,它可以实现我们的美术大师梦,让我们把任意图片转换为名画风格。下面我们复现一下算法。
神经风格转换主要是将风格图片的风格转换到内容图片上以生成一张新的图片
第一张为内容图片,第二张为风格图片,我们要做的是把第二张图片的风格加到第一张图片上
资料链接
链接:https://pan.baidu.com/s/1OOPdIx4PiC9UUrxESSSmvg
提取码:2ky0
有一点需要注意:很多资料使用 scipy.misc.imread 读取图片,但新版本的 scipy.misc 已经不再提供 imread,因此这里改用 plt.imread(image) 读取图片;输出图片时使用 plt.imshow(image) 和 plt.show()。
代码如下
import os
import sys
import time
import matplotlib.pyplot as plt # plt 用于显示图片
import numpy as np
import scipy.io
import tensorflow as tf
from matplotlib.pyplot import imshow
from PIL import Image
import nst_utils
# Load "images/louvre.jpg" as an array via matplotlib.
# NOTE(review): `content_image` is reassigned from "images/louvre_small.jpg"
# further down, before any code reads this value.
content_image =plt.imread("images/louvre.jpg")
def compute_content_cost(a_C, a_G):
    """
    Compute the content cost between two sets of hidden-layer activations.

    The value returned is
        1 / (4 * n_H * n_W * n_C) * sum((a_C - a_G) ** 2)

    Arguments:
    a_C -- activations of the content image C at the chosen layer; must hold
           n_H * n_W * n_C values (i.e. shape (1, n_H, n_W, n_C))
    a_G -- tensor of shape (1, n_H, n_W, n_C), activations of the generated
           image G at the same layer; its static shape supplies n_H, n_W, n_C

    Returns:
    J_content -- scalar tensor holding the content cost
    """
    # Only the spatial and channel sizes are needed; the batch size is ignored.
    _, n_H, n_W, n_C = a_G.get_shape().as_list()

    # Unroll both activation volumes into 2-D matrices of shape (n_C, n_H*n_W).
    flat_shape = [n_H * n_W, n_C]
    content_flat = tf.transpose(tf.reshape(a_C, flat_shape))
    generated_flat = tf.transpose(tf.reshape(a_G, flat_shape))

    # Element-wise squared error, summed and scaled by the normalisation term.
    squared_error = tf.square(tf.subtract(content_flat, generated_flat))
    scale = 1 / (4 * n_H * n_W * n_C)
    return scale * tf.reduce_sum(squared_error)
def gram_matrix(A):
    """
    Compute the Gram matrix of A, i.e. the product A * A^T.

    Argument:
    A -- matrix of shape (n_C, n_H*n_W)

    Returns:
    GA -- Gram matrix of A, of shape (n_C, n_C)
    """
    A_transposed = tf.transpose(A)
    return tf.matmul(A, A_transposed)
def compute_layer_style_cost(a_S, a_G):
    """
    Compute the style cost contributed by a single layer.

    The value returned is
        sum((GS - GG) ** 2) / (4 * n_C**2 * (n_H * n_W)**2)
    where GS and GG are the Gram matrices of the unrolled activations.

    Arguments:
    a_S -- activations of the style image S at the layer; must hold
           n_H * n_W * n_C values (i.e. shape (1, n_H, n_W, n_C))
    a_G -- tensor of shape (1, n_H, n_W, n_C), activations of the generated
           image G at the same layer; its static shape supplies n_H, n_W, n_C

    Returns:
    J_style_layer -- scalar tensor holding the style cost of this layer
    """
    _, n_H, n_W, n_C = a_G.get_shape().as_list()

    # Unroll each activation volume to (n_H*n_W, n_C); gram_matrix expects the
    # transposed layout (n_C, n_H*n_W).
    unrolled_shape = [n_H * n_W, n_C]
    style_unrolled = tf.reshape(a_S, unrolled_shape)
    generated_unrolled = tf.reshape(a_G, unrolled_shape)

    GS = gram_matrix(tf.transpose(style_unrolled))
    GG = gram_matrix(tf.transpose(generated_unrolled))

    # Sum of squared Gram-matrix differences, then normalise.
    gram_error = tf.reduce_sum(tf.square(tf.subtract(GS, GG)))
    normaliser = 4 * (n_C ** 2) * ((n_H * n_W) ** 2)
    return gram_error / normaliser
# (layer name, weight) pairs: which model layers contribute to the style cost
# and the coefficient each layer's cost is multiplied by.
STYLE_LAYERS = [
    ("conv1_1", 0.2),
    ("conv2_1", 0.2),
    ("conv3_1", 0.2),
    ("conv4_1", 0.2),
    ("conv5_1", 0.2),
]
def compute_style_cost(model, STYLE_LAYERS):
    """
    Compute the overall style cost as a weighted sum over several layers.

    NOTE: this function evaluates activations through the module-level
    session `sess` at call time, so the style image must already be assigned
    to the model input when it is called.

    Arguments:
    model -- mapping from layer name to that layer's output tensor
    STYLE_LAYERS -- iterable of (layer_name, coeff) pairs: the layers to use
                    and the weight applied to each layer's style cost

    Returns:
    J_style -- the weighted sum of the per-layer style costs
    """
    J_style = 0

    for layer_name, coeff in STYLE_LAYERS:
        layer_output = model[layer_name]

        # Evaluate the layer now: these are the activations for whatever
        # image is currently assigned to the model input (the style image).
        a_S = sess.run(layer_output)

        # Keep the symbolic tensor for G: it is evaluated later, once the
        # generated image has been assigned as the model input.
        a_G = layer_output

        layer_cost = compute_layer_style_cost(a_S, a_G)
        J_style = J_style + coeff * layer_cost

    return J_style
def total_cost(J_content, J_style, alpha=10, beta=40):
    """
    Combine the content and style costs into the total cost.

    Arguments:
    J_content -- content cost
    J_style -- style cost
    alpha -- hyperparameter weighting the importance of the content cost
    beta -- hyperparameter weighting the importance of the style cost

    Returns:
    J -- total cost, alpha * J_content + beta * J_style
    """
    weighted_content = alpha * J_content
    weighted_style = beta * J_style
    return weighted_content + weighted_style
# ---------------------------------------------------------------------------
# Build the optimisation graph. The statement order below matters: activations
# are evaluated with sess.run() at specific points, so each image must be
# assigned to the model input before the corresponding cost is constructed.
# ---------------------------------------------------------------------------
# Reset the graph
tf.reset_default_graph()
# Start interactive session (compute_style_cost reads this module-level `sess`)
sess=tf.InteractiveSession()
# Load the content and style images and pass each through
# nst_utils.reshape_and_normalize_image (presumably puts them in the form the
# model input expects -- see nst_utils).
content_image=plt.imread("images/louvre_small.jpg")
content_image=nst_utils.reshape_and_normalize_image(content_image)
style_image=plt.imread("images/monet.jpg")
style_image=nst_utils.reshape_and_normalize_image(style_image)
# Initial generated image, derived from the content image by nst_utils.generate_noise_image.
generated_image=nst_utils.generate_noise_image(content_image)
# Load the pretrained VGG-19 model; `model` is indexed by layer name below.
model =nst_utils.load_vgg_model("pretrained-model/imagenet-vgg-verydeep-19.mat")
# Assign the content image to be the input of the VGG model.
sess.run(model['input'].assign(content_image))
# Select the output tensor of layer conv4_2
out=model['conv4_2']
# Set a_C to be the hidden layer activation from the layer we have selected
# (evaluated now, with the content image as input)
a_C=sess.run(out)
# Set a_G to be the hidden layer activation from same layer. Here, a_G references model['conv4_2']
# and isn't evaluated yet. Later in the code, we'll assign the image G as the model input, so that
# when we run the session, this will be the activations drawn from the appropriate layer, with G as input.
a_G=out
# Compute the content cost
J_content=compute_content_cost(a_C, a_G)
# Assign the input of the model to be the "style" image
# (must happen before compute_style_cost, which evaluates activations via sess.run)
sess.run(model['input'].assign(style_image))
# Compute the style cost
J_style=compute_style_cost(model, STYLE_LAYERS)
# Total cost with alpha=10 (content weight) and beta=40 (style weight)
J=total_cost(J_content,J_style,10,40)
# define optimizer (1 line) -- Adam with learning rate 2.0
optimizer=tf.train.AdamOptimizer(2.0)
# define train_step (1 line) -- one optimisation step minimising the total cost J
train_step=optimizer.minimize(J)
def model_nn(sess, input_image, num_iterations=200):
    """
    Run the style-transfer optimisation and save the resulting images.

    Relies on the module-level graph objects `model`, `train_step`, `J`,
    `J_content` and `J_style` built before this function is called.

    Arguments:
    sess -- session used to run the initializer, the assignments and the
            training steps
    input_image -- initial generated image, assigned to model['input']
    num_iterations -- number of training steps to run (default 200)

    Returns:
    generated_image -- the value of model['input'] after the last training
                       step (or right after the initial assignment when
                       num_iterations is 0)

    Side effects:
    Prints the total/content/style costs and writes "output/<i>.png" every
    20 iterations, and writes "output/generated_image.jpg" at the end.
    """
    # Make sure the target directory exists so the first save does not fail
    # on a fresh checkout.
    os.makedirs("output", exist_ok=True)

    # Initialize global variables (run the session on the initializer).
    sess.run(tf.global_variables_initializer())

    # Feed the initial generated image into the model input.
    generated_image = sess.run(model['input'].assign(input_image))

    for i in range(num_iterations):
        # One optimisation step on the total cost.
        sess.run(train_step)

        # Read back the current generated image from the model input.
        generated_image = sess.run(model['input'])

        # Report progress and save a snapshot every 20 iterations.
        if i % 20 == 0:
            Jt, Jc, Js = sess.run([J, J_content, J_style])
            print("Iteration " + str(i) + " :")
            print("total cost = " + str(Jt))
            print("content cost = " + str(Jc))
            print("style cost = " + str(Js))

            # Save the current generated image in the "output" directory.
            nst_utils.save_image("output/" + str(i) + ".png", generated_image)

    # Save the last generated image.
    nst_utils.save_image('output/generated_image.jpg', generated_image)

    return generated_image
# Run the optimisation with the default num_iterations (200), starting from
# the image produced by nst_utils.generate_noise_image.
model_nn(sess, generated_image)
运行过程显示
Iteration 0 :
total cost = 5048188000.0
content cost = 7821.747
style cost = 126202744.0
Iteration 20 :
total cost = 938073400.0
content cost = 15224.606
style cost = 23448028.0
Iteration 40 :
total cost = 479490340.0
content cost = 16839.646
style cost = 11983049.0
Iteration 60 :
total cost = 311246500.0
content cost = 17609.51
style cost = 7776760.0
Iteration 80 :
total cost = 229463020.0
content cost = 17936.223
style cost = 5732091.5
Iteration 100 :
total cost = 181881710.0
content cost = 18152.766
style cost = 4542505.0
Iteration 120 :
total cost = 150853500.0
content cost = 18304.059
style cost = 3766761.5
Iteration 140 :
total cost = 128686380.0
content cost = 18429.021
style cost = 3212552.5
Iteration 160 :
total cost = 111931950.0
content cost = 18545.67
style cost = 2793662.5
Iteration 180 :
total cost = 98729336.0
content cost = 18640.434
style cost = 2463573.2
最终效果展示