有一篇很有趣的论文《A Neural Algorithm of Artistic Style》。论文中提出了一种算法,可以将一张普通的图片,转换成具有某位艺术家作品风格的图片。
用语言来描述计算流程如下:
- 首先用一个给定参数的卷积神经网络(VGG网络),将原始的内容图片和风格图片进行函数映射,这个任务不需要网络后面的全连接层,只需要中间一些卷积层的输出即可,而且将网络中的max_pool改成了avg_pool。
- 可以利用这些中间输出,将其定义为内容输出和风格输出。内容输出是网络中较上层的映射结果,而风格输出是一些较低层映射的相关矩阵,用一个Gram矩阵计算表示。
- 将内容图片加噪音,作为网络的输入。将网络的输出和内容输出进行对比,计算出内容损失函数(像素值取差值,再求平方和);和风格输出进行对比,计算出风格损失函数;再整合两个损失形成总损失。
- 训练目标是总损失最小化,训练参数是上一步中加噪音后的输入图片。这样训练得到的图片,内容和原来的内容图片相似,而风格和风格图片一致。
内容图片如下:
风格图片如下:
初始的输入是一个内容图片加噪音:
最后结果图片如下:
具体方法如下面代码所示:
加载包
In [1]:
import scipy.io
import numpy as np
import os
import scipy.misc
import matplotlib.pyplot as plt
import tensorflow as tf
%matplotlib inline
print ("Packages loaded")
定义网络结构
In [2]:
IMAGE_W = 800
IMAGE_H = 600
cwd = os.getcwd()
# 内容图片文档
CONTENT_IMG = cwd + "/images/Taipei101.jpg"
# 风格图片文档
STYLE_IMG = cwd + "/images/StarryNight.jpg"
# 输出结果的目录和文档名
OUTOUT_DIR = './images'
OUTPUT_IMG = 'results.png'
# VGG模型文件
VGG_MODEL = cwd + "/data/imagenet-vgg-verydeep-19.mat"
INI_NOISE_RATIO = 0.7
STYLE_STRENGTH = 500
ITERATION = 5000
CONTENT_LAYERS =[('conv4_2',1.)]
STYLE_LAYERS=[('conv1_1',1.),('conv2_1',1.5),('conv3_1',2.),('conv4_1',2.5),('conv5_1',3.)]
MEAN_VALUES = np.array([123, 117, 104]).reshape((1,1,1,3))
In [3]:
# 定义前向计算函数,如果是conv层则计算卷积,如果是pool则进行池化
def build_net(ntype, nin, nwb=None):
if ntype == 'conv':
return tf.nn.relu(tf.nn.conv2d(nin, nwb[0], strides=[1, 1, 1, 1], padding='SAME')+ nwb[1])
elif ntype == 'pool':
return tf.nn.avg_pool(nin, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
# 从VGG模型中提取参数
def get_weight_bias(vgg_layers, i,):
weights = vgg_layers[i][0][0][0][0][0]
weights = tf.constant(weights)
bias = vgg_layers[i][0][0][0][0][1]
bias = tf.constant(np.reshape(bias, (bias.size)))
return weights, bias
# 构建VGG模型网络结构,从现成的VGG模型文档中读取参数
# 以conv1_1层参数为例,长下面这个样子
# (<tf.Tensor 'Const_83:0' shape=(3, 3, 3, 64) dtype=float32>,
# <tf.Tensor 'Const_84:0' shape=(64,) dtype=float32>)
# conv1_1层输出长下面这个样子
# <tf.Tensor 'Relu_32:0' shape=(1, 600, 800, 64) dtype=float32>
def build_vgg19(path):
net = {}
vgg_rawnet = scipy.io.loadmat(path)
vgg_layers = vgg_rawnet['layers'][0]
net['input'] = tf.Variable(np.zeros((1, IMAGE_H, IMAGE_W, 3)).astype('float32'))
net['conv1_1'] = build_net('conv',net['input'],get_weight_bias(vgg_layers,0))
net['conv1_2'] = build_net('conv',net['conv1_1'],get_weight_bias(vgg_layers,2))
net['pool1'] = build_net('pool',net['conv1_2'])
net['conv2_1'] = build_net('conv',net['pool1'],get_weight_bias(vgg_layers,5))
net['conv2_2'] = build_net('conv',net['conv2_1'],get_weight_bias(vgg_layers,7))
net['pool2'] = build_net('pool',net['conv2_2'])
net['conv3_1'] = build_net('conv',net['pool2'],get_weight_bias(vgg_layers,10))
net['conv3_2'] = build_net('conv',net['conv3_1'],get_weight_bias(vgg_layers,12))
net['conv3_3'] = build_net('conv',net['conv3_2'],get_weight_bias(vgg_layers,14))
net['conv3_4'] = build_net('conv',net['conv3_3'],get_weight_bias(vgg_layers,16))
net['pool3'] = build_net('pool',net['conv3_4'])
net['conv4_1'] = build_net('conv',net['pool3'],get_weight_bias(vgg_layers,19))
net['conv4_2'] = build_net('conv',net['conv4_1'],get_weight_bias(vgg_layers,21))
net['conv4_3'] = build_net('conv',net['conv4_2'],get_weight_bias(vgg_layers,23))
net['conv4_4'] = build_net('conv',net['conv4_3'],get_weight_bias(vgg_layers,25))
net['pool4'] = build_net('pool',net['conv4_4'])
net['conv5_1'] = build_net('conv',net['pool4'],get_weight_bias(vgg_layers,28))
net['conv5_2'] = build_net('conv',net['conv5_1'],get_weight_bias(vgg_layers,30))
net['conv5_3'] = build_net('conv',net['conv5_2'],get_weight_bias(vgg_layers,32))
net['conv5_4'] = build_net('conv',net['conv5_3'],get_weight_bias(vgg_layers,34))
net['pool5'] = build_net('pool',net['conv5_4'])
return net
# 内容损失函数
def build_content_loss(p, x):
M = p.shape[1]*p.shape[2]
N = p.shape[3]
loss = (1./(2* N**0.5 * M**0.5 )) * tf.reduce_sum(tf.pow((x - p),2))
return loss
def gram_matrix(x, area, depth):
x1 = tf.reshape(x,(area,depth))
g = tf.matmul(tf.transpose(x1), x1)
return g
def gram_matrix_val(x, area, depth):
x1 = x.reshape(area,depth)
g = np.dot(x1.T, x1)
return g
# 风格损失函数,A为风格标准图片,G为训练后的结果图片
def build_style_loss(a, x):
M = a.shape[1]*a.shape[2]
N = a.shape[3]
A = gram_matrix_val(a, M, N )
G = gram_matrix(x, M, N )
loss = (1./(4 * N**2 * M**2)) * tf.reduce_sum(tf.pow((G - A),2))
return loss
# 读取图片函数,同时做白化
def read_image(path):
image = scipy.misc.imread(path)
image = image[np.newaxis,:IMAGE_H,:IMAGE_W,:]
image = image - MEAN_VALUES
return image
# 写图片函数
def write_image(path, image):
image = image + MEAN_VALUES
image = image[0]
image = np.clip(image, 0, 255).astype('uint8')
scipy.misc.imsave(path, image)
定义主函数
In [18]:
def main():
net = build_vgg19(VGG_MODEL)
sess = tf.Session()
sess.run(tf.initialize_all_variables())
# 建立一个纯噪音图片做为训练参数,使内容符合内容图片,而风格符合风格图片
noise_img = np.random.uniform(-20, 20, (1, IMAGE_H, IMAGE_W, 3)).astype('float32')
content_img = read_image(CONTENT_IMG)
style_img = read_image(STYLE_IMG)
# 将内容图片输入到VGG网络中,取出conv4_2层输出结果,计算内容损失
sess.run([net['input'].assign(content_img)])
cost_content = sum(map(lambda l,: l[1]*build_content_loss(sess.run(net[l[0]]) , net[l[0]])
, CONTENT_LAYERS))
# 将风格图片输入到VGG网络中,取出conv1_1-conv5_1五个层的输出结果,计算风格损失
sess.run([net['input'].assign(style_img)])
cost_style = sum(map(lambda l: l[1]*build_style_loss(sess.run(net[l[0]]) , net[l[0]])
, STYLE_LAYERS))
# 加总两种损失做为最小化训练目标,用cost_style做为调整系数
cost_total = cost_content + STYLE_STRENGTH * cost_style
optimizer = tf.train.AdamOptimizer(2.0)
train = optimizer.minimize(cost_total)
sess.run(tf.initialize_all_variables())
# 把内容图片加噪音后,做为VGG网络输入层,算法将学习去调整这个输入层,来使得训练目标最小
sess.run(net['input'].assign( INI_NOISE_RATIO* noise_img + (1.-INI_NOISE_RATIO) * content_img))
if not os.path.exists(OUTOUT_DIR):
os.mkdir(OUTOUT_DIR)
for i in range(500):
sess.run(train)
print i
if i%100 ==0:
result_img = sess.run(net['input'])
print sess.run(cost_total)
write_image(os.path.join(OUTOUT_DIR,'%s.png'%(str(i).zfill(4))),result_img)
write_image(os.path.join(OUTOUT_DIR,OUTPUT_IMG),result_img)
In [ ]:
main()
In [ ]:
转载需保留链接来源:软件玩家 » 艺术风格图片的神经网络算法实验