TensorFlow -- Neural Style Transfer
While doing Course 4, Week 4, Assignment 1 of Andrew Ng's deep learning course, I struggled to understand what was going on, so I am recording the pseudo-code here to help myself understand it better.
The essence of Neural Style Transfer is to run forward propagation using two fixed tensors, a_C and a_S, together with a_G, the activation computed from the variable G. G, the generated image, is exactly what backpropagation gives us: the total cost J is minimized with respect to the pixels of G.
model = load_vgg_model("pretrained-model.mat"). This model is a dict containing all the pre-trained tf.Variables.
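As a quick illustration (a sketch only; the exact keys come from the course's load_vgg_model helper, and I only list the two entries used later in this post), the dict maps layer names to tensors in the graph:

model = load_vgg_model("pretrained-model.mat")
# The two entries this post actually uses (names assumed from the assignment):
model['input']     # the image variable that we assign to and that gets optimized
model['conv4_2']   # the conv4_2 activation tensor, used for the content cost
# Feeding an image means assigning it to the input variable, e.g.
# sess.run(model['input'].assign(content_image))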
def compute_content_cost(a_C, a_G):
    """
    a_C -- tensor of dimension (1, n_H, n_W, n_C), content-layer activations of the content image C
    a_G -- tensor of dimension (1, n_H, n_W, n_C), content-layer activations of the generated image G
    """
    m, n_H, n_W, n_C = a_G.get_shape().as_list()
    # unroll each activation volume into shape (n_C, n_H * n_W)
    a_C_unrolled = tf.transpose(tf.reshape(a_C, [-1, n_C]))
    a_G_unrolled = tf.transpose(tf.reshape(a_G, [-1, n_C]))
    # sum of squared differences, scaled by 1 / (4 * n_H * n_W * n_C)
    J_content = tf.reduce_sum(tf.square(tf.subtract(a_C_unrolled, a_G_unrolled))) / (4 * n_H * n_W * n_C)
    # when computing J_style, we make use of tf.matmul()
    return J_content
We can test the function above as follows:
tf.reset_default_graph()

with tf.Session() as test:
    tf.set_random_seed(1)
    a_C = tf.random_normal([1, 4, 4, 3], mean=1, stddev=4)
    a_G = tf.random_normal([1, 4, 4, 3], mean=1, stddev=4)
    J_content = compute_content_cost(a_C, a_G)
    print("J_content = " + str(J_content.eval()))
def model_nn(sess, input_image, num_iterations = 200):
    # build the optimizer once, outside the loop, so Adam's state persists across steps;
    # J is the total cost built in the __main__ block below
    optimizer = tf.train.AdamOptimizer(2.0)
    train_step = optimizer.minimize(J)
    # initialize after minimize() so Adam's slot variables get initialized too
    sess.run(tf.global_variables_initializer())
    sess.run(model['input'].assign(input_image))
    for i in range(num_iterations):
        sess.run(train_step)
        generated_image = sess.run(model['input'])
        # print the total cost every 20 iterations
        if i % 20 == 0:
            Jt = sess.run(J)
            print("Iteration " + str(i) + " :")
            print("total cost = " + str(Jt))
    # save the last generated image
    save_image('output/generated_image.jpg', generated_image)
    return generated_image
if __name__ == "__main__":
    tf.reset_default_graph()
    sess = tf.Session()    # the session must exist before any sess.run() call below
    # load the image as an np.array:
    content_image = scipy.misc.imread("image/louvre.jpg")
    content_image = reshape_and_normalize_image(content_image)
    # then similarly we get style_image
    # Assign the content image to be the input of the VGG model.
    sess.run(model['input'].assign(content_image))
    # Select the output tensor of layer conv4_2
    out = model['conv4_2']
    # a_C is now an evaluated Tensor
    a_C = sess.run(out)
    # Set a_G to be the hidden layer activation from the same layer. Here, a_G references model['conv4_2'], still to be evaluated
    a_G = out
    # Compute the content cost
    J_content = compute_content_cost(a_C, a_G)
    # then similarly we get J_style
    # alpha and beta weight the two costs
    J = alpha*J_content + beta*J_style
    # start the optimization from the image currently held by model['input']
    generated_image = sess.run(model['input'])
    model_nn(sess, generated_image)
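The two "then similarly" comments above hide the style side of the pipeline. As a hedged sketch (the layer list and weights below are what I remember the assignment using, so treat them as an assumption), J_style sums the single-layer cost from the earlier sketch over several conv layers, with the style image assigned to model['input'] beforehand:

# Assumed layer/weight choices; the assignment uses a list of this shape.
STYLE_LAYERS = [('conv1_1', 0.2), ('conv2_1', 0.2), ('conv3_1', 0.2),
                ('conv4_1', 0.2), ('conv5_1', 0.2)]

def compute_style_cost(model, STYLE_LAYERS):
    # expects sess.run(model['input'].assign(style_image)) to have been called already
    J_style = 0
    for layer_name, coeff in STYLE_LAYERS:
        out = model[layer_name]
        a_S = sess.run(out)   # style activations: evaluated now, so they act as constants
        a_G = out             # generated-image activations: evaluated later, during optimization
        J_style += coeff * compute_layer_style_cost(a_S, a_G)
    return J_style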
Note: if we index an element by (i, j, k), then the corresponding axes are (0, 1, 2). For example:
a = np.array([[[1, 2, 3],
               [4, 5, 6]]])
a.shape   # (1, 2, 3), so a[i, j, k] indexes axes 0, 1 and 2 respectively
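To connect this with the unrolling inside compute_content_cost, here is a small NumPy sketch (a made-up 1 x 2 x 2 x 3 example, not from the assignment) of what tf.reshape(a, [-1, n_C]) followed by tf.transpose does:

import numpy as np

# hypothetical tiny activation of shape (1, n_H, n_W, n_C) = (1, 2, 2, 3)
a = np.arange(12).reshape(1, 2, 2, 3)

# same effect as tf.reshape(a, [-1, n_C]): every (h, w) position becomes one row
unrolled = a.reshape(-1, 3)    # shape (n_H * n_W, n_C) = (4, 3)

# same effect as tf.transpose(...): every channel becomes one row
unrolled_T = unrolled.T        # shape (n_C, n_H * n_W) = (3, 4)

print(unrolled.shape, unrolled_T.shape)   # (4, 3) (3, 4)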