版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_32023541/article/details/83788914
(1)最简单的自编码网络 -- 样本重构功能
自编码网络是一种以输入本身作为输出目标的网络,最基本的模型可以视为三层的神经网络,即输入层、隐藏层、输出层。它也可以看成压缩和解压的过程:编码就是压缩的过程,解码就是解压的过程。编码器和解码器一般是对称的结构。
基本实现代码(MNIST 数据集图片的压缩和解压):将 784 维数据先压缩成 256 维、再压缩成 128 维,然后由 128 维的编码重构出原始输入,即实现输入数据的低维重构。
#-*- coding:utf-8 -*-
import sys
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST splits from mnist_data/ with one-hot encoded labels.
mnist = input_data.read_data_sets("mnist_data/", one_hot=True)

# Hyperparameters. Layer widths run 784 -> 256 -> 128 and back again.
learning_rate = 0.01
n_hidden_1 = 256
n_hidden_2 = 128
n_input = 784

# Batch of flattened images; the leading (batch) dimension is left open.
x = tf.placeholder("float", [None, n_input])
# The reconstruction target is the input itself.
y = x

# Weight matrices, keyed by layer; the decoder shapes mirror the encoder's.
weights = {
    "encoder_h1": tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    "encoder_h2": tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    "decoder_h1": tf.Variable(tf.random_normal([n_hidden_2, n_hidden_1])),
    "decoder_h2": tf.Variable(tf.random_normal([n_hidden_1, n_input])),
}

# Bias vectors start at zero, one per layer output.
biases = {
    "encoder_b1": tf.Variable(tf.zeros([n_hidden_1])),
    "encoder_b2": tf.Variable(tf.zeros([n_hidden_2])),
    "decoder_b1": tf.Variable(tf.zeros([n_hidden_1])),
    "decoder_b2": tf.Variable(tf.zeros([n_input])),
}
# Encoder
def encoder(x):
    """Compress a batch of inputs into the n_hidden_2-wide code.

    Applies two fully connected sigmoid layers, reading their parameters
    from the module-level ``weights`` and ``biases`` dictionaries.
    """
    hidden = tf.nn.sigmoid(tf.matmul(x, weights["encoder_h1"]) + biases["encoder_b1"])
    code = tf.nn.sigmoid(tf.matmul(hidden, weights["encoder_h2"]) + biases["encoder_b2"])
    return code
# Decoder: the reverse of the encoder, hence the mirrored layer sizes.
def decoder(x):
    """Expand a batch of codes back to n_input-wide reconstructions.

    Applies two fully connected sigmoid layers, reading their parameters
    from the module-level ``weights`` and ``biases`` dictionaries. The final
    sigmoid keeps every output value inside (0, 1).
    """
    hidden = tf.nn.sigmoid(tf.matmul(x, weights["decoder_h1"]) + biases["decoder_b1"])
    restored = tf.nn.sigmoid(tf.matmul(hidden, weights["decoder_h2"]) + biases["decoder_b2"])
    return restored
# Graph outputs: the code, its reconstruction, and the mean squared error
# between the reconstruction and the target (which is the input itself).
encoder_out = encoder(x)
pred = decoder(encoder_out)
cost = tf.reduce_mean(tf.square(y - pred))
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)

# Training schedule.
training_epochs = 20
batch_size = 256
display_step = 5

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    total_batch = int(mnist.train.num_examples / batch_size)

    # Training: labels are fetched with each batch but never used.
    for epoch in range(training_epochs):
        for _step in range(total_batch):
            batch_xs, _labels = mnist.train.next_batch(batch_size)
            _, c = sess.run([optimizer, cost], {x: batch_xs})
        # Report the cost of the last batch every display_step epochs.
        if epoch % display_step == 0:
            print ("Epoch:","%04d" %(epoch + 1),"Cost =","{:.9f}".format(c))

    # Visual check: originals on the top row, reconstructions underneath.
    show_num = 10
    originals = mnist.test.images[:show_num]
    reconstruction = sess.run(pred, feed_dict={x: originals})
    fig, axes = plt.subplots(2, 10, figsize=(10, 2))
    for col in range(show_num):
        axes[0][col].imshow(np.reshape(originals[col], (28, 28)))
        axes[1][col].imshow(np.reshape(reconstruction[col], (28, 28)))
    plt.draw()
    plt.show()
如图,上面一行是原图片,下面一行是由编码解码得到的图片。注意解码器最后一层使用了 sigmoid,因此输出被限制在 [0,1] 之间;只有当输入数据本身也在 [0,1] 之间(例如这里的 MNIST)时,才适合用 sigmoid 作为最后一层的激活函数。
(2)卷积网络的自编码 -- 反卷积神经网络
我们先定义一个 反最大池化层的操作,这部分在附注有所提及。 unpool.py 定义如下:
# -*- coding:utf-8 -*-
import tensorflow as tf
# Max-unpooling support.
# Unpooling has to know where each maximum came from, so pooling must go
# through tf.nn.max_pool_with_argmax; without the recorded positions the
# unpooling below cannot be performed.
# Note: both directions here use 'SAME' (padded) pooling.


# Pooling function
def max_pool_with_argmax(net, stride):
    """Max-pool ``net`` and also return the positions of the maxima.

    The window and the stride are both ``stride`` x ``stride``. The argmax
    mask is taken from tf.nn.max_pool_with_argmax and wrapped in
    tf.stop_gradient; the pooled values themselves are computed by a
    separate tf.nn.max_pool call.

    Returns:
        (pooled, mask): the pooled tensor and the argmax index tensor.
    """
    window = [1, stride, stride, 1]
    _, mask = tf.nn.max_pool_with_argmax(net, ksize=window, strides=window, padding='SAME')
    mask = tf.stop_gradient(mask)
    pooled = tf.nn.max_pool(net, ksize=window, strides=window, padding='SAME')
    return pooled, mask
# Matching unpooling function
def unpool(net, batch_size, mask, stride):
    """Scatter pooled values back to the positions recorded in ``mask``.

    Args:
        net: pooled 4-D tensor; dimensions 1..3 must be statically known.
        batch_size: batch dimension as a Python int (it is passed explicitly
            rather than read from ``net``'s static shape).
        mask: argmax indices returned alongside the pooled tensor.
        stride: pooling stride; the output is ``stride`` times larger along
            dimensions 1 and 2.

    Returns:
        A tensor of shape (batch_size, h * stride, w * stride, channels),
        zero everywhere except at the scattered positions.
    """
    _, in_rows, in_cols, channels = net.get_shape().as_list()
    out_rows = in_rows * stride
    out_cols = in_cols * stride
    output_shape = (batch_size, out_rows, out_cols, channels)

    # Rebuild a (batch, row, col, channel) coordinate for every pooled value.
    # NOTE(review): the arithmetic assumes mask is flattened per example as
    # (row * out_cols + col) * channels + channel -- confirm against the
    # tf.nn.max_pool_with_argmax version in use.
    ones = tf.ones_like(mask)
    batch_ids = tf.reshape(tf.range(batch_size, dtype=tf.int64), shape=[batch_size, 1, 1, 1])
    b = ones * batch_ids
    row = mask // (out_cols * channels)
    col = mask % (out_cols * channels) // channels
    f = ones * tf.range(channels, dtype=tf.int64)

    # Flatten to one coordinate row per value and scatter into the output.
    n_updates = tf.size(net)
    coords = tf.transpose(tf.reshape(tf.stack([b, row, col, f]), [4, n_updates]))
    flat_values = tf.reshape(net, [n_updates])
    return tf.scatter_nd(coords, flat_values, output_shape)
接下来卷积网络自编码如下:
#-*- coding:utf-8 -*-
import sys
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
from unpool import max_pool_with_argmax,unpool
# Load the MNIST splits from mnist_data/ with one-hot encoded labels.
mnist = input_data.read_data_sets("mnist_data/", one_hot=True)

# Hyperparameters.
learning_rate = 0.01
n_conv1 = 32   # output channels of the first convolution
n_conv2 = 64   # output channels of the second convolution
n_input = 784
batch_size = 256

# Batch of flattened images; the leading (batch) dimension is left open.
x = tf.placeholder("float", [None, n_input])
# The reconstruction target is the input itself.
y = x

# Convolution kernels. The decoder kernels have the same shapes as the
# encoder kernels because they are used in transposed convolutions.
weights = {
    "encoder_conv1": tf.Variable(tf.random_normal([5, 5, 1, n_conv1], stddev=0.1)),
    "encoder_conv2": tf.Variable(tf.random_normal([3, 3, n_conv1, n_conv2], stddev=0.1)),
    "decoder_conv1": tf.Variable(tf.random_normal([5, 5, 1, n_conv1], stddev=0.1)),
    "decoder_conv2": tf.Variable(tf.random_normal([3, 3, n_conv1, n_conv2], stddev=0.1)),
}

# Per-channel biases, initialised to zero.
biases = {
    "encoder_conv1": tf.Variable(tf.zeros([n_conv1])),
    "encoder_conv2": tf.Variable(tf.zeros([n_conv2])),
    "decoder_conv1": tf.Variable(tf.zeros([n_conv1])),
    "decoder_conv2": tf.Variable(tf.zeros([n_conv2])),
}

# View each flat 784-vector as a 28x28 single-channel image.
x_image = tf.reshape(x, [-1, 28, 28, 1])
# Convolution helper
def conv2d(x,filter,strides = [1,1,1,1],padding = "SAME"):
    """Thin wrapper over tf.nn.conv2d defaulting to stride 1 and 'SAME' padding."""
    convolved = tf.nn.conv2d(x, filter, strides=strides, padding=padding)
    return convolved
# Two consecutive convolution layers
def encoder(x):
    """Run the two encoder convolutions over a batch of images.

    The first layer is a convolution plus bias with no activation applied;
    the second adds a ReLU.

    Returns:
        (h_conv2, h_conv1): the second-layer output followed by the
        first-layer output.
    """
    first = conv2d(x, weights["encoder_conv1"]) + biases["encoder_conv1"]
    second_pre = conv2d(first, weights["encoder_conv2"]) + biases["encoder_conv2"]
    second = tf.nn.relu(second_pre)
    return second, first
# Decoder, implemented with transposed convolutions.
# Pitfall: the input placeholder leaves the batch dimension as None, so the
# static shape of the tensors here starts with ?/None. conv2d_transpose is
# given an explicit output_shape instead, which is why the concrete
# batch_size has to be passed in.
def decoder(x, batch_size, conv1):
    """Map the unpooled feature maps back to 28x28 single-channel images.

    Each step subtracts the corresponding decoder bias and then applies a
    stride-1 transposed convolution.

    Args:
        x: feature maps to decode.
        batch_size: concrete batch size used in the output shapes.
        conv1: unused; kept so existing callers keep working.

    Returns:
        Tensor of shape [batch_size, 28, 28, 1].
    """
    # The intermediate channel count follows n_conv1 (it was a literal 32),
    # so it stays in step with the shape of weights["decoder_conv2"].
    t_conv1 = tf.nn.conv2d_transpose(
        x - biases["decoder_conv2"],
        weights["decoder_conv2"],
        [batch_size, 28, 28, n_conv1],
        [1, 1, 1, 1],
    )
    t_x_image = tf.nn.conv2d_transpose(
        t_conv1 - biases["decoder_conv1"],
        weights["decoder_conv1"],
        [batch_size, 28, 28, 1],
        [1, 1, 1, 1],
    )
    return t_x_image
# Encoder convolutions
encoder_out, conv1 = encoder(x_image)
# Encoder pooling (keeps the argmax mask for the decoder)
h_pool2, mask = max_pool_with_argmax(encoder_out, 2)
# Decoder unpooling
h_upool = unpool(h_pool2, batch_size, mask, 2)
# Decoder transposed convolutions, flattened back to 784-vectors
pred = tf.reshape(decoder(h_upool, batch_size, conv1), [-1, 784])
cost = tf.reduce_mean(tf.square(y - pred))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

# Training schedule
training_epochs = 1  # a single epoch only, to keep the run short
display_step = 5

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    total_batch = int(mnist.train.num_examples / batch_size)

    # Training: labels are fetched with each batch but never used.
    for epoch in range(training_epochs):
        for _step in range(total_batch):
            batch_xs, _labels = mnist.train.next_batch(batch_size)
            _, c = sess.run([optimizer, cost], {x: batch_xs})
        # Report the cost of the last batch every display_step epochs.
        if epoch % display_step == 0:
            print ("Epoch:","%04d" %(epoch + 1),"Cost =","{:.9f}".format(c))

    # Visual check. The graph was built for exactly batch_size examples, so a
    # full batch of test images is fed and only the first show_num are kept.
    show_num = 10
    full_batch = sess.run(pred, feed_dict={x: mnist.test.images[:batch_size]})
    reconstruction = full_batch[:show_num]
    fig, axes = plt.subplots(2, 10, figsize=(10, 2))
    for col in range(show_num):
        axes[0][col].imshow(np.reshape(mnist.test.images[col], (28, 28)))
        axes[1][col].imshow(np.reshape(reconstruction[col], (28, 28)))
    plt.draw()
    plt.show()
结果如图(这里只训练了 1 轮,而前面的简单自编码训练了 20 轮;只训练 1 轮主要是因为卷积网络耗时太长。可以看到,即使只训练 1 轮,效果也比简单自编码好):
(3)变分自编码 -- 不仅重构样本,还能学习样本规律生成样本(用标准正态分布来重构)
变分自编码的编码器不再直接输出一个确定的编码,而是输出一个分布(均值和对数方差),并通过 KL 散度约束这个分布接近标准正态分布,然后从该分布中采样得到编码去重构样本。这样得到的编码不仅能重构样本,还能得到样本的规律性。前面的自编码网络都是尝试得到跟原图一模一样的解码效果,但是变分自编码不会,而是拟合原图的规律,这就是变分自编码的特点。我的理解是:它用标准正态分布尝试平滑化图形。
#-*- coding:utf-8 -*-
'''
变分自编码网络在于压缩时生成一个 mean 一个 lg_var
然后采用 mean + e的lg_var次幂的开根号 * N(0,1)
'''
import sys
import tensorflow as tf
import numpy as np
from scipy.stats import norm
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST splits from mnist_data/ with one-hot encoded labels.
mnist = input_data.read_data_sets("mnist_data/", one_hot=True)

n_input = 784
n_hidden_1 = 256
n_hidden_2 = 2  # width of the latent code

x = tf.placeholder(tf.float32, [None, n_input])
# zinput is meant for feeding latent codes when generating samples; it is
# not used yet in this script.
zinput = tf.placeholder(tf.float32, [None, n_hidden_2])


def _small_normal(shape):
    """Variable drawn from a truncated normal with stddev 0.001."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.001))


def _zero_bias(size):
    """Zero-initialised bias Variable of the given length."""
    return tf.Variable(tf.zeros([size]))


# Unlike the earlier scripts, the weights start out very small (stddev 0.001).
# The KL term measures the distance to the standard normal N(0, 1); if the
# initial mean and variance produced by the network were large, that distance
# would be huge and training could blow up to NaN.
weights = {
    "encoder_w1": _small_normal([n_input, n_hidden_1]),
    "encoder_b1": _zero_bias(n_hidden_1),
    "encoder_mean_w2": _small_normal([n_hidden_1, n_hidden_2]),
    "encoder_mean_b2": _zero_bias(n_hidden_2),
    "encoder_log_sigma_w2": _small_normal([n_hidden_1, n_hidden_2]),
    "encoder_log_sigma_b2": _zero_bias(n_hidden_2),
    "decoder_w1": _small_normal([n_hidden_2, n_hidden_1]),
    "decoder_b1": _zero_bias(n_hidden_1),
    "decoder_w2": _small_normal([n_hidden_1, n_input]),
    "decoder_b2": _zero_bias(n_input),
}

# First encoder layer.
h1 = tf.nn.relu(tf.matmul(x, weights["encoder_w1"]) + weights["encoder_b1"])
# Last encoder layer: two linear heads producing mean and log-variance.
mean = tf.matmul(h1, weights["encoder_mean_w2"]) + weights["encoder_mean_b2"]
log_var = tf.matmul(h1, weights["encoder_log_sigma_w2"]) + weights["encoder_log_sigma_b2"]

# Standard normal noise, one row per example in the batch.
noise_shape = tf.stack([tf.shape(h1)[0], n_hidden_2])
eps = tf.random_normal(noise_shape, mean=0, stddev=1, dtype=tf.float32)
# Sample the code: mean + standard deviation * noise.
z = mean + tf.sqrt(tf.exp(log_var)) * eps

# Decoder: one ReLU layer followed by a linear output layer.
h2 = tf.nn.relu(tf.matmul(z, weights["decoder_w1"]) + weights["decoder_b1"])
reconstruction = tf.matmul(h2, weights["decoder_w2"]) + weights["decoder_b2"]
# Loss: per-example reconstruction error plus the KL divergence between the
# encoded distribution and the standard normal.
# Fix: the reconstruction term is 0.5 * (sum of squared errors). It was
# written as "0.5 + ...", which only shifts the cost by a constant, and it was
# summed over the whole batch although it is meant to be per row like KL_loss.
reconstr_loss = 0.5 * tf.reduce_sum(tf.square(reconstruction - x), axis=1)  # one value per row
KL_loss = -0.5 * tf.reduce_sum(1 + log_var - tf.square(mean) - tf.exp(log_var), axis=1)  # one value per row
cost = tf.reduce_mean(reconstr_loss + KL_loss)
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)
# Training schedule.
training_epochs = 50
batch_size = 128
display_step = 5

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Does not change between epochs, so compute it once.
    total_batch = int(mnist.train.num_examples / batch_size)

    for epoch in range(training_epochs):
        for i in range(total_batch):
            # Labels come with every batch but the plain VAE ignores them.
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_xs})
        # Report the last batch's cost every display_step epochs. The check
        # used the batch index although the message reports the epoch.
        # A single pre-formatted argument prints the same text on Python 2
        # and Python 3 (the bare print statement is Python 2 only).
        if epoch % display_step == 0:
            print("Epoch: %04d Cost = %.9f" % (epoch + 1, c))

    # Visual check: originals on the top row, reconstructions underneath.
    show_num = 10
    # Stored under a new name so the `reconstruction` tensor is not replaced
    # by its evaluated array.
    recon_images = sess.run(reconstruction, feed_dict={x: mnist.test.images[:show_num]})

    import matplotlib.pyplot as plt
    fig, axes = plt.subplots(2, show_num, figsize=(10, 2))
    for col in range(show_num):
        axes[0][col].imshow(np.reshape(mnist.test.images[col], (28, 28)))
        axes[1][col].imshow(np.reshape(recon_images[col], (28, 28)))
    plt.draw()
    plt.show()
如下是解码和原图对比
可以看到,并没有像前面的网络一样尝试完全拟合原图,而是尝试获取样本的规律。
(4)条件变分自编码 -- 获取某类样本规律(变分自编码进阶)
变分自编码虽然可以学习样本规律,但是学习的是某个样本的规律,而条件变分自编码在引入标签的前提下实现了学习某一类别样本规律的功能。
#-*- coding:utf-8 -*-
'''
条件变分自编码即
在变分自编码情况下引入 标签
'''
import sys
import tensorflow as tf
import numpy as np
from scipy.stats import norm
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST splits from mnist_data/ with one-hot encoded labels.
mnist = input_data.read_data_sets("mnist_data/", one_hot=True)

n_input = 784
n_hidden_1 = 256
n_hidden_2 = 2   # width of the latent code
n_labels = 10

x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_labels])  # labels are now an input too
# zinput is meant for feeding latent codes when generating samples later.
zinput = tf.placeholder(tf.float32, [None, n_hidden_2])


def _small_normal(shape):
    """Variable drawn from a truncated normal with stddev 0.001."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.001))


def _zero_bias(size):
    """Zero-initialised bias Variable of the given length."""
    return tf.Variable(tf.zeros([size]))


# Unlike the earlier scripts, the weights start out very small (stddev 0.001).
# The KL term measures the distance to the standard normal N(0, 1); if the
# initial mean and variance produced by the network were large, that distance
# would be huge and training could blow up to NaN.
#
# Conditioning: the label is encoded by its own layer and concatenated with
# the image encoding, doubling the hidden width (hence n_hidden_1 * 2). After
# sampling, the label is concatenated onto the latent code again before
# decoding (hence n_hidden_2 + n_labels).
weights = {
    "encoder_w1": _small_normal([n_input, n_hidden_1]),
    "encoder_b1": _zero_bias(n_hidden_1),
    "encoder_labels": _small_normal([n_labels, n_hidden_1]),
    "encoder_labels_b": _zero_bias(n_hidden_1),
    "encoder_mean_w2": _small_normal([n_hidden_1 * 2, n_hidden_2]),
    "encoder_mean_b2": _zero_bias(n_hidden_2),
    "encoder_log_sigma_w2": _small_normal([n_hidden_1 * 2, n_hidden_2]),
    "encoder_log_sigma_b2": _zero_bias(n_hidden_2),
    "decoder_w1": _small_normal([n_hidden_2 + n_labels, n_hidden_1]),
    "decoder_b1": _zero_bias(n_hidden_1),
    "decoder_w2": _small_normal([n_hidden_1, n_input]),
    "decoder_b2": _zero_bias(n_input),
}

# First encoder layer: image and label are encoded separately, then joined.
h1 = tf.nn.relu(tf.matmul(x, weights["encoder_w1"]) + weights["encoder_b1"])
h1_labels = tf.nn.relu(tf.matmul(y, weights["encoder_labels"]) + weights["encoder_labels_b"])
hall = tf.concat([h1, h1_labels], 1)  # width 256 * 2

# Last encoder layer: two linear heads producing mean and log-variance.
mean = tf.matmul(hall, weights["encoder_mean_w2"]) + weights["encoder_mean_b2"]
log_var = tf.matmul(hall, weights["encoder_log_sigma_w2"]) + weights["encoder_log_sigma_b2"]

# Standard normal noise, one row per example in the batch.
noise_shape = tf.stack([tf.shape(h1)[0], n_hidden_2])
eps = tf.random_normal(noise_shape, mean=0, stddev=1, dtype=tf.float32)
# Sample the code, then attach the label to condition the decoder.
z = mean + tf.sqrt(tf.exp(log_var)) * eps
zall = tf.concat([z, y], 1)  # width 2 + 10

# Decoder: one ReLU layer followed by a linear output layer.
h2 = tf.nn.relu(tf.matmul(zall, weights["decoder_w1"]) + weights["decoder_b1"])
reconstruction = tf.matmul(h2, weights["decoder_w2"]) + weights["decoder_b2"]
# Loss: per-example reconstruction error plus the KL divergence between the
# encoded distribution and the standard normal.
# Fix: the reconstruction term is 0.5 * (sum of squared errors). It was
# written as "0.5 + ...", which only shifts the cost by a constant, and it was
# summed over the whole batch although it is meant to be per row like KL_loss.
reconstr_loss = 0.5 * tf.reduce_sum(tf.square(reconstruction - x), axis=1)  # one value per row
KL_loss = -0.5 * tf.reduce_sum(1 + log_var - tf.square(mean) - tf.exp(log_var), axis=1)  # one value per row
cost = tf.reduce_mean(reconstr_loss + KL_loss)
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)
# cPickle exists only on Python 2; fall back to pickle elsewhere.
try:
    import cPickle as pickle
except ImportError:
    import pickle

# Training schedule.
training_epochs = 50
batch_size = 128
display_step = 5

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Does not change between epochs, so compute it once.
    total_batch = int(mnist.train.num_examples / batch_size)

    for epoch in range(training_epochs):
        for i in range(total_batch):
            # The conditional model is fed the labels as well as the images.
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_xs, y: batch_ys})
        # Report the last batch's cost every display_step epochs. The check
        # used the batch index although the message reports the epoch.
        # A single pre-formatted argument prints the same text on Python 2
        # and Python 3 (the bare print statement is Python 2 only).
        if epoch % display_step == 0:
            print("Epoch: %04d Cost = %.9f" % (epoch + 1, c))

    # Reconstruct the first show_num test images, conditioned on their labels.
    show_num = 10
    # Stored under a new name so the `reconstruction` tensor is not replaced
    # by its evaluated array.
    recon_images = sess.run(
        reconstruction,
        feed_dict={x: mnist.test.images[:show_num], y: mnist.test.labels[:show_num]},
    )

    # Save [original, reconstruction] pairs of 28x28 arrays for later viewing.
    image = [
        [np.reshape(mnist.test.images[i], (28, 28)), np.reshape(recon_images[i], (28, 28))]
        for i in range(show_num)
    ]
    # Pickle data is binary, so the file is opened in "wb" mode (text mode
    # fails on Python 3). Whatever loads image.pkl should open it with "rb".
    with open("image.pkl", "wb") as f:
        pickle.dump(image, f)
解码样本如下 相对变分自编码有所提高
利用标签模拟生成的样本则如下: