TensorBoard 是一个可视化词嵌入的好工具。特别是在训练自己的、面向特定领域的词嵌入时,嵌入可视化可以帮助验证语义相似性。将词模型转换为 TensorBoard 可以处理的格式很简单:把词向量及其对应的标签加载到 TensorBoard 后,它会将这些向量降维到二维或三维。TensorBoard 目前提供 3 种降维方法:PCA、t-SNE 和自定义降维。
下面的代码展示了如何将词嵌入转换为 TensorBoard 格式并生成投影数据:
import os
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import numpy as np
from io import open
from tensorboard.plugins import projector
# TensorBoard 的 Projector 功能十分实用,它可以帮助我们查看网络中的数据在二维、三维空间下的效果
# create_projection 函数有 3 个参数:嵌入数据、投影的名称,以及存储投影文件的路径
def create_projection(projection_data, projection_name='tensorboard_viz',
                      path='xxx\\tmp\\'):
    """Export labelled vectors as a TensorBoard Embedding Projector run.

    The following artefacts are written into ``path``:

    * ``<projection_name>.tsv`` -- metadata file with one label per line,
      in the same order as the vectors;
    * ``<projection_name>.ckpt`` -- checkpoint holding the vectors in a
      non-trainable variable named ``projection_name``;
    * a summary event file containing the session graph, plus the projector
      configuration emitted by ``projector.visualize_embeddings``.

    Args:
        projection_data: Sequence of rows where ``row[0]`` is the label and
            ``row[1]`` is the vector. Each vector is copied into one row of
            a matrix whose width is the length of the first vector.
        projection_name: Base name used for the metadata file, the embedding
            variable and the checkpoint.
        path: Output directory, i.e. the directory later passed to
            ``tensorboard --logdir``. It is created when it does not exist.

    Raises:
        IndexError: If ``projection_data`` is empty.
    """
    meta_file = "{}.tsv".format(projection_name)
    vector_dim = len(projection_data[0][1])
    samples = len(projection_data)
    projection_matrix = np.zeros((samples, vector_dim))

    # Make sure the output directory exists before anything is written to it.
    if path and not os.path.isdir(path):
        os.makedirs(path)

    # Write the labels as UTF-8 explicitly so that non-ASCII labels do not
    # depend on the platform's default encoding.
    with open(os.path.join(path, meta_file), 'w',
              encoding='utf-8') as file_metadata:
        for i, row in enumerate(projection_data):
            label, vector = row[0], row[1]
            projection_matrix[i] = np.array(vector)
            file_metadata.write("{}\n".format(label))

    # A TensorFlow session is required to create the TensorBoard projection.
    sess = tf.InteractiveSession()
    writer = None
    try:
        # The projector reads its data from a checkpoint written by a Saver,
        # so the matrix is stored in a tf.Variable. The variable only has to
        # exist in the graph; the Python reference itself is not used again.
        embedding = tf.Variable(projection_matrix, trainable=False,
                                name=projection_name)
        tf.global_variables_initializer().run()
        saver = tf.train.Saver()
        writer = tf.summary.FileWriter(path, sess.graph)

        # Register the embedding with the projector: it needs the name of
        # the variable and the location of the metadata (labels) file.
        config = projector.ProjectorConfig()
        embed = config.embeddings.add()
        embed.tensor_name = '{}'.format(projection_name)
        # The metadata file lives in the log directory itself. A bare file
        # name is used because a relative metadata path is resolved against
        # the log directory; joining it with ``path`` again would point to
        # <path>/<path>/<file> whenever ``path`` is relative.
        embed.metadata_path = meta_file

        # Write the projector configuration, then the checkpoint it refers to.
        projector.visualize_embeddings(writer, config)
        saver.save(sess, os.path.join(path, '{}.ckpt'.format(projection_name)))
    finally:
        # Flush the event file and release the session even when a step fails.
        if writer is not None:
            writer.close()
        sess.close()

    print('Run `tensorboard --logdir={0}` to run visualize result on tensorboard'.format(path))
# Base name handed to create_projection for the generated files.
projection_name = "NLP_in_Action"

# Sample data: one (label, vector) pair per entry, all vectors of length 7.
projection_data = [
    ('car1', [0.34, 0.21, -0.72, 0.54, 0.58, 0.87, 0.98]),
    ('car2', [0.44, 0.61, -3.72, 3.54, 0.98, 0.57, 0.28]),
    ('car3', [1.34, 1.21, -0.72, 0.54, 4.58, 0.87, 0.98]),
    ('car4', [0.94, 0.21, -5.72, 0.54, 3.58, 0.17, 0.48]),
    ('car5', [3.34, 0.21, -5.72, 0.54, 0.5, 0.87, 4.98]),
]

# Build the projection in the function's default output directory.
create_projection(projection_data, projection_name=projection_name)