# Code study notes
# Unsupervised Monocular Depth Estimation with Left-Right Consistency: monodepth.main
# Source: monodepth
"""
代码学习 注释专用
song
stay hungry stay foolish
"""
from __future__ import absolute_import, division, print_function # 完成 python2 和 python3 之间的转换,放在文件首部
"""
分别引入的是 python3 中的 : absolute_import:绝对引入;division:整除;print_function: 输出
"""
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1' # only keep warnings and errors
"""
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"]='1' # 这是默认的显示等级,显示所有信息
os.environ["TF_CPP_MIN_LOG_LEVEL"]='2' # 只显示 warning 和 Error
os.environ["TF_CPP_MIN_LOG_LEVEL"]='3' # 只显示 Error
"""
import numpy as np # 处理和存储大型矩阵
import argparse # 参数设置
import re # 正则表达式
import time # 提供时间功能
import tensorflow as tf
import tensorflow.contrib.slim as slim # tensorflow的辅助工具,用来简化代码
from monodepth_model import *
from monodepth_dataloader import *
from average_gradients import *
"""
从文件中引入所有的 函数 和 类
"""
# Command-line interface: every training/testing hyper-parameter is a flag.
parser = argparse.ArgumentParser(description='Monodepth TensorFlow implementation.') # argument definitions
parser.add_argument('--mode', type=str, help='train or test', default='train')
parser.add_argument('--model_name', type=str, help='model name', default='monodepth')
parser.add_argument('--encoder', type=str, help='type of encoder, vgg or resnet50', default='vgg')
parser.add_argument('--dataset', type=str, help='dataset to train on, kitti, or cityscapes', default='kitti')
parser.add_argument('--data_path', type=str, help='path to the data', required=True)
parser.add_argument('--filenames_file', type=str, help='path to the filenames text file', required=True)
parser.add_argument('--input_height', type=int, help='input height', default=256)
parser.add_argument('--input_width', type=int, help='input width', default=512)
parser.add_argument('--batch_size', type=int, help='batch size', default=8)
parser.add_argument('--num_epochs', type=int, help='number of epochs', default=50)
parser.add_argument('--learning_rate', type=float, help='initial learning rate', default=1e-4)
parser.add_argument('--lr_loss_weight', type=float, help='left-right consistency weight', default=1.0)
parser.add_argument('--alpha_image_loss', type=float, help='weight between SSIM and L1 in the image loss', default=0.85)
parser.add_argument('--disp_gradient_loss_weight', type=float, help='disparity smoothness weigth', default=0.1)
parser.add_argument('--do_stereo', help='if set, will train the stereo model', action='store_true')
parser.add_argument('--wrap_mode', type=str, help='bilinear sampler wrap mode, edge or border', default='border')
parser.add_argument('--use_deconv', help='if set, will use transposed convolutions', action='store_true')
parser.add_argument('--num_gpus', type=int, help='number of GPUs to use for training', default=1)
parser.add_argument('--num_threads', type=int, help='number of threads to use for data loading', default=8)
parser.add_argument('--output_directory', type=str, help='output directory for test disparities, if empty outputs to checkpoint folder', default='')
parser.add_argument('--log_directory', type=str, help='directory to save checkpoints and summaries', default='')
parser.add_argument('--checkpoint_path', type=str, help='path to a specific checkpoint to load', default='')
parser.add_argument('--retrain', help='if used with checkpoint_path, will restart training from step zero', action='store_true')
parser.add_argument('--full_summary', help='if set, will keep more data for each summary. Warning: the file can become very large', action='store_true')
# Parsed once at import time; `args` is read module-wide by train()/test()/main().
args = parser.parse_args()
def post_process_disparity(disp):
    """Blend a stereo pair of disparity maps into one post-processed map.

    `disp` is expected to be a (2, h, w) array where disp[0] is the
    left-view disparity and disp[1] the right-view disparity (assumption
    based on the indexing below -- confirm with the caller).  The right
    map is mirrored into the left view, the two maps are averaged, and a
    horizontal ramp mask down-weights the unreliable outer border of each
    map, falling back to the plain average in the middle.
    """
    _, height, width = disp.shape
    left = disp[0]
    right_mirrored = np.fliplr(disp[1])          # bring the right map into left-view coordinates
    mean_disp = (left + right_mirrored) / 2.0    # plain average, used away from the borders

    # Normalized horizontal coordinate in [0, 1], identical for every row.
    xs, _ = np.meshgrid(np.linspace(0, 1, width), np.linspace(0, 1, height))

    # Ramp: 1 at the left edge, falling to 0 once x passes 0.10 -- so the
    # mirrored right map dominates near the left border (and vice versa).
    left_mask = 1.0 - np.clip(20 * (xs - 0.05), 0, 1)
    right_mask = np.fliplr(left_mask)

    blended = right_mask * left + left_mask * right_mirrored
    return blended + (1.0 - left_mask - right_mask) * mean_disp
def count_text_lines(file_path):
    """Return the number of lines in the text file at `file_path`.

    Used to count samples: each line of the filenames file holds the
    paths of one stereo pair (left image, right image).

    Args:
        file_path: path to a plain-text file.

    Returns:
        int: number of lines in the file (0 for an empty file).

    Raises:
        OSError: if the file cannot be opened.
    """
    # Context manager guarantees the handle is closed even if reading
    # raises (the original leaked the handle on error); counting lazily
    # also avoids materializing the whole file in memory.
    with open(file_path, 'r') as f:
        return sum(1 for _ in f)
def train(params): # training entry point: builds the multi-GPU graph and runs the loop
    """Training loop.

    Builds one model tower per GPU with shared variables, averages the
    tower gradients, applies them with Adam under a piecewise-constant
    learning-rate schedule, logs summaries every 100 steps and writes a
    checkpoint every 10000 steps (plus a final one).

    Args:
        params: monodepth_parameters namedtuple with the hyper-parameters.
    """
    with tf.Graph().as_default(), tf.device('/cpu:0'): # fresh graph; variables/input pipeline pinned to CPU
        global_step = tf.Variable(0, trainable=False) # incremented by apply_gradients, not trained

        # OPTIMIZER
        num_training_samples = count_text_lines(args.filenames_file) # one sample per line of the filenames file

        steps_per_epoch = np.ceil(num_training_samples / params.batch_size).astype(np.int32) # ceil: a partial final batch still counts as a step
        num_total_steps = params.num_epochs * steps_per_epoch
        start_learning_rate = args.learning_rate
        """
        num_training_samples 是训练的次数 ; steps_per_epoch 每个epoch需要需要迭代的次数
        num_tota_steps 总的迭代的次数(epoch * 每次需要的次数)
        start_learning_rate = 开始的学习率 0.0001
        """
        # Schedule boundaries at 60% and 80% of training.
        boundaries = [np.int32((3/5) * num_total_steps), np.int32((4/5) * num_total_steps)]
        values = [args.learning_rate, args.learning_rate / 2, args.learning_rate / 4]
        learning_rate = tf.train.piecewise_constant(global_step, boundaries, values) # LR halved at 3/5 and quartered at 4/5 of total steps
        """
        根据步长改变学习率, 前3/5部分使用的是0.0001,后面每过1/5,将学习率除2
        """
        opt_step = tf.train.AdamOptimizer(learning_rate) # single Adam optimizer shared by all towers

        print("total number of samples: {}".format(num_training_samples)) # training samples
        print("total number of steps: {}".format(num_total_steps)) # total optimization steps

        dataloader = MonodepthDataloader(args.data_path, args.filenames_file, params, args.dataset, args.mode)
        # input pipeline built by monodepth_dataloader
        left = dataloader.left_image_batch # batch of left images
        right = dataloader.right_image_batch # batch of right images

        # split for each gpu: shard the batch along dim 0, one shard per GPU
        left_splits = tf.split(left, args.num_gpus, 0)
        right_splits = tf.split(right, args.num_gpus, 0)

        tower_grads = [] # per-tower gradients, averaged below
        tower_losses = []
        reuse_variables = None
        with tf.variable_scope(tf.get_variable_scope()): # one scope so all towers share variables
            for i in range(args.num_gpus):
                with tf.device('/gpu:%d' % i):
                    model = MonodepthModel(params, args.mode, left_splits[i], right_splits[i], reuse_variables, i)
                    loss = model.total_loss
                    tower_losses.append(loss)
                    reuse_variables = True # first tower creates the variables; the rest reuse them
                    grads = opt_step.compute_gradients(loss) # gradients of this tower's loss w.r.t. shared variables
                    tower_grads.append(grads)
        grads = average_gradients(tower_grads) # average gradients across all towers
        apply_gradient_op = opt_step.apply_gradients(grads, global_step=global_step) # It returns an Operation that applies gradients.
        total_loss = tf.reduce_mean(tower_losses) # mean loss over towers, used only for logging
        tf.summary.scalar('learning_rate', learning_rate, ['model_0']) # summaries collected under 'model_0'
        tf.summary.scalar('total_loss', total_loss, ['model_0'])
        summary_op = tf.summary.merge_all('model_0') # merge every summary in the 'model_0' collection

        # SESSION
        config = tf.ConfigProto(allow_soft_placement=True) # fall back to CPU for ops with no GPU kernel
        """
        allow_soft_placement=True 自动分配 CPU & GPU
        """
        sess = tf.Session(config=config)

        # SAVER
        summary_writer = tf.summary.FileWriter(args.log_directory + '/' + args.model_name, sess.graph) # TensorBoard writer (graph + summaries)
        train_saver = tf.train.Saver() # checkpoint saver

        # COUNT PARAMS
        total_num_parameters = 0
        for variable in tf.trainable_variables(): # every trainable variable in the graph
            total_num_parameters += np.array(variable.get_shape().as_list()).prod()
            """
            array 数组的形式
            get_shape().as_list() 得到大小变成元组的形式
            prod() 相乘
            """
        print("number of trainable parameters: {}".format(total_num_parameters))

        # INIT
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        coordinator = tf.train.Coordinator() # coordinates shutdown of the input-queue threads
        threads = tf.train.start_queue_runners(sess=sess, coord=coordinator) # start the queue-runner threads that feed the batches

        # LOAD CHECKPOINT IF SET
        if args.checkpoint_path != '':
            train_saver.restore(sess, args.checkpoint_path.split(".")[0]) # strip the extension: Saver expects the checkpoint prefix
            if args.retrain: # restart step counting from scratch
                sess.run(global_step.assign(0)) # reset global_step to 0

        # GO!
        start_step = global_step.eval(session=sess) # resume from the restored step (0 on a fresh run)
        start_time = time.time()
        for step in range(start_step, num_total_steps):
            before_op_time = time.time()
            _, loss_value = sess.run([apply_gradient_op, total_loss]) # one optimization step
            duration = time.time() - before_op_time
            if step and step % 100 == 0: # log every 100 steps
                examples_per_sec = params.batch_size / duration # throughput of the last step
                time_sofar = (time.time() - start_time) / 3600 # hours elapsed
                training_time_left = (num_total_steps / step - 1.0) * time_sofar # estimated hours remaining
                print_string = 'batch {:>6} | examples/s: {:4.2f} | loss: {:.5f} | time elapsed: {:.2f}h | time left: {:.2f}h'
                print(print_string.format(step, examples_per_sec, loss_value, time_sofar, training_time_left))
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, global_step=step) # write the merged summaries tagged with this step
            if step and step % 10000 == 0:
                train_saver.save(sess, args.log_directory + '/' + args.model_name + '/model', global_step=step) # periodic checkpoint

        train_saver.save(sess, args.log_directory + '/' + args.model_name + '/model', global_step=num_total_steps) # final checkpoint
def test(params):
    """Test function.

    Restores a checkpoint, predicts one left-view disparity map per test
    image, post-processes it, and saves both arrays as .npy files.

    Args:
        params: monodepth_parameters namedtuple with the hyper-parameters.
    """
    dataloader = MonodepthDataloader(args.data_path, args.filenames_file, params, args.dataset, args.mode)
    left = dataloader.left_image_batch
    right = dataloader.right_image_batch
    model = MonodepthModel(params, args.mode, left, right)

    # SESSION
    config = tf.ConfigProto(allow_soft_placement=True) # fall back to CPU for ops with no GPU kernel
    sess = tf.Session(config=config)

    # SAVER
    train_saver = tf.train.Saver()

    # INIT
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    coordinator = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coordinator) # start input-queue threads

    # RESTORE the model weights
    if args.checkpoint_path == '':
        # No explicit checkpoint given: use the newest one in the log directory.
        restore_path = tf.train.latest_checkpoint(args.log_directory + '/' + args.model_name)
    else:
        restore_path = args.checkpoint_path.split(".")[0] # strip extension: Saver expects the checkpoint prefix
    train_saver.restore(sess, restore_path)

    num_test_samples = count_text_lines(args.filenames_file) # one test sample per line of the filenames file

    print('now testing {} files'.format(num_test_samples))
    # Pre-allocate the output arrays: (num_samples, h, w).
    disparities = np.zeros((num_test_samples, params.height, params.width), dtype=np.float32)
    disparities_pp = np.zeros((num_test_samples, params.height, params.width), dtype=np.float32)
    for step in range(num_test_samples):
        disp = sess.run(model.disp_left_est[0]) # first left-disparity estimate -- presumably full resolution, confirm in MonodepthModel
        disparities[step] = disp[0].squeeze() # first image of the batch, squeezed to (h, w)
        disparities_pp[step] = post_process_disparity(disp.squeeze()) # blend both views (see post_process_disparity)

    print('done.')

    print('writing disparities.')
    if args.output_directory == '':
        output_directory = os.path.dirname(args.checkpoint_path) # default: next to the checkpoint
    else:
        output_directory = args.output_directory
    np.save(output_directory + '/disparities.npy', disparities) # raw disparities
    np.save(output_directory + '/disparities_pp.npy', disparities_pp) # post-processed disparities (also .npy, not png)

    print('done.')
def main(_):
    """Entry point: build the hyper-parameter struct and dispatch on mode.

    The unused positional argument is required by tf.app.run(), which
    passes the leftover argv to main.
    """
    params = monodepth_parameters(
        encoder=args.encoder,
        height=args.input_height,
        width=args.input_width,
        batch_size=args.batch_size,
        num_threads=args.num_threads,
        num_epochs=args.num_epochs,
        do_stereo=args.do_stereo,
        wrap_mode=args.wrap_mode,
        use_deconv=args.use_deconv,
        alpha_image_loss=args.alpha_image_loss,
        disp_gradient_loss_weight=args.disp_gradient_loss_weight,
        lr_loss_weight=args.lr_loss_weight,
        full_summary=args.full_summary)

    # Dispatch table instead of an if/elif chain; any other mode is a
    # silent no-op, matching the original behavior.
    handlers = {'train': train, 'test': test}
    handler = handlers.get(args.mode)
    if handler is not None:
        handler(params)
if __name__ == '__main__':
    tf.app.run() # TF1 app wrapper: parses remaining flags and calls main(_)