1.命令行语句
./darknet detector train wp_data/cfg/voc.data wp_data/cfg/yolov3-voc.cfg 2>&1 | tee wp_data/visualization/train_yolov3.log #保存训练日志
#上条命令解释:./darknet 是编译后生成的可执行文件;detector train 是子命令,其中 train 表示执行训练过程。wp_data/cfg/voc.data 表示 wp_data/cfg 路径下的文件 voc.data,wp_data/cfg/yolov3-voc.cfg 表示 wp_data/cfg 路径下的文件 yolov3-voc.cfg。2>&1 | tee wp_data/visualization/train_yolov3.log 表示把标准错误合并到标准输出,再通过 tee 在屏幕显示的同时写入日志文件,以便后续绘制 loss 曲线;若没有这一部分,就不会保存日志。
2. 后续对log进行处理得到 loss 图
# 该代码参考自一篇博客,但原文链接我已经找不到了。感谢原博主!
# coding=utf-8
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import logging
# Configure the root logger: DEBUG and above, formatted as
# "<timestamp> <LEVEL>: <message>" with a second-resolution timestamp.
logging.basicConfig(
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s %(levelname)s: %(message)s',
    level=logging.DEBUG,
)

# Module-level logger used to report when a plot has been saved.
logger = logging.getLogger(__name__)
class Yolov3LogVisualization:
    """Extract loss / IOU records from a darknet training log and plot them.

    The training log at ``log_path`` is filtered into intermediate text files
    inside ``result_dir``; those files are parsed with pandas and the selected
    curve is saved as an image in the same directory.
    """

    # Column names assigned to the comma-separated fields of a loss line.
    _LOSS_COLUMNS = ['loss', 'avg', 'rate', 'seconds', 'images']
    # Column names assigned to the comma-separated fields of an IOU line.
    # NOTE(review): assumes each IOU line has exactly these six fields;
    # confirm against the log format produced by your darknet build.
    _IOU_COLUMNS = ['Region Avg IOU', 'Class', 'Obj', 'No Obj',
                    'Avg Recall', 'count']

    def __init__(self, log_path, result_dir):
        """Remember the raw log location and the output directory.

        Args:
            log_path: Path of the training log to read.
            result_dir: Directory receiving the filtered logs and the plots.
        """
        self.log_path = log_path
        self.result_dir = result_dir

    def extract_log(self, save_log_path, key_word):
        """Copy the log lines containing ``key_word`` to ``save_log_path``.

        Lines are dropped, in this order of precedence, when they:
        * directly follow a 'Saving weights to' line,
        * contain 'Syncing' (multi-GPU synchronisation output),
        * contain 'nan' (records produced by a division by zero),
        * contain 'Saving weights to' themselves.

        Args:
            save_log_path: Destination file; overwritten if it exists.
            key_word: Substring a line must contain to be kept.
        """
        # Both files are closed by the ``with`` statement; no explicit close().
        with open(self.log_path, 'r') as src, \
                open(save_log_path, 'w') as dst:
            skip_next = False
            for line in src:
                if skip_next:
                    skip_next = False
                    continue
                if 'Syncing' in line or 'nan' in line:
                    continue
                if 'Saving weights to' in line:
                    skip_next = True
                    continue
                if key_word in line:
                    dst.write(line)

    @staticmethod
    def _read_log_csv(log_path, skiprows, names):
        """Read a filtered log as CSV, silently dropping malformed rows."""
        # An empty skip list is passed as None, i.e. "skip nothing".
        skiprows = skiprows or None
        try:
            return pd.read_csv(log_path, skiprows=skiprows, names=names,
                               on_bad_lines='skip')
        except TypeError:
            # pandas < 1.3 has no ``on_bad_lines``; use the legacy keyword.
            return pd.read_csv(log_path, skiprows=skiprows, names=names,
                               error_bad_lines=False)

    @staticmethod
    def _numeric_after_split(frame, columns, sep):
        """Replace each column by the numeric value of its second ``sep`` token."""
        for col in columns:
            frame[col] = pd.to_numeric(frame[col].str.split(sep).str.get(1))
        return frame

    def parse_loss_log(self, log_path, line_num=2000, skip_steps=1500):
        """Parse a filtered loss log into a numeric DataFrame.

        The first training steps usually have losses so large that later,
        small changes become invisible in the plot, so the leading rows are
        skipped.

        Args:
            log_path: File written by ``extract_log`` with key word 'images'.
            line_num: Upper bound on the row indices considered for skipping.
            skip_steps: Number of leading rows to ignore (at most
                ``line_num``). The default reproduces the previously
                hard-coded value of 1500.

        Returns:
            DataFrame with numeric columns 'loss', 'avg', 'rate', 'seconds'
            and 'images'.
        """
        skipped = list(range(max(0, min(line_num, skip_steps))))
        result = self._read_log_csv(log_path, skipped, self._LOSS_COLUMNS)
        # Each field is read as two space-separated tokens; the value is the
        # token at index 1 (a leading space yields an empty token at index 0).
        return self._numeric_after_split(result, self._LOSS_COLUMNS, ' ')

    def _save_curve(self, values, label, title, file_name):
        """Plot one series against its index and save it in ``result_dir``."""
        fig = plt.figure()
        try:
            ax = fig.add_subplot(1, 1, 1)
            ax.plot(values, label=label)
            ax.legend(loc='best')
            ax.set_title(title)
            ax.set_xlabel('batches')
            # No extension is given, so matplotlib picks its default format.
            fig.savefig(os.path.join(self.result_dir, file_name))
        finally:
            # Release the figure so repeated calls do not accumulate figures.
            plt.close(fig)

    def gene_loss_pic(self, pd_loss):
        """Plot the 'avg' column of ``pd_loss`` and save it as 'avg_loss'."""
        self._save_curve(pd_loss['avg'].values, 'avg_loss',
                         'The loss curves', 'avg_loss')
        logger.info('save avg loss pic done')

    def loss_pic(self):
        """Filter the loss lines out of the log, parse them and plot them."""
        train_log_loss_path = os.path.join(self.result_dir,
                                           'train_log_loss.txt')
        self.extract_log(train_log_loss_path, 'images')
        self.gene_loss_pic(self.parse_loss_log(train_log_loss_path))

    def parse_iou_log(self, log_path, line_num=2000):
        """Parse a filtered IOU log into a numeric DataFrame.

        Among the first ``line_num`` rows, those whose index ends in 0 or 9
        are skipped (kept from the original script; the rationale is not
        documented there).

        Args:
            log_path: File written by ``extract_log`` with key word 'IOU'.
            line_num: Upper bound on the row indices considered for skipping.

        Returns:
            DataFrame with numeric columns 'Region Avg IOU', 'Class', 'Obj',
            'No Obj', 'Avg Recall' and 'count'.
        """
        skipped = [x for x in range(line_num) if x % 10 in (0, 9)]
        result = self._read_log_csv(log_path, skipped, self._IOU_COLUMNS)
        # Fields look like "<name>: <value>"; keep the part after ': '.
        return self._numeric_after_split(result, self._IOU_COLUMNS, ': ')

    def gene_iou_pic(self, pd_loss):
        """Plot the 'Region Avg IOU' column and save it as 'region_avg_iou'.

        Only this column is drawn; the other parsed columns ('Class', 'Obj',
        'No Obj', 'Avg Recall', 'count') are not plotted.
        """
        self._save_curve(pd_loss['Region Avg IOU'].values, 'Region Avg IOU',
                         'The Region Avg IOU curves', 'region_avg_iou')
        logger.info('save iou pic done')

    def iou_pic(self):
        """Filter the IOU lines out of the log, parse them and plot them."""
        train_log_iou_path = os.path.join(self.result_dir,
                                          'train_log_iou.txt')
        self.extract_log(train_log_iou_path, 'IOU')
        self.gene_iou_pic(self.parse_iou_log(train_log_iou_path))
if __name__ == '__main__':
    # Script entry point: read the training log and write both the filtered
    # logs and the resulting plots into the same analysis directory.
    visualizer = Yolov3LogVisualization(
        '/home/studieren/论文/darknet/log_analysis/train_yolov3.log',
        '/home/studieren/论文/darknet/log_analysis',
    )
    # Average-loss curve first, then the region average IOU curve.
    for draw in (visualizer.loss_pic, visualizer.iou_pic):
        draw()
需要修改的地方只有 parse_loss_log 中的 1500 这个数:它表示忽略前面的 1500 个 batch,因为训练初期的 loss 太大,会导致后面的微小变动在图上不明显。可以根据自己的训练过程设置。
3. 结果
因为一共训练了 2300 个 batch,忽略前 1500 个之后,图中显示的是 2300-1500=800 步。
附百度网盘代码:
链接: https://pan.baidu.com/s/1cSR2iWGwpJ978EQ5fiy99A 提取码: gd9g