1,数据组织:
训练验证数据生成:
python create_data.py nuscenes_data_prep --data_path=NUSCENES_TRAINVAL_DATASET_ROOT --version="v1.0-trainval" --max_sweeps=10
python create_data.py nuscenes_data_prep --data_path=NUSCENES_TEST_DATASET_ROOT --version="v1.0-test" --max_sweeps=10 --dataset_name="NuscenesDataset"
如果为自定义数据要根据类别进行修改,将数据转成bin文件,打包成pkl文件再执行上面指令。
如: (1)my_common.py 将标签与点云打包成pkl。
(2)python my_create_data.py mydata --data_path=datapath
2,修改config文件
打开second.pytorch/second/configs/car.lite.config 编辑类别与训练数据路径
train_input_reader: {
...
database_sampler {
database_info_path: "/path/to/dataset_dbinfos_train.pkl"
...
}
dataset: {
dataset_class_name: "DATASET_NAME"
kitti_info_path: "/path/to/dataset_infos_train.pkl"
kitti_root_path: "DATASET_ROOT"
}
}
...
eval_input_reader: {
...
dataset: {
dataset_class_name: "DATASET_NAME"
kitti_info_path: "/path/to/dataset_infos_val.pkl"
kitti_root_path: "DATASET_ROOT"
}
}
3,开始训练
单GPU:
python ./pytorch/train.py train --config_path=./configs/car.fhd.config --model_dir=/path/to/model_dir
多GPU:
CUDA_VISIBLE_DEVICES=0,1,3 python ./pytorch/train.py train --config_path=./configs/car.fhd.config --model_dir=/path/to/model_dir --multi_gpu=True
浮点16位训练:
修改配置文件,将enable_mixed_precision设置为true。
(1)如果要训练新模型,请确保"/path/to/model_dir"不存在。如果model_dir不存在,将创建一个新目录,否则将读取其中的检查点。
(2)训练过程使用batchsize=6作为1080Ti的默认值,如果GPU内存较少,则需要减少batchsize。
(3)目前仅支持单个GPU训练,但在单个1080Ti上训练一个模型只需要约20小时;使用super convergence时,只需50个epoch即可在KITTI验证数据集的Car Moderate 3D指标上达到78.3 AP。
4,验证
检测结果默认保存为result.pkl;可设置--pickle_result=False,将结果保存为KITTI标签格式。
python ./pytorch/train.py evaluate --config_path=./configs/car.fhd.config --model_dir=/path/to/model_dir --measure_time=True --batch_size=1
5,训练自定义数据
需要修改或者重写second.data.kitti_dataset,并以 @register_dataset 装饰器注册自定义的数据集类。
训练时要修改eval.py,主要是自定义数据的类别等。
from pathlib import Path
import pickle
import time
from functools import partial
import numpy as np
from second.core import box_np_ops
from second.core import preprocess as prep
from second.data import kitti_common as kitti
from second.utils.eval import get_coco_eval_result, get_official_eval_result
from second.data.dataset import Dataset, register_dataset
from second.utils.progress_bar import progress_bar_iter as prog_bar
@register_dataset
class KittiDataset(Dataset):
    """Dataset over KITTI-style frames described by a pickled info list.

    Every info entry is a dict. The methods below read its ``"image"``,
    ``"point_cloud"`` and ``"calib"`` entries and, when present, its
    ``"annos"`` entry.
    """

    # Number of float32 values per point used when reshaping a velodyne file.
    NumPointFeatures = 4

    def __init__(self,
                 root_path,
                 info_path,
                 class_names=None,
                 prep_func=None,
                 num_point_features=None):
        """Load the info list and store the dataset configuration.

        Args:
            root_path: directory that relative velodyne/image paths are
                resolved against.
            info_path: path of the pickle file holding the info list.
                Must not be None.
            class_names: sequence indexed by a predicted label to obtain
                the class name.
            prep_func: callable invoked as ``prep_func(input_dict=...)``
                by ``__getitem__``.
            num_point_features: accepted but not used by this class; point
                files are always reshaped with ``NumPointFeatures``.
        """
        assert info_path is not None
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # info files that were produced locally / come from a trusted source.
        with open(info_path, 'rb') as f:
            infos = pickle.load(f)
        self._root_path = Path(root_path)
        self._kitti_infos = infos
        print("remain number of infos:", len(self._kitti_infos))
        self._class_names = class_names
        self._prep_func = prep_func

    def __len__(self):
        """Return the number of loaded info entries."""
        return len(self._kitti_infos)

    def convert_detection_to_kitti_annos(self, detection):
        """Convert network detections into KITTI-style annotation dicts.

        Args:
            detection: sequence of dicts with ``"box3d_lidar"``,
                ``"label_preds"`` and ``"scores"`` (objects supporting
                ``.detach().cpu().numpy()``, presumably torch tensors) and
                ``"metadata"``. Detection ``i`` is paired with info ``i``,
                so the order must match the order of the info list.

        Returns:
            A list with one annotation dict per detection. Each dict also
            carries the detection's ``"metadata"``.
        """
        class_names = self._class_names
        annos = []
        for i, det in enumerate(detection):
            # Positional pairing: lookup by image index is intentionally
            # not used here.
            info = self._kitti_infos[i]
            calib = info["calib"]
            rect = calib["R0_rect"]
            Trv2c = calib["Tr_velo_to_cam"]
            P2 = calib["P2"]
            # .numpy() shares memory with a tensor that already lives on
            # the CPU. Copy before the in-place z shift below so that the
            # caller's detections are not modified (and not shifted twice
            # when this method is called again on the same detections).
            final_box_preds = det["box3d_lidar"].detach().cpu().numpy().copy()
            label_preds = det["label_preds"].detach().cpu().numpy()
            scores = det["scores"].detach().cpu().numpy()
            if final_box_preds.shape[0] != 0:
                # convert center format to kitti format
                final_box_preds[:, 2] -= final_box_preds[:, 5] / 2
                box3d_camera = box_np_ops.box_lidar_to_camera(
                    final_box_preds, rect, Trv2c)
                locs = box3d_camera[:, :3]
                dims = box3d_camera[:, 3:6]
                angles = box3d_camera[:, 6]
                camera_box_origin = [0.5, 1.0, 0.5]
                box_corners = box_np_ops.center_to_corner_box3d(
                    locs, dims, angles, camera_box_origin, axis=1)
                box_corners_in_image = box_np_ops.project_to_image(
                    box_corners, P2)
                # box_corners_in_image: [N, 8, 2]
                minxy = np.min(box_corners_in_image, axis=1)
                maxxy = np.max(box_corners_in_image, axis=1)
                bbox = np.concatenate([minxy, maxxy], axis=1)
            anno = kitti.get_start_result_anno()
            num_example = 0
            box3d_lidar = final_box_preds
            # When there are no predictions the loop body never runs, so
            # the names bound only in the branch above are not needed.
            for j in range(box3d_lidar.shape[0]):
                # assumes image_shape is (height, width) — TODO confirm
                image_shape = info["image"]["image_shape"]
                # Drop boxes whose 2D projection lies fully outside the image.
                if bbox[j, 0] > image_shape[1] or bbox[j, 1] > image_shape[0]:
                    continue
                if bbox[j, 2] < 0 or bbox[j, 3] < 0:
                    continue
                # Clip the remaining boxes to the image bounds.
                bbox[j, 2:] = np.minimum(bbox[j, 2:], image_shape[::-1])
                bbox[j, :2] = np.maximum(bbox[j, :2], [0, 0])
                anno["bbox"].append(bbox[j])
                anno["alpha"].append(
                    -np.arctan2(-box3d_lidar[j, 1], box3d_lidar[j, 0]) +
                    box3d_camera[j, 6])
                anno["dimensions"].append(box3d_camera[j, 3:6])
                anno["location"].append(box3d_camera[j, :3])
                anno["rotation_y"].append(box3d_camera[j, 6])
                anno["name"].append(class_names[int(label_preds[j])])
                anno["truncated"].append(0.0)
                anno["occluded"].append(0)
                anno["score"].append(scores[j])
                num_example += 1
            if num_example != 0:
                anno = {n: np.stack(v) for n, v in anno.items()}
                annos.append(anno)
            else:
                annos.append(kitti.empty_result_anno())
            annos[-1]["metadata"] = det["metadata"]
        return annos

    def evaluation(self, detections, output_dir):
        """Evaluate detections with the official and COCO-style KITTI metrics.

        When you want to eval your own dataset, you MUST set the correct
        z axis and box z center.
        If you want to eval with the KITTI eval function, you must provide
        annotations in the correct format.

        ground_truth_annotations format:
        {
            bbox: [N, 4], if you fill fake data, MUST HAVE >25 HEIGHT!!!!!!
            alpha: [N], you can use -10 to ignore it.
            occluded: [N], you can use zero.
            truncated: [N], you can use zero.
            name: [N]
            location: [N, 3] center of 3d box.
            dimensions: [N, 3] dim of 3d box.
            rotation_y: [N] angle.
        }
        All fields must be filled, but some fields can be filled with zero.

        Args:
            detections: detections as accepted by
                ``convert_detection_to_kitti_annos``.
            output_dir: not used by this method.

        Returns:
            ``None`` when there are no infos or the infos carry no
            ``"annos"``; otherwise a dict with ``"results"`` and
            ``"detail"`` entries built from both evaluators.
        """
        # Guard the empty case as well: indexing [0] would raise IndexError.
        if not self._kitti_infos or "annos" not in self._kitti_infos[0]:
            return None
        gt_annos = [info["annos"] for info in self._kitti_infos]
        # firstly convert standard detection to kitti-format dt annos
        dt_annos = self.convert_detection_to_kitti_annos(detections)
        z_axis = 1  # KITTI camera format use y as regular "z" axis.
        z_center = 1.0  # KITTI camera box's center is [0.5, 1, 0.5]
        # for regular raw lidar data, z_axis = 2, z_center = 0.5.
        result_official_dict = get_official_eval_result(
            gt_annos,
            dt_annos,
            self._class_names,
            z_axis=z_axis,
            z_center=z_center)
        result_coco = get_coco_eval_result(
            gt_annos,
            dt_annos,
            self._class_names,
            z_axis=z_axis,
            z_center=z_center)
        return {
            "results": {
                "official": result_official_dict["result"],
                "coco": result_coco["result"],
            },
            "detail": {
                "eval.kitti": {
                    "official": result_official_dict["detail"],
                    "coco": result_coco["detail"]
                }
            },
        }

    def __getitem__(self, idx):
        """Return the preprocessed example for ``idx``.

        The sensor data is passed to ``prep_func``; the example's
        ``"metadata"`` is the sensor metadata when it has an
        ``"image_idx"``, else an empty dict. An ``"anchors_mask"`` entry,
        when present, is cast to ``np.uint8``.
        """
        input_dict = self.get_sensor_data(idx)
        example = self._prep_func(input_dict=input_dict)
        example["metadata"] = {}
        if "image_idx" in input_dict["metadata"]:
            example["metadata"] = input_dict["metadata"]
        if "anchors_mask" in example:
            example["anchors_mask"] = example["anchors_mask"].astype(np.uint8)
        return example

    def get_sensor_data(self, query):
        """Read the raw sensor data of one frame.

        Args:
            query: either an index into the info list, or a dict with a
                ``"lidar"`` entry holding ``{"idx": index}``. The image
                file is read only when the dict also has a ``"cam"`` key.

        Returns:
            A dict with ``"lidar"`` (points and, when the info has
            ``"annos"``, annotations), ``"metadata"``, ``"calib"`` and
            ``"cam"`` entries.
        """
        read_image = False
        idx = query
        if isinstance(query, dict):
            read_image = "cam" in query
            assert "lidar" in query
            idx = query["lidar"]["idx"]
        info = self._kitti_infos[idx]
        res = {
            "lidar": {
                "type": "lidar",
                "points": None,
            },
            "metadata": {
                "image_idx": info["image"]["image_idx"],
                "image_shape": info["image"]["image_shape"],
            },
            "calib": None,
            "cam": {}
        }

        pc_info = info["point_cloud"]
        velo_path = Path(pc_info['velodyne_path'])
        if not velo_path.is_absolute():
            velo_path = Path(self._root_path) / pc_info['velodyne_path']
        # Prefer the file in the sibling "<dir>_reduced" directory when it
        # exists.
        velo_reduced_path = velo_path.parent.parent / (
            velo_path.parent.stem + '_reduced') / velo_path.name
        if velo_reduced_path.exists():
            velo_path = velo_reduced_path
        points = np.fromfile(
            str(velo_path), dtype=np.float32,
            count=-1).reshape([-1, self.NumPointFeatures])
        res["lidar"]["points"] = points

        image_info = info["image"]
        image_path = image_info['image_path']
        if read_image:
            image_path = self._root_path / image_path
            with open(str(image_path), 'rb') as f:
                image_str = f.read()
            # The image is returned as raw, still-encoded file bytes.
            res["cam"] = {
                "type": "camera",
                "data": image_str,
                "datatype": image_path.suffix[1:],
            }

        calib = info["calib"]
        res["calib"] = {
            'rect': calib['R0_rect'],
            'Trv2c': calib['Tr_velo_to_cam'],
            'P2': calib['P2'],
        }

        if 'annos' in info:
            # we need other objects to avoid collision when sample
            annos = kitti.remove_dontcare(info['annos'])
            locs = annos["location"]
            dims = annos["dimensions"]
            rots = annos["rotation_y"]
            gt_names = annos["name"]
            gt_boxes = np.concatenate([locs, dims, rots[..., np.newaxis]],
                                      axis=1).astype(np.float32)
            gt_boxes = box_np_ops.box_camera_to_lidar(
                gt_boxes, calib["R0_rect"], calib["Tr_velo_to_cam"])
            # only center format is allowed. so we need to convert
            # kitti [0.5, 0.5, 0] center to [0.5, 0.5, 0.5]
            box_np_ops.change_box3d_center_(gt_boxes, [0.5, 0.5, 0],
                                            [0.5, 0.5, 0.5])
            res["lidar"]["annotations"] = {
                'boxes': gt_boxes,
                'names': gt_names,
            }
            res["cam"]["annotations"] = {
                'boxes': annos["bbox"],
                'names': gt_names,
            }

        return res
def convert_to_kitti_info_version2(info):
    """Upgrade a flat (v1) KITTI info dict to the nested v2 layout in place.

    Nothing happens when ``info`` already has all of the ``"image"``,
    ``"calib"`` and ``"point_cloud"`` entries. Otherwise the three nested
    entries are (re)built from the flat v1 keys. The flat keys are kept.
    Returns ``None``.
    """
    already_v2 = ("image" in info and "calib" in info
                  and "point_cloud" in info)
    if already_v2:
        return

    image_entry = {
        'image_shape': info["img_shape"],
        'image_idx': info['image_idx'],
        'image_path': info['img_path'],
    }
    info["image"] = image_entry

    calib_entry = {
        "R0_rect": info['calib/R0_rect'],
        "Tr_velo_to_cam": info['calib/Tr_velo_to_cam'],
        "P2": info['calib/P2'],
    }
    info["calib"] = calib_entry

    point_cloud_entry = {
        "velodyne_path": info['velodyne_path'],
    }
    info["point_cloud"] = point_cloud_entry
def kitti_anno_to_label_file(annos, folder):
    """Write one KITTI result text file per annotation dict into ``folder``.

    The file name is derived from the annotation's
    ``["metadata"]["image_idx"]``; every row of the annotation becomes one
    line produced by ``kitti.kitti_result_line``.
    """
    out_dir = Path(folder)
    # Per-object fields handed to kitti_result_line, in this order.
    fields = ('name', 'alpha', 'bbox', 'location', 'dimensions',
              'rotation_y', 'score')
    for anno in annos:
        image_idx = anno["metadata"]["image_idx"]
        num_rows = anno["bbox"].shape[0]
        label_lines = [
            kitti.kitti_result_line({key: anno[key][row] for key in fields})
            for row in range(num_rows)
        ]
        label_file = out_dir / f"{kitti.get_image_index_str(image_idx)}.txt"
        with open(label_file, 'w') as f:
            f.write('\n'.join(label_lines))
def _read_imageset_file(path):
    """Read an image-set file and return its entries as a list of ints.

    The file holds one integer index per line. Blank lines (for example a
    trailing empty line) are skipped instead of raising ``ValueError``.

    Args:
        path: path of the text file to read.

    Returns:
        The indices in file order.

    Raises:
        ValueError: if a non-blank line is not an integer.
    """
    with open(path, 'r') as f:
        return [int(line) for line in f if line.strip()]
def _calculate_num_points_in_gt(data_path,
                                infos,
                                relative_path,
                                remove_outside=True,
                                num_features=4):
    """Store ``num_points_in_gt`` in the ``annos`` of every info, in place.

    For each info the point file is loaded, optionally filtered with
    ``box_np_ops.remove_outside_points``, and the points falling inside
    each ground-truth box are counted. Objects named ``'DontCare'`` get
    ``-1``; the slicing below relies on them being stored after all other
    objects.

    Args:
        data_path: root joined with the velodyne path when
            ``relative_path`` is true.
        infos: iterable of info dicts with ``"point_cloud"``, ``"image"``,
            ``"calib"`` and ``"annos"`` entries.
        relative_path: whether the stored velodyne paths are relative to
            ``data_path``.
        remove_outside: whether to apply ``remove_outside_points`` first.
        num_features: number of float32 values per point in the file.
    """
    for info in infos:
        pc_info = info["point_cloud"]
        image_info = info["image"]
        calib = info["calib"]

        velodyne_file = pc_info["velodyne_path"]
        if relative_path:
            velodyne_file = str(Path(data_path) / velodyne_file)
        cloud = np.fromfile(velodyne_file, dtype=np.float32, count=-1)
        cloud = cloud.reshape([-1, num_features])

        rect = calib['R0_rect']
        Trv2c = calib['Tr_velo_to_cam']
        P2 = calib['P2']
        if remove_outside:
            cloud = box_np_ops.remove_outside_points(
                cloud, rect, Trv2c, P2, image_info["image_shape"])

        annos = info['annos']
        num_obj = sum(1 for name in annos['name'] if name != 'DontCare')
        # Only the first num_obj entries are real objects.
        boxes_camera = np.concatenate(
            [
                annos['location'][:num_obj],
                annos['dimensions'][:num_obj],
                annos['rotation_y'][:num_obj][..., np.newaxis],
            ],
            axis=1)
        boxes_lidar = box_np_ops.box_camera_to_lidar(
            boxes_camera, rect, Trv2c)
        inside = box_np_ops.points_in_rbbox(cloud[:, :3], boxes_lidar)
        counts = inside.sum(0)

        # Pad with -1 for the ignored (DontCare) entries.
        num_ignored = len(annos['dimensions']) - num_obj
        counts = np.concatenate([counts, -np.ones([num_ignored])])
        annos["num_points_in_gt"] = counts.astype(np.int32)
def create_kitti_info_file(data_path, save_path=None, relative_path=True):
    """Generate the train/val/trainval/test KITTI info pickle files.

    The image ids of each split are read from the ``ImageSets`` folder
    next to this module. The train and val infos additionally get
    ``num_points_in_gt`` computed before they are saved.

    Args:
        data_path: KITTI dataset root.
        save_path: directory for the pickle files; defaults to
            ``data_path``.
        relative_path: forwarded to ``kitti.get_kitti_image_info`` and
            ``_calculate_num_points_in_gt``.
    """
    imageset_folder = Path(__file__).resolve().parent / "ImageSets"
    train_img_ids = _read_imageset_file(str(imageset_folder / "train.txt"))
    val_img_ids = _read_imageset_file(str(imageset_folder / "val.txt"))
    test_img_ids = _read_imageset_file(str(imageset_folder / "test.txt"))

    print("Generate info. this may take several minutes.")
    save_path = Path(data_path) if save_path is None else Path(save_path)

    # Keyword arguments shared by every get_kitti_image_info call.
    shared = dict(velodyne=True, calib=True, relative_path=relative_path)

    # --- train split ---
    kitti_infos_train = kitti.get_kitti_image_info(
        data_path, training=True, image_ids=train_img_ids, **shared)
    _calculate_num_points_in_gt(data_path, kitti_infos_train, relative_path)
    filename = save_path / 'kitti_infos_train.pkl'
    print(f"Kitti info train file is saved to {filename}")
    with open(filename, 'wb') as f:
        pickle.dump(kitti_infos_train, f)

    # --- val split ---
    kitti_infos_val = kitti.get_kitti_image_info(
        data_path, training=True, image_ids=val_img_ids, **shared)
    _calculate_num_points_in_gt(data_path, kitti_infos_val, relative_path)
    filename = save_path / 'kitti_infos_val.pkl'
    print(f"Kitti info val file is saved to {filename}")
    with open(filename, 'wb') as f:
        pickle.dump(kitti_infos_val, f)

    # --- train + val ---
    filename = save_path / 'kitti_infos_trainval.pkl'
    print(f"Kitti info trainval file is saved to {filename}")
    with open(filename, 'wb') as f:
        pickle.dump(kitti_infos_train + kitti_infos_val, f)

    # --- test split (no labels) ---
    kitti_infos_test = kitti.get_kitti_image_info(
        data_path,
        training=False,
        label_info=False,
        image_ids=test_img_ids,
        **shared)
    filename = save_path / 'kitti_infos_test.pkl'
    print(f"Kitti info test file is saved to {filename}")
    with open(filename, 'wb') as f:
        pickle.dump(kitti_infos_test, f)
def _create_reduced_point_cloud(data_path,
                                info_path,
                                save_path=None,
                                back=False):
    """Write a reduced copy of every point cloud listed in an info file.

    Each cloud is filtered with ``box_np_ops.remove_outside_points`` using
    the frame's calibration and image shape, then written as raw float32.

    Args:
        data_path: root directory the stored velodyne paths are joined to.
        info_path: pickle file holding the list of info dicts.
        save_path: output directory. When ``None`` the file goes to the
            sibling ``<velodyne dir>_reduced`` directory.
        back: when true, the x coordinate is negated before filtering and
            ``"_back"`` is appended to the output file name.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only use info
    # files that come from a trusted source.
    with open(info_path, 'rb') as f:
        kitti_infos = pickle.load(f)
    for info in prog_bar(kitti_infos):
        pc_info = info["point_cloud"]
        image_info = info["image"]
        calib = info["calib"]

        v_path = Path(data_path) / pc_info['velodyne_path']
        points_v = np.fromfile(
            str(v_path), dtype=np.float32, count=-1).reshape([-1, 4])
        rect = calib['R0_rect']
        P2 = calib['P2']
        Trv2c = calib['Tr_velo_to_cam']
        if back:
            points_v[:, 0] = -points_v[:, 0]
        points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c, P2,
                                                    image_info["image_shape"])

        if save_path is None:
            save_dir = v_path.parent.parent / (v_path.parent.stem + "_reduced")
        else:
            save_dir = Path(save_path)
        # Create the output directory instead of failing when it is missing.
        save_dir.mkdir(parents=True, exist_ok=True)
        # Build the name as a str: a Path does not support `+= "_back"`,
        # which used to raise TypeError when save_path was None.
        save_filename = str(save_dir / v_path.name)
        if back:
            save_filename += "_back"
        # The data is raw binary, so open the file in binary mode.
        with open(save_filename, 'wb') as f:
            points_v.tofile(f)
def create_reduced_point_cloud(data_path,
                               train_info_path=None,
                               val_info_path=None,
                               test_info_path=None,
                               save_path=None,
                               with_back=False):
    """Create reduced point clouds for the train, val and test splits.

    Info paths left as ``None`` default to ``kitti_infos_<split>.pkl``
    under ``data_path``. With ``with_back`` the three splits are processed
    a second time with ``back=True``.
    """

    def _or_default(given, file_name):
        # Only build the default path when no explicit path was passed.
        return Path(data_path) / file_name if given is None else given

    info_paths = (
        _or_default(train_info_path, 'kitti_infos_train.pkl'),
        _or_default(val_info_path, 'kitti_infos_val.pkl'),
        _or_default(test_info_path, 'kitti_infos_test.pkl'),
    )

    for split_info_path in info_paths:
        _create_reduced_point_cloud(data_path, split_info_path, save_path)
    if not with_back:
        return
    for split_info_path in info_paths:
        _create_reduced_point_cloud(
            data_path, split_info_path, save_path, back=True)
if __name__ == "__main__":
    # `fire` was referenced without ever being imported, so running this
    # file as a script raised NameError. Import it here, where it is used.
    import fire

    # Expose the module-level functions as command-line sub-commands.
    fire.Fire()
修改 second.utils.eval 中的 class_to_name(加入自定义类别):
class_to_name = {
0: 'Car',
1: 'Pedestrian',
2: 'Cyclist',
3: 'Van',
4: 'Person_sitting',
5: 'car',
6: 'tractor',
7: 'trailer',
}
def get_official_eval_result(gt_annos,
                             dt_annos,
                             current_classes,
                             difficultys=None,
                             z_axis=1,
                             z_center=1.0):
    """Run the KITTI-style evaluation and format the results.

    gt_annos and dt_annos must contain the following keys:
    [bbox, location, dimensions, rotation_y, score]

    Args:
        gt_annos: ground-truth annotations, forwarded to ``do_eval_v3``.
        dt_annos: detection annotations; their ``alpha`` entries decide
            whether orientation (AOS) is evaluated.
        current_classes: a class name / class index, or a list or tuple of
            them. Names must be values of ``class_to_name`` below.
        difficultys: difficulty levels forwarded to ``do_eval_v3``;
            defaults to ``[0, 1, 2]``.
        z_axis: forwarded to ``do_eval_v3``.
        z_center: forwarded to ``do_eval_v3``.

    Returns:
        A dict with ``"result"`` (the formatted report string) and
        ``"detail"`` (per-class dict of metric lists).

    Raises:
        KeyError: if a class name is not in ``class_to_name``.
    """
    # Avoid a shared mutable default; a fresh list is built per call.
    if difficultys is None:
        difficultys = [0, 1, 2]
    # One column per entry of class_to_name; rows are bbox / bev / 3d.
    overlap_mod = np.array([[0.7, 0.5, 0.5, 0.7, 0.5, 0.7, 0.7, 0.7],
                            [0.7, 0.5, 0.5, 0.7, 0.5, 0.7, 0.7, 0.7],
                            [0.7, 0.5, 0.5, 0.7, 0.5, 0.7, 0.7, 0.7]])
    overlap_easy = np.array([[0.7, 0.5, 0.5, 0.7, 0.5, 0.5, 0.5, 0.5],
                             [0.5, 0.25, 0.25, 0.5, 0.25, 0.5, 0.5, 0.5],
                             [0.5, 0.25, 0.25, 0.5, 0.25, 0.5, 0.5, 0.5]])
    min_overlaps = np.stack([overlap_mod, overlap_easy], axis=0)  # [2, 3, 8]
    class_to_name = {
        0: 'Car',
        1: 'Pedestrian',
        2: 'Cyclist',
        3: 'Van',
        4: 'Person_sitting',
        5: 'car',
        6: 'tractor',
        7: 'trailer',
    }
    name_to_class = {v: n for n, v in class_to_name.items()}
    if not isinstance(current_classes, (list, tuple)):
        current_classes = [current_classes]
    current_classes_int = []
    for curcls in current_classes:
        if isinstance(curcls, str):
            if curcls not in name_to_class:
                # Same exception type as before, but with a usable message
                # for custom datasets whose classes were not added above.
                raise KeyError(
                    f"unknown class name {curcls!r}; "
                    f"known names: {sorted(name_to_class)}")
            current_classes_int.append(name_to_class[curcls])
        else:
            current_classes_int.append(curcls)
    current_classes = current_classes_int
    # Keep only the threshold columns of the requested classes.
    min_overlaps = min_overlaps[:, :, current_classes]
    result = ''
    # check whether alpha is valid
    compute_aos = False
    for anno in dt_annos:
        if anno['alpha'].shape[0] != 0:
            # Only the first non-empty annotation is inspected; -10 marks
            # alpha as "ignored".
            if anno['alpha'][0] != -10:
                compute_aos = True
            break
    metrics = do_eval_v3(
        gt_annos,
        dt_annos,
        current_classes,
        min_overlaps,
        compute_aos,
        difficultys,
        z_axis=z_axis,
        z_center=z_center)
    detail = {}
    for j, curcls in enumerate(current_classes):
        # mAP threshold array: [num_minoverlap, metric, class]
        # mAP result: [num_class, num_diff, num_minoverlap]
        class_name = class_to_name[curcls]
        detail[class_name] = {}
        for i in range(min_overlaps.shape[0]):
            mAPbbox = get_mAP(metrics["bbox"]["precision"][j, :, i])
            mAPbev = get_mAP(metrics["bev"]["precision"][j, :, i])
            mAP3d = get_mAP(metrics["3d"]["precision"][j, :, i])
            detail[class_name][f"bbox@{min_overlaps[i, 0, j]:.2f}"] = mAPbbox.tolist()
            detail[class_name][f"bev@{min_overlaps[i, 1, j]:.2f}"] = mAPbev.tolist()
            detail[class_name][f"3d@{min_overlaps[i, 2, j]:.2f}"] = mAP3d.tolist()

            result += print_str(
                (f"{class_to_name[curcls]} "
                 "AP(Average Precision)@{:.2f}, {:.2f}, {:.2f}:".format(*min_overlaps[i, :, j])))
            mAPbbox = ", ".join(f"{v:.2f}" for v in mAPbbox)
            mAPbev = ", ".join(f"{v:.2f}" for v in mAPbev)
            mAP3d = ", ".join(f"{v:.2f}" for v in mAP3d)
            result += print_str(f"bbox AP:{mAPbbox}")
            result += print_str(f"bev AP:{mAPbev}")
            result += print_str(f"3d AP:{mAP3d}")
            if compute_aos:
                mAPaos = get_mAP(metrics["bbox"]["orientation"][j, :, i])
                # The key has no threshold in it, so the value of the last
                # overlap set is the one that remains in `detail`.
                detail[class_name]["aos"] = mAPaos.tolist()
                mAPaos = ", ".join(f"{v:.2f}" for v in mAPaos)
                result += print_str(f"aos AP:{mAPaos}")
    return {
        "result": result,
        "detail": detail,
    }