1. 构建VOC文件夹
import os
def make_voc_dir():
    """Create the VOC2007 directory skeleton one level above the working dir.

    Ensures that ``../VOC2007/Annotations``, ``../VOC2007/ImageSets/Main`` and
    ``../VOC2007/JPEGImages`` all exist. Directories that are already present
    are left untouched (nothing is emptied or deleted), so the function can be
    called repeatedly.
    """
    voc_root = os.path.join('..', 'VOC2007')
    sub_dirs = (
        'Annotations',
        # Creating the nested path also creates ``ImageSets`` itself, and it
        # still creates ``Main`` when ``ImageSets`` already exists.
        os.path.join('ImageSets', 'Main'),
        'JPEGImages',
    )
    for sub_dir in sub_dirs:
        # exist_ok=True replaces the check-then-create pattern, which is racy
        # and fails on a second run.
        os.makedirs(os.path.join(voc_root, sub_dir), exist_ok=True)
# Script entry point: build the VOC2007 folder layout when run directly.
if __name__ == "__main__":
    make_voc_dir()
2. 将SYSU数据集的所有图片复制到VOC2007/JPEGImages
3. 生成XML标注文件
import os
import numpy as np
import scipy.io as sio
import shutil
from lxml.etree import Element, SubElement, tostring
from xml.dom.minidom import parseString
def _add_text_node(parent, tag, text):
    """Append a child element named *tag* to *parent* holding ``str(text)``."""
    node = SubElement(parent, tag)
    node.text = str(text)
    return node


def _add_person_object(node_root, box):
    """Append one ``<object>`` entry to *node_root* for a (x, y, w, h) box."""
    x, y, obj_width, obj_height = (int(v) for v in box)
    # Shift the top-left corner by one pixel, as the original conversion did
    # (presumably to match VOC's 1-based coordinates — confirm against the
    # consumer of these files).
    xmin = x + 1
    ymin = y + 1
    xmax = xmin + obj_width
    ymax = ymin + obj_height
    # Boxes that are at most 4 pixels wide or tall are flagged as difficult.
    difficult = 1 if (obj_height <= 4 or obj_width <= 4) else 0

    node_object = SubElement(node_root, 'object')
    _add_text_node(node_object, 'name', 'person')
    _add_text_node(node_object, 'difficult', difficult)
    node_bndbox = SubElement(node_object, 'bndbox')
    _add_text_node(node_bndbox, 'xmin', xmin)
    _add_text_node(node_bndbox, 'ymin', ymin)
    _add_text_node(node_bndbox, 'xmax', xmax)
    _add_text_node(node_bndbox, 'ymax', ymax)
    _add_text_node(node_object, 'pose', 'Unspecified')
    _add_text_node(node_object, 'truncated', '0')


def process_annotations(root_dir, VOCRoot):
    """Convert the ``Images.mat`` annotations into one XML file per image.

    Loads ``<root_dir>/annotation/Images.mat``, iterates the records of its
    ``Img`` array and writes ``<VOCRoot>/Annotations/<image stem>.xml`` for
    each of them. Every box with positive width and height becomes a
    ``person`` object; boxes with a non-positive width or height are skipped.

    Args:
        root_dir: Dataset root that contains the ``annotation`` directory.
        VOCRoot: Root of the output tree; XML files go to its ``Annotations``
            sub-directory, which is created if it does not exist.

    Raises:
        AssertionError: If an image record has no box with positive width
            and height.
    """
    annotation_dir = os.path.join(root_dir, 'annotation')
    imgs = sio.loadmat(
        os.path.join(annotation_dir, 'Images.mat'))['Img'].squeeze()

    xml_dir = os.path.join(VOCRoot, 'Annotations')
    os.makedirs(xml_dir, exist_ok=True)

    # Each record unpacks into three fields; only the first (image name) and
    # the third (boxes) are used here.
    for im_name, _, boxes in imgs:
        filename = str(im_name[0])  # e.g. 's14859.jpg'
        # One (x, y, w, h) row per annotated box.
        bboxes = np.asarray([b[0][0] for b in boxes[0]])
        valid_index = np.where((bboxes[:, 2] > 0) & (bboxes[:, 3] > 0))[0]
        assert valid_index.size > 0, \
            'Warning: {} has no valid boxes.'.format(filename)
        bboxes = bboxes[valid_index].astype(np.int32)

        node_root = Element('annotation')
        _add_text_node(node_root, 'folder', 'JPEGImages')
        _add_text_node(node_root, 'filename',
                       'VOC2007/JPEGImages/%s' % filename)

        # NOTE(review): the image size is hard-coded and never read from the
        # image file — confirm that every image really is 640x480x3.
        node_size = SubElement(node_root, 'size')
        _add_text_node(node_size, 'width', 640)
        _add_text_node(node_size, 'height', 480)
        _add_text_node(node_size, 'depth', '3')

        # Use the filtered, integer-cast boxes. Iterating the raw ``boxes``
        # here would also emit the invalid entries rejected above.
        for box in bboxes:
            _add_person_object(node_root, box)

        xml = tostring(node_root, pretty_print=True)
        # splitext swaps only the real extension, unlike str.replace.
        xml_name = os.path.splitext(filename)[0] + '.xml'
        with open(os.path.join(xml_dir, xml_name), 'wb') as f:
            f.write(xml)
# Script entry point: convert the dataset annotations when run directly.
if __name__ == "__main__":
    print('Processing the mat files...')
    # Source dataset root and destination VOC root, relative to the
    # working directory.
    root_dir, VOCRoot = '../sysu', '../VOC2007'
    process_annotations(root_dir, VOCRoot)