数据处理:csv标注转换成xml
csv中部分内容(每行各列依次为:图片名称, xmin, ymin, xmax, ymax, class, 图片宽度w, 图片高度h):
test_0.jpg,1653,1290,1773,1535,object,2448,3264
test_0.jpg,1485,1221,1648,1544,object,2448,3264
test_0.jpg,1345,1295,1481,1540,object,2448,3264
test_0.jpg,1221,1290,1341,1543,object,2448,3264
test_0.jpg,1079,1332,1216,1537,object,2448,3264
test_0.jpg,927,1285,1069,1531,object,2448,3264
test_0.jpg,679,1279,845,1539,object,2448,3264
test_0.jpg,2187,2536,2276,2764,object,2448,3264
test_0.jpg,232,519,361,774,object,2448,3264
test_0.jpg,5,521,225,774,object,2448,3264
test_1.jpg,457,436,574,526,object,1920,2560
test_1.jpg,537,1949,612,2093,object,1920,2560
test_1.jpg,436,2020,534,2095,object,1920,2560
test_1.jpg,1774,1751,1870,1854,object,1920,2560
test_1.jpg,1679,1759,1769,1852,object,1920,2560
test_1.jpg,1578,1762,1674,1852,object,1920,2560
test_1.jpg,1470,1752,1566,1863,object,1920,2560
test_1.jpg,1403,1747,1465,1872,object,1920,2560
test_1.jpg,1231,1764,1330,1869,object,1920,2560
test_1.jpg,1130,1771,1224,1874,object,1920,2560
test_1.jpg,1028,1771,1121,1879,object,1920,2560
test_1.jpg,924,1774,1017,1872,object,1920,2560
test_1.jpg,847,1839,921,1881,object,1920,2560
test_1.jpg,844,1740,919,1833,object,1920,2560
test_1.jpg,759,1839,843,1890,object,1920,2560
test_1.jpg,764,1771,834,1838,object,1920,2560
想要转换的xml格式:
<annotation>
<filename>8a0004.jpg</filename>
<size>
<width>960</width>
<height>1280</height>
<depth>3</depth>
</size>
<object>
<name>object</name>
<bndbox>
<xmin>314</xmin>
<ymin>366</ymin>
<xmax>427</xmax>
<ymax>430</ymax>
</bndbox>
</object>
<object>
<name>object</name>
<bndbox>
<xmin>805</xmin>
<ymin>9</ymin>
<xmax>960</xmax>
<ymax>277</ymax>
</bndbox>
</object>
</annotation>
转换代码:
import csv
import os
from xml.dom import minidom
# Build and write one XML annotation file per image from the CSV rows.
def _append_text_element(dom, parent, tag, text):
    """Append ``<tag>text</tag>`` to *parent* and return the new element."""
    element = dom.createElement(tag)
    element.appendChild(dom.createTextNode(str(text)))
    parent.appendChild(element)
    return element


def create_xml(filename, bboxs, row, output_dir=None):
    """Write the annotation XML for one image.

    The generated document has the layout::

        <annotation>
            <filename>...</filename>
            <size><width/><height/><depth/></size>
            <object><name/><bndbox><xmin/><ymin/><xmax/><ymax/></bndbox></object>
            ...
        </annotation>

    Args:
        filename: Image file name; stored in ``<filename>`` and used to build
            the output path ``<output_dir>/<filename>.xml`` (the image
            extension is kept, e.g. ``test_0.jpg.xml``).
        bboxs: Iterable of boxes, each ``[xmin, ymin, xmax, ymax]``. An
            optional fifth element is used as the class name; when absent
            the class name is ``'object'``.
        row: A CSV row belonging to this image; ``row[6]`` is written as the
            image width and ``row[7]`` as the image height.
        output_dir: Directory to write into. Defaults to the module-level
            ``xml_dir`` when ``None``.

    I/O failures are reported with ``print`` and do not raise, so a single
    unwritable file does not abort a whole conversion run.
    """
    width, height = row[6], row[7]
    depth = 3  # The CSV carries no channel count; images are assumed to be 3-channel.

    # Every node has to be created through the DOM document object.
    dom = minidom.Document()
    root_node = dom.createElement('annotation')
    dom.appendChild(root_node)

    _append_text_element(dom, root_node, 'filename', filename)

    size_node = dom.createElement('size')
    root_node.appendChild(size_node)
    _append_text_element(dom, size_node, 'width', width)
    _append_text_element(dom, size_node, 'height', height)
    _append_text_element(dom, size_node, 'depth', depth)

    for bbox in bboxs:
        object_node = dom.createElement('object')
        root_node.appendChild(object_node)

        class_name = bbox[4] if len(bbox) > 4 else 'object'
        _append_text_element(dom, object_node, 'name', class_name)

        # Boxes are corner coordinates, not (x, y, width, height).
        bndbox = dom.createElement('bndbox')
        object_node.appendChild(bndbox)
        _append_text_element(dom, bndbox, 'xmin', bbox[0])
        _append_text_element(dom, bndbox, 'ymin', bbox[1])
        _append_text_element(dom, bndbox, 'xmax', bbox[2])
        _append_text_element(dom, bndbox, 'ymax', bbox[3])

    target_dir = xml_dir if output_dir is None else output_dir
    xml_path = os.path.join(target_dir, filename) + '.xml'
    try:
        parent_dir = os.path.dirname(xml_path)
        if parent_dir:
            # Without this, every write fails when the directory is missing.
            os.makedirs(parent_dir, exist_ok=True)
        with open(xml_path, 'w', encoding='UTF-8') as fh:
            # writexml(writer, indent, addindent, newl, encoding): root indent,
            # per-level child indent, line separator, and the encoding named
            # in the XML declaration.
            dom.writexml(fh, indent='', addindent='\t', newl='\n', encoding='UTF-8')
    except OSError as err:
        print('错误信息:{0}'.format(err))
def main():
    """Convert the CSV annotation file into one XML file per image.

    Reads ``csv_filename`` row by row. Each row is expected to be
    ``filename, xmin, ymin, xmax, ymax, class, width, height``. Consecutive
    rows sharing a file name are grouped and handed to ``create_xml``
    together with the first row of that group, which supplies the image
    width and height.

    Rows of one image are assumed to be contiguous in the CSV; if an image
    name reappears later, its XML file is written again with only the later
    boxes.

    Prints the number of images written, followed by a completion message.
    """
    img_num = 0
    # newline='' lets the csv module handle line endings itself.
    with open(csv_filename, 'r', encoding="utf-8", newline='') as csvfile:
        reader = csv.reader(csvfile)
        current_image = None  # File name of the group being collected.
        current_row = None    # First row of that group (source of width/height).
        bboxs = []
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; nothing to convert.
                continue
            if row[0] != current_image:
                # A new image starts: flush the finished group using ITS OWN
                # row, so the size written matches the image it describes.
                if current_image is not None:
                    create_xml(current_image, bboxs, current_row)
                    img_num += 1
                current_image = row[0]
                current_row = row
                bboxs = []
            bboxs.append([row[1], row[2], row[3], row[4]])
        # The final group has no following row to trigger a flush.
        if current_image is not None:
            create_xml(current_image, bboxs, current_row)
            img_num += 1
    print(img_num)
    print('写入xml OK!')
if __name__ == '__main__':
    # Input: the existing CSV annotation file.
    csv_filename = os.path.join('/home/hub/wsy/SKU110K_CVPR19/SKU110K/annotations', 'annotations_val.csv')
    # Output: the folder the generated XML files are written into.
    xml_dir = '/home/hub/wsy/SKU110K_CVPR19/SKU110K/annotations/val_xml'
    main()
xml转csv的博客:https://blog.csdn.net/poppyty/article/details/115342231