为了适配Bubbliiiing的yolo网络代码来训练自己的数据集(不同的标注软件会生成json或xml两种标注文件),我整理了一份两种格式相互转换的代码,直接贴出:
import os
import numpy as np
import json
from shutil import copyfile
from tqdm import tqdm
from xml.etree.ElementTree import parse
# ##################################################################
# 获取地址文件名,例如输入 "./data/test1.jpg" 返回 "test1"
# ##################################################################
def get_root_file_name(root1):
    """Return the bare file name of a path: no directory, no final extension.

    Example: "./data/test1.jpg" -> "test1".

    Args:
        root1: File path or plain file name. Forward slashes and
            backslashes are both treated as directory separators.

    Returns:
        The part of the last path component that precedes its last ".".
        A component that contains no "." is returned unchanged.
    """
    # Isolate the last path component first, so that a "." in a directory
    # name (for instance the leading "./") is never mistaken for the
    # extension separator.
    base = root1.replace("\\", "/").rsplit("/", 1)[-1]
    stem, dot, _ = base.rpartition(".")
    # Without a dot there is no extension to strip.
    return stem if dot else base
# ##################################################################
# json转xml文件,
# json_file: json文件地址
# xml_root: 转换完xml文件存储地址
# ##################################################################
def json2xml(json_file, xml_root):
    """Convert one JSON annotation file into a VOC-style XML annotation file.

    The JSON document must provide "imageWidth", "imageHeight" and a
    "shapes" list whose items carry "label" and "points". Each shape is
    reduced to the axis-aligned bounding box of its points; shapes without
    points, or whose box has no width or no height, are left out.

    Args:
        json_file: Path of the JSON file. Nothing is written when the path
            does not end with ".json" or does not exist.
        xml_root: Folder that receives "<name>.xml", where <name> is the
            JSON file name without directory and extension.

    Returns:
        None.
    """
    # Local import: the module header does not import the escaping helper.
    from xml.sax.saxutils import escape

    if not json_file.endswith(".json") or not os.path.exists(json_file):
        return

    # A context manager closes the handle even when parsing fails.
    with open(json_file, "r", encoding="utf-8") as json_f:
        json_data = json.load(json_f)
    file_name = get_root_file_name(json_file)

    # The whole document is built in memory first, so a JSON file with
    # missing keys cannot leave a half-written XML file behind.
    xml_lines = [
        "<annotation>\n",
        # The author's training images are stored under JPEGImages.
        "    <folder>JPEGImages</folder>\n",
        "    <filename>" + escape(file_name) + ".jpg</filename>\n",
        "    <source>\n",
        "        <database>The Defect Detection</database>\n",
        "        <annotation>Defect Detection</annotation>\n",
        "        <image>flickr</image>\n",
        "        <flickrid>NULL</flickrid>\n",
        "    </source>\n",
        "    <size>\n",
        "        <width>" + str(json_data["imageWidth"]) + "</width>\n",
        "        <height>" + str(json_data["imageHeight"]) + "</height>\n",
        "        <depth>3</depth>\n",
        "    </size>\n",
        "    <segmented>0</segmented>\n",
    ]

    for shape in json_data["shapes"]:
        points = np.array(shape["points"])
        # An empty or malformed point list cannot be indexed as (N, 2).
        if points.ndim != 2 or points.shape[0] == 0 or points.shape[1] < 2:
            continue
        xmin, xmax = min(points[:, 0]), max(points[:, 0])
        ymin, ymax = min(points[:, 1]), max(points[:, 1])
        # Degenerate boxes (zero width or zero height) are dropped.
        if xmax <= xmin or ymax <= ymin:
            continue
        xml_lines += [
            "    <object>\n",
            # Escape the label so characters such as "&" or "<" keep the
            # document well-formed.
            "        <name>" + escape(str(shape["label"])) + "</name>\n",
            "        <pose>Unspecified</pose>\n",
            "        <truncated>1</truncated>\n",
            "        <difficult>0</difficult>\n",
            "        <bndbox>\n",
            "            <xmin>" + str(xmin) + "</xmin>\n",
            "            <ymin>" + str(ymin) + "</ymin>\n",
            "            <xmax>" + str(xmax) + "</xmax>\n",
            "            <ymax>" + str(ymax) + "</ymax>\n",
            "        </bndbox>\n",
            "    </object>\n",
        ]
    xml_lines.append("</annotation>")

    # os.path.join works whether or not xml_root ends with a separator.
    xml_path = os.path.join(xml_root, file_name + ".xml")
    with open(xml_path, "w", encoding="utf-8") as xml_f:
        xml_f.write("".join(xml_lines))
# ##################################################################
# Convert an xml file to a json file
# xml_file: path of the xml file
# json_root: folder where the converted json file is stored
# ##################################################################
def xml2json(xml_file, json_root):
    """Convert one VOC-style XML annotation file into a JSON annotation file.

    Every <bndbox> found inside an <object> becomes one "rectangle" shape
    whose two points are (xmin, ymin) and (xmax, ymax), labelled with the
    object's <name>. The image size is taken from the <size> element.

    Args:
        xml_file: Path of the XML file. Nothing is written when the path
            does not end with ".xml" or does not exist.
        json_root: Folder that receives "<name>.json", where <name> is the
            XML file name without directory and extension.

    Returns:
        None.
    """
    if not xml_file.endswith(".xml") or not os.path.exists(xml_file):
        return

    def to_number(text):
        # XML text is a string; the JSON output needs real numbers.
        text = text.strip()
        try:
            return int(text)
        except ValueError:
            return float(text)

    file_name = get_root_file_name(xml_file)
    root = parse(xml_file).getroot()

    # Stay None (JSON null) when the file has no <size> element instead of
    # failing on an undefined variable.
    image_width = image_height = None
    for size in root.iter("size"):
        # width -> imageWidth and height -> imageHeight (not swapped).
        image_width = to_number(size.find("width").text)
        image_height = to_number(size.find("height").text)

    # One shape per bounding box keeps label and coordinates paired even
    # when an object has no <bndbox> or more than one.
    shapes = []
    for obj in root.iter("object"):
        label = obj.find("name").text or ""
        for box in obj.iter("bndbox"):
            shapes.append({
                "label": label,
                "text": "",
                "points": [
                    [to_number(box.find("xmin").text),
                     to_number(box.find("ymin").text)],
                    [to_number(box.find("xmax").text),
                     to_number(box.find("ymax").text)],
                ],
                "group_id": None,
                "shape_type": "rectangle",
                "flags": {},
            })

    annotation = {
        "version": "0.3.3",
        "flags": {},
        "shapes": shapes,
        "imagePath": file_name + ".jpg",
        "imageData": None,
        "imageHeight": image_height,
        "imageWidth": image_width,
    }

    # json.dump takes care of quoting, so labels containing quotes or
    # backslashes still yield valid JSON; ensure_ascii=False keeps
    # non-ASCII labels readable in the file.
    json_path = os.path.join(json_root, file_name + ".json")
    with open(json_path, "w", encoding="utf-8") as json_f:
        json.dump(annotation, json_f, ensure_ascii=False, indent=2)
        json_f.write("\n")
# ##################################################################
# 批量json2xml转换,默认删除文件名空格并将图像文件重新复制一份
# json_root: json文件存储地址,只会检索json文件
# jpg_root: 源图像存储地址
# new_xml_root: 生成xml文件存储地址
# new_jpg_root: 复制图像地址,若为""则不复制
# isDelSpace: 是否删除源文件空格,默认为删除,适配VOC数据集标注模式
# ##################################################################
def _json_to_voc_xml(json_path, xml_path, file_name):
    """Write the VOC-style XML file at xml_path for the JSON file at json_path."""
    # Local import: the module header does not import the escaping helper.
    from xml.sax.saxutils import escape

    # A context manager closes the handle even when parsing fails.
    with open(json_path, "r", encoding="utf-8") as json_f:
        json_data = json.load(json_f)

    # Build the document in memory so a JSON file with missing keys cannot
    # leave a half-written XML file behind.
    xml_lines = [
        "<annotation>\n",
        # The author's training images are stored under JPEGImages.
        "    <folder>JPEGImages</folder>\n",
        "    <filename>" + escape(file_name) + ".jpg</filename>\n",
        "    <source>\n",
        "        <database>The Defect Detection</database>\n",
        "        <annotation>Defect Detection</annotation>\n",
        "        <image>flickr</image>\n",
        "        <flickrid>NULL</flickrid>\n",
        "    </source>\n",
        "    <size>\n",
        "        <width>" + str(json_data["imageWidth"]) + "</width>\n",
        "        <height>" + str(json_data["imageHeight"]) + "</height>\n",
        "        <depth>3</depth>\n",
        "    </size>\n",
        "    <segmented>0</segmented>\n",
    ]
    for shape in json_data["shapes"]:
        points = np.array(shape["points"])
        # An empty or malformed point list cannot be indexed as (N, 2).
        if points.ndim != 2 or points.shape[0] == 0 or points.shape[1] < 2:
            continue
        xmin, xmax = min(points[:, 0]), max(points[:, 0])
        ymin, ymax = min(points[:, 1]), max(points[:, 1])
        # Degenerate boxes (zero width or zero height) are dropped.
        if xmax <= xmin or ymax <= ymin:
            continue
        xml_lines += [
            "    <object>\n",
            "        <name>" + escape(str(shape["label"])) + "</name>\n",
            "        <pose>Unspecified</pose>\n",
            "        <truncated>1</truncated>\n",
            "        <difficult>0</difficult>\n",
            "        <bndbox>\n",
            "            <xmin>" + str(xmin) + "</xmin>\n",
            "            <ymin>" + str(ymin) + "</ymin>\n",
            "            <xmax>" + str(xmax) + "</xmax>\n",
            "            <ymax>" + str(ymax) + "</ymax>\n",
            "        </bndbox>\n",
            "    </object>\n",
        ]
    xml_lines.append("</annotation>")

    with open(xml_path, "w", encoding="utf-8") as xml_f:
        xml_f.write("".join(xml_lines))


def json2xml_batch(json_root, jpg_root, new_xml_root, new_jpg_root, isDelSpace=True):
    """Convert every JSON annotation in a folder to XML and copy the images.

    Only files located directly inside json_root / jpg_root are processed;
    subfolders are not visited.

    Args:
        json_root: Folder holding the JSON files; other files are ignored.
        jpg_root: Folder holding the source ".jpg" images.
        new_xml_root: Folder that receives the generated XML files. It is
            created when missing.
        new_jpg_root: Folder the images are copied to. It is created when
            missing. Pass "" to skip copying.
        isDelSpace: When true (the default), spaces are removed from the
            names of the generated XML files and of the copied images.

    Returns:
        None. Prints "No such folder!" and returns early when json_root or
        jpg_root does not exist.
    """
    # Input validation.
    if not os.path.exists(json_root) or not os.path.exists(jpg_root):
        print("No such folder!")
        return
    # "" stands for the current directory / "no copy"; makedirs("") raises.
    if new_xml_root != "":
        os.makedirs(new_xml_root, exist_ok=True)
    if new_jpg_root != "":
        os.makedirs(new_jpg_root, exist_ok=True)

    # Convert the annotations.
    print("Start conterting annotations...")
    # Only the first os.walk entry is used: files in subfolders could never
    # be opened through json_root + name anyway.
    _, _, json_names = next(os.walk(json_root), (json_root, [], []))
    for file in tqdm(json_names):
        if not file.endswith(".json"):
            continue
        out_name = file.replace(" ", "") if isDelSpace else file
        file_name = get_root_file_name(out_name)
        _json_to_voc_xml(
            os.path.join(json_root, file),
            os.path.join(new_xml_root, file_name + ".xml"),
            file_name,
        )
    print("Conterting annotations done!")

    # Copy the images, unless the caller opted out with "".
    if new_jpg_root == "":
        return
    print("Copy images...")
    _, _, jpg_names = next(os.walk(jpg_root), (jpg_root, [], []))
    for file in tqdm(jpg_names):
        if not file.endswith(".jpg"):
            continue
        source = os.path.join(jpg_root, file)
        target_name = file.replace(" ", "") if isDelSpace else file
        target = os.path.join(new_jpg_root, target_name)
        # copyfile refuses to copy a file onto itself.
        if os.path.abspath(source) == os.path.abspath(target):
            continue
        copyfile(source, target)
    print("Images copied!")
# ##################################################################
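# Batch xml2json conversion; the image files are copied as well
# xml_root: folder holding the xml files, only xml files are picked up
# jpg_root: folder holding the source images
# new_json_root: folder where the generated json files are stored
# new_jpg_root: folder the images are copied to; "" is meant to disable copying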
# ##################################################################
def xml2json_batch(xml_root, jpg_root, new_json_root, new_jpg_root):
    """Convert every XML annotation in a folder to JSON and copy the images.

    Only files located directly inside xml_root / jpg_root are processed;
    subfolders are not visited.

    Args:
        xml_root: Folder holding the XML files; other files are ignored.
        jpg_root: Folder holding the source ".jpg" images.
        new_json_root: Folder that receives the generated JSON files. It is
            created when missing.
        new_jpg_root: Folder the images are copied to. It is created when
            missing. Pass "" to skip copying.

    Returns:
        None. Prints "No such folder!" and returns early when xml_root or
        jpg_root does not exist.
    """
    # Input validation.
    if not os.path.exists(xml_root) or not os.path.exists(jpg_root):
        print("No such folder!")
        return
    # "" stands for the current directory / "no copy"; makedirs("") raises.
    if new_json_root != "":
        os.makedirs(new_json_root, exist_ok=True)
    if new_jpg_root != "":
        os.makedirs(new_jpg_root, exist_ok=True)

    # Convert the annotations.
    print("Start conterting annotations...")
    # Joining with "" appends a trailing separator when one is missing, so
    # the output folder is valid even if xml2json concatenates it directly
    # with the file name.
    json_dir = os.path.join(new_json_root, "")
    # Only the first os.walk entry is used: files in subfolders could never
    # be opened through xml_root + name anyway.
    _, _, xml_names = next(os.walk(xml_root), (xml_root, [], []))
    for file in tqdm(xml_names):
        if not file.endswith(".xml"):
            continue
        xml2json(os.path.join(xml_root, file), json_dir)
    print("Conterting annotations done!")

    # Copy the images, unless the caller opted out with "".
    if new_jpg_root == "":
        return
    print("Copy images...")
    _, _, jpg_names = next(os.walk(jpg_root), (jpg_root, [], []))
    for file in tqdm(jpg_names):
        if not file.endswith(".jpg"):
            continue
        source = os.path.join(jpg_root, file)
        target = os.path.join(new_jpg_root, file)
        # copyfile refuses to copy a file onto itself.
        if os.path.abspath(source) == os.path.abspath(target):
            continue
        copyfile(source, target)
    print("Images copied!")
if __name__ == "__main__":
    # Batch json -> xml: annotations and images are read from one folder
    # and both results are written to one output folder.
    json_root = jpg_root = "./json_file/"
    new_xml_root = new_jpg_root = "./xml_file/"
    json2xml_batch(
        json_root=json_root,
        jpg_root=jpg_root,
        new_xml_root=new_xml_root,
        new_jpg_root=new_jpg_root,
    )
    # Batch xml -> json (alternative run, left disabled):
    # xml_root = "./xml_file/"
    # jpg_root = "./xml_file/"
    # new_json_root = "./json_file/"
    # new_jpg_root = "./json_file/"
    # xml2json_batch(xml_root, jpg_root, new_json_root, new_jpg_root)
注释都写好了,主打一个拿过去就能直接跑!