制作VOC数据集,根据groundtruth文件生成xml文件

图片保存在JPEGImages文件夹里,GroundTruth文件保存在gt文件夹下,文件名与对应图片的名称相同(扩展名为.txt,例如 100.jpg 对应 100.txt),内容形式如下:
648,454,825,551
516,431,623,543
每一行表示一个BoundingBox,四个值以英文逗号分隔,依次为 xmin,ymin,xmax,ymax
#! /usr/bin/python

import os, sys
import glob
from PIL import Image

# ICDAR image directory
src_img_dir = "./JPEGImages"
# ICDAR ground-truth directory (one .txt file per image)
src_txt_dir = "./gt"
# Directory the XML annotation files are written into
src_ann_dir = "./Annotations"

# Paths of every .jpg file found directly inside the image directory
img_Lists = glob.glob(src_img_dir + '/*.jpg')

# File names including the extension, e.g. 100.jpg
img_basenames = [os.path.basename(path) for path in img_Lists]

# File names with the extension removed, e.g. 100
img_names = [os.path.splitext(name)[0] for name in img_basenames]

# Generate one PASCAL VOC style XML annotation per image listed in img_names.
#
# For every image name the loop reads the image size from
# <src_img_dir>/<name>.jpg, reads the boxes from <src_txt_dir>/<name>.txt
# (one "xmin,ymin,xmax,ymax" box per line) and writes
# <src_ann_dir>/<name>.xml.

# open(..., 'w') does not create missing parent directories, so make sure the
# output directory exists before the first annotation is written.
if not os.path.isdir(src_ann_dir):
    os.makedirs(src_ann_dir)

for img in img_names:
    # Only the dimensions are needed; the context manager releases the image
    # file handle as soon as they have been read.
    with Image.open(src_img_dir + '/' + img + '.jpg') as im:
        width, height = im.size

    # Read the corresponding ground-truth txt file.
    gt_path = src_txt_dir + '/' + img + '.txt'
    with open(gt_path) as gt_file:
        gt = gt_file.read().splitlines()

    # Parse every box BEFORE opening the XML file so that a malformed
    # ground-truth file never leaves a half-written annotation behind.
    boxes = []
    for line_no, img_each_label in enumerate(gt, 1):
        # Blank lines (typically a trailing one) carry no box; skip them
        # instead of failing on a missing field.
        if not img_each_label.strip():
            continue
        # Strip each field so "648, 454" does not put whitespace in the tags.
        spt = [field.strip() for field in img_each_label.split(',')]
        if len(spt) < 4:
            raise ValueError(
                '%s:%d: expected "xmin,ymin,xmax,ymax" but got %r'
                % (gt_path, line_no, img_each_label)
            )
        # Any fields after the first four are ignored.
        boxes.append(spt[:4])

    # Write the XML file; the with-block guarantees it is flushed and closed.
    with open(src_ann_dir + '/' + img + '.xml', 'w') as xml_file:
        xml_file.write('<annotation>\n')
        xml_file.write('\t<folder>simple</folder>\n')
        xml_file.write('\t<filename>' + str(img) + '.jpg' + '</filename>\n')
        xml_file.write('\t<source>\n')
        xml_file.write('\t\t<database>' + 'The simple Database' + '</database>\n')
        xml_file.write('\t\t<annotation>' + 'simple' + '</annotation>\n')
        xml_file.write('\t\t<image>flickr</image>\n')
        xml_file.write('\t\t<flickrid>325991873</flickrid>\n')
        xml_file.write('\t</source>\n')
        xml_file.write('\t<owner>\n')
        xml_file.write('\t\t<flickrid>archin</flickrid>\n')
        xml_file.write('\t\t<name>?</name>\n')
        xml_file.write('\t</owner>\n')
        xml_file.write('\t<size>\n')
        xml_file.write('\t\t<width>' + str(width) + '</width>\n')
        xml_file.write('\t\t<height>' + str(height) + '</height>\n')
        # NOTE(review): depth is hard-coded to 3 regardless of the actual
        # image mode -- confirm that all inputs are 3-channel images.
        xml_file.write('\t\t<depth>3</depth>\n')
        xml_file.write('\t</size>\n')
        xml_file.write('\t<segmented>0</segmented>\n')

        # One <object> element per bounding box, all labelled "text".
        for xmin, ymin, xmax, ymax in boxes:
            xml_file.write('\t<object>\n')
            xml_file.write('\t\t<name>text</name>\n')
            xml_file.write('\t\t<pose>Unspecified</pose>\n')
            xml_file.write('\t\t<truncated>0</truncated>\n')
            xml_file.write('\t\t<difficult>0</difficult>\n')
            xml_file.write('\t\t<bndbox>\n')
            xml_file.write('\t\t\t<xmin>' + xmin + '</xmin>\n')
            xml_file.write('\t\t\t<ymin>' + ymin + '</ymin>\n')
            xml_file.write('\t\t\t<xmax>' + xmax + '</xmax>\n')
            xml_file.write('\t\t\t<ymax>' + ymax + '</ymax>\n')
            xml_file.write('\t\t</bndbox>\n')
            xml_file.write('\t</object>\n')

        xml_file.write('</annotation>')

猜你喜欢

转载自blog.csdn.net/kapok_lalala/article/details/78990996