python脚本清洗数据

1，目标检测测试集挑选：已经有VOC格式的JPEGImages、Annotations，以及testval.txt列表（每行为“图片路径 标注路径”），例如：

JPEGImages/62F1DBDB-4798-E8A5-4FA5-14C9E19ADA5B.jpg Annotations/62F1DBDB-4798-E8A5-4FA5-14C9E19ADA5B.xml
JPEGImages/2C042431-5CBE-9D69-646C-B5930204E15B.jpg Annotations/2C042431-5CBE-9D69-646C-B5930204E15B.xml

导出列表中的img，并生成对应的label文件。label每行格式为：C X1 Y1 W H（C为类别索引，X1、Y1为框左上角坐标，W、H为框的宽高，均按图像宽高归一化）

import os
import cv2
from PIL import Image
from random import randint
import xml.etree.ElementTree as ET

# Class names; the class id written to a label file is the name's index here.
names = ["A", "B", "C"]


def drawRealBox(xml, w, h):
    """Convert one VOC annotation file into a normalized label text file.

    Reads ``./Annotations/<xml>`` and writes one line per ``<object>`` to
    ``./labels/<stem>.txt`` in the form ``C X1 Y1 W H``, where ``C`` is the
    index of the object's name in ``names``, ``X1``/``Y1`` are the box's
    top-left corner and ``W``/``H`` its size, each divided by the image
    width or height.

    Args:
        xml: File name of the annotation inside ``./Annotations``.
        w: Image width in pixels, used to normalize x values.
        h: Image height in pixels, used to normalize y values.

    Raises:
        ValueError: If an object's name is not listed in ``names``.
    """
    xmlfile = os.path.join("./Annotations", xml)
    print(xmlfile)

    # Swap only the extension: str.replace('xml', 'txt') would also rewrite
    # any "xml" that happens to appear inside the file stem.
    label_dir = "./labels"
    txtfile = os.path.join(label_dir, os.path.splitext(xml)[0] + ".txt")
    if not os.path.isdir(label_dir):
        os.makedirs(label_dir)

    # Parse before opening the output so a broken annotation does not leave
    # an empty label file behind.
    tree = ET.parse(xmlfile)

    with open(txtfile, 'w') as f:
        for obj in tree.findall('object'):
            box = obj.find('bndbox')
            name = obj.find('name').text

            x1 = int(box.find('xmin').text)
            y1 = int(box.find('ymin').text)
            x2 = int(box.find('xmax').text)
            y2 = int(box.find('ymax').text)

            # Top-left corner and box size, normalized by the image size.
            cx = x1 / float(w)
            cy = y1 / float(h)
            cw = (x2 - x1) / float(w)
            ch = (y2 - y1) / float(h)
            index = names.index(name)
            f.write("%d %f %f %f %f\n" % (index, cx, cy, cw, ch))

# Driver: for every entry in ./testval.txt, copy the image from ./JPEGImages
# into ./image and write its label file through drawRealBox.
image_dir = './image'
if not os.path.isdir(image_dir):
    os.makedirs(image_dir)

with open("./testval.txt", 'r') as file_ynh:
    for line in file_ynh:
        # Strings are immutable, so the stripped value has to be kept;
        # strip() also removes a trailing "\r" from Windows-style lists.
        line = line.strip()
        if not line:
            continue

        # Each line is "<image path> <annotation path>"; only the image's
        # base name is needed.
        txtpath = line.split(' ')[0]
        txtimg = txtpath.split('/')[-1]

        jpg = cv2.imread(os.path.join('./JPEGImages', txtimg))
        if jpg is None:
            # imread signals a missing or undecodable file by returning None.
            print("skip unreadable image: %s" % txtimg)
            continue

        cv2.imwrite(os.path.join(image_dir, txtimg), jpg)
        h, w_ = jpg.shape[:2]

        # Swap only the extension so a "jpg" inside the stem is left alone.
        xml = os.path.splitext(txtimg)[0] + '.xml'
        drawRealBox(xml, w_, h)

python脚本清洗数据

猜你喜欢