目录
引言
最近在做自己的数据集,发现采集到的图像尺寸很大(大部分是手机拍摄的)、图片格式不统一。
我们在做深度学习和机器学习的时候,自己制作数据集格式应该是统一的,本文的脚本的功能就是对数据集中的图像进行统一式整理。
功能包括:
批量图像重命名、批量图像尺寸转换、批量图像剪切、批量图像格式转换、测试图像大小、测试标签情况。
程序介绍
程序1:批量调整图像尺寸
功能:在编辑器内运行,批量resize图像,并另保存。
函数输入:image_path=image_path, save_path=save_path, x=1920, y=1080, quality=95, dpi_x=72, dpi_y=72
对应:输入图像路径、输出图像路径、x,y,质量、dpi的x值,dpi的y值;若没有输入某些参数,将按默认值运行
示例:
resize(image_path=image_path, save_path=save_path, x=1920, y=1080, quality=95, dpi_x=72, dpi_y=72)
代码:
import cv2
import os
from PIL import Image
image_path = './image/'  # input image directory
save_path = './resize/'  # output image directory
# Paths must not contain Chinese characters
def resize(image_path, save_path, x=256, y=256, quality=95, dpi_x=72.0, dpi_y=72.0):
    """Resize every image in a directory and save the results to another one.

    Each regular file in ``image_path`` is read with OpenCV, stretched to
    exactly ``x`` by ``y`` pixels (the aspect ratio is not preserved) and
    written with Pillow to ``save_path`` under its original file name.

    Args:
        image_path: Directory containing the source images.
        save_path: Directory that receives the resized images. It is created
            when it does not exist yet.
        x: Output width in pixels.
        y: Output height in pixels.
        quality: ``quality`` option forwarded to ``Image.save``.
        dpi_x: Horizontal DPI forwarded to ``Image.save``.
        dpi_y: Vertical DPI forwarded to ``Image.save``.

    Returns:
        None. Progress is reported on stdout.
    """
    os.makedirs(save_path, exist_ok=True)

    # Sub-directories cannot be images, so leave them out of the work list.
    names = [
        name for name in os.listdir(image_path)
        if os.path.isfile(os.path.join(image_path, name))
    ]
    total = len(names)

    for i, name in enumerate(names, start=1):
        im = cv2.imread(os.path.join(image_path, name))
        if im is None:
            # cv2.imread signals "cannot decode" with None instead of raising;
            # skip such files rather than crash inside cv2.resize.
            print("Skip %s (not a readable image), %d of %d files" % (name, i, total))
            continue
        print("Extract %s image, %d of %d images" % (name, i, total))
        im_resize = cv2.resize(im, (x, y))
        # OpenCV delivers BGR data while Pillow expects RGB.
        im_dpi = Image.fromarray(cv2.cvtColor(im_resize, cv2.COLOR_BGR2RGB))
        # os.path.join works whether or not save_path ends with a separator.
        im_dpi.save(os.path.join(save_path, name), quality=quality, dpi=(dpi_x, dpi_y))
# Run the batch resize on the configured directories with a 1920x1080 target size.
resize(image_path=image_path, save_path=save_path, x=1920, y=1080, quality=95, dpi_x=72, dpi_y=72)
运行结果,以其中一个图像为例:
程序2:批量图像尺寸调整
功能:使用命令行运行,批量resize图像,并另保存。
函数输入:image_path=image_path, save_path=save_path, x=1920, y=1080, quality=95, dpi_x=72, dpi_y=72
对应:输入图像路径、输出图像路径、x,y,质量、dpi的x值,dpi的y值;若没有输入某些参数,将按默认值运行。
示例:在命令行输入
python image_resize_args.py --image_path ./image/ --save_path ./resize/ -x 1024 -y 720 -quality 95 -dpi_x 72 -dpi_y 72
代码:
import cv2
import os
from PIL import Image
import argparse as ap
# Parse the command-line options that control the batch resize.
parser = ap.ArgumentParser()
# Only the two directories are mandatory. The remaining options are optional
# so that their defaults actually take effect when they are omitted.
parser.add_argument("--image_path", help='输入图片来源路径', required=True)
parser.add_argument("--save_path", help='输入图片存储路径', required=True)
# x is the output width and y the output height (cv2.resize takes (width, height)).
parser.add_argument("-x", type=int, default=256, help='输入图像的宽度')
parser.add_argument("-y", type=int, default=256, help='输入图像的高度')
parser.add_argument("-quality", type=int, default=95, help='输入图像存储质量')
parser.add_argument("-dpi_x", type=int, default=72, help='输入图像dpi_x')
parser.add_argument("-dpi_y", type=int, default=72, help='输入图像dpi_y')
args = vars(parser.parse_args())

# Expose the parsed values as module-level names used by the call further down.
image_path = args["image_path"]
save_path = args["save_path"]
x = args["x"]
y = args["y"]
quality = args["quality"]
dpi_x = args["dpi_x"]
dpi_y = args["dpi_y"]
def resize(image_path, save_path, x=256, y=256, quality=95, dpi_x=72, dpi_y=72):
    """Resize every image in a directory and save the results to another one.

    Each regular file in ``image_path`` is read with OpenCV, stretched to
    exactly ``x`` by ``y`` pixels (the aspect ratio is not preserved) and
    written with Pillow to ``save_path`` under its original file name.

    Args:
        image_path: Directory containing the source images.
        save_path: Directory that receives the resized images. It is created
            when it does not exist yet.
        x: Output width in pixels.
        y: Output height in pixels.
        quality: ``quality`` option forwarded to ``Image.save``.
        dpi_x: Horizontal DPI forwarded to ``Image.save``.
        dpi_y: Vertical DPI forwarded to ``Image.save``.

    Returns:
        None. Progress is reported on stdout.
    """
    os.makedirs(save_path, exist_ok=True)

    # Sub-directories cannot be images, so leave them out of the work list.
    names = [
        name for name in os.listdir(image_path)
        if os.path.isfile(os.path.join(image_path, name))
    ]
    total = len(names)

    for i, name in enumerate(names, start=1):
        im = cv2.imread(os.path.join(image_path, name))
        if im is None:
            # cv2.imread signals "cannot decode" with None instead of raising;
            # skip such files rather than crash inside cv2.resize.
            print("Skip %s (not a readable image), %d of %d files" % (name, i, total))
            continue
        print("Extract %s image, %d of %d images" % (name, i, total))
        im_resize = cv2.resize(im, (x, y))
        # OpenCV delivers BGR data while Pillow expects RGB.
        im_dpi = Image.fromarray(cv2.cvtColor(im_resize, cv2.COLOR_BGR2RGB))
        # os.path.join works whether or not save_path ends with a separator.
        im_dpi.save(os.path.join(save_path, name), quality=quality, dpi=(dpi_x, dpi_y))
# Echo the effective settings and their Python types before processing.
print(image_path, save_path, x, y, quality, dpi_x, dpi_y)
print(type(image_path), type(save_path), type(x), type(y), type(quality), type(dpi_x), type(dpi_y))
# Run the batch resize with the values taken from the command line.
resize(image_path=image_path, save_path=save_path, x=x, y=y, quality=quality, dpi_x=dpi_x, dpi_y=dpi_y)
运行结果,以其中一个图像为例:
程序3:批量图像重命名
功能:对文件夹内的图像按顺序批量重命名。
函数输入:无,只需调整脚本里的path和Newdir = os.path.join(path, str(count).zfill(4) + filetype)中的4这个位置,换成你想要的位数。
代码:
# Rename the files of one folder to consecutive, zero-padded numbers
# (0000.jpg, 0001.jpg, ...). Every file keeps its own extension.
import os

path = './image'
# os.listdir returns entries in arbitrary order; sort them so the numbering
# is reproducible between runs.
filelist = sorted(os.listdir(path))

# Pass 1: move every file to a unique temporary name. Renaming straight to
# the final name could overwrite a file that is still waiting to be renamed
# (for example when the folder already contains a "0001.jpg").
staged = []
for file in filelist:
    print(file)
    Olddir = os.path.join(path, file)
    if os.path.isdir(Olddir):
        continue
    filetype = os.path.splitext(file)[1]
    Tmpdir = os.path.join(
        path, '.rename_tmp_%d_%06d%s' % (os.getpid(), len(staged), filetype))
    os.rename(Olddir, Tmpdir)
    staged.append((Tmpdir, filetype))

# Pass 2: give the staged files their final sequential names.
count = 0
for Tmpdir, filetype in staged:
    # Change the 4 below to choose how many digits the new names have.
    Newdir = os.path.join(path, str(count).zfill(4) + filetype)
    os.rename(Tmpdir, Newdir)
    count += 1
zfill(5)的运行结果
程序4:图像裁剪
功能:将原先图像裁剪成自定义的尺寸
函数输入:res = cut(image_path, save_path, 832, 832),注意修改路径和尺寸
对应:图像路径,裁剪后的保存路径,裁剪块的高度(vx,纵向),裁剪块的宽度(vy,横向)
代码:
# -*- coding:utf-8 -*-
from PIL import Image
import os
# Image modes that can be written as JPEG directly. Any other mode (for
# example RGBA or palette PNGs) is converted to RGB before saving.
_JPEG_SAFE_MODES = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")


def _tile_starts(length, tile, stride):
    """Return the start offsets of the tiles along one image axis.

    Offsets advance by ``stride`` while a whole tile still fits, and a final
    tile flush with the far edge (``length - tile``) is appended unless the
    regular grid already ends there. When ``length < tile`` the single offset
    is negative, i.e. the crop box extends past the image edge.
    """
    starts = list(range(0, length - tile + 1, stride))
    last = length - tile
    if not starts or starts[-1] != last:
        starts.append(last)
    return starts


def cut(image_path, save_path, vx, vy, dx=300, dy=300):
    """Cut every image of a directory into overlapping fixed-size JPEG tiles.

    Tiles are taken row by row from the top-left corner. Each row and each
    column additionally gets one tile aligned with the bottom/right edge so
    the whole image is covered. Tiles are saved as
    ``<save_path>/<image stem>_<6-digit index>.jpg`` with the index starting
    at 1 for every image.

    Args:
        image_path: Directory containing the source images.
        save_path: Directory receiving the tiles; created when missing.
        vx: Tile height in pixels (extent along the vertical axis).
        vy: Tile width in pixels (extent along the horizontal axis).
        dx: Vertical step between neighbouring tile rows.
        dy: Horizontal step between neighbouring tile columns.

    Returns:
        Total number of tiles written for all images.

    Raises:
        ValueError: If ``dx`` or ``dy`` is not positive.
    """
    if dx <= 0 or dy <= 0:
        raise ValueError("dx and dy must be positive")
    os.makedirs(save_path, exist_ok=True)

    im_name = os.listdir(image_path)
    paths = [os.path.join(image_path, name) for name in im_name]

    count = 0
    for i, path in enumerate(paths):
        if os.path.isdir(path):
            continue
        # splitext keeps dots inside the stem, so "a.b.jpg" and "a.c.jpg"
        # no longer collapse to the same output prefix.
        stem = os.path.splitext(os.path.basename(path))[0]
        prefix = os.path.join(save_path, stem + '_')

        try:
            im = Image.open(path)
        except OSError:
            print(path + "不是可读取的图像,已跳过")
            continue

        n = 0
        with im:
            w, h = im.size
            source = im if im.mode in _JPEG_SAFE_MODES else im.convert('RGB')
            for top in _tile_starts(h, vx, dx):
                for left in _tile_starts(w, vy, dy):
                    n += 1
                    tile = source.crop((left, top, left + vy, top + vx))
                    tile.save(prefix + '%06d' % n + ".jpg")

        print(path + "切割成功,切割得到的子图片数为", n, ':', i + 1, '/', len(paths))
        # n is the number of tiles actually written for this image.
        count += n
    return count


if __name__ == "__main__":
    image_path = './image/'
    save_path = './image_cut/'
    # Tile size is given as (vx, vy) = (height, width).
    # Large tiles:
    res = cut(image_path, save_path, 832, 832)
    # Medium tiles:
    # res = cut(image_path, save_path, 120, 120)
    # Small tiles:
    # res = cut(image_path, save_path, 80, 80)
    print('all sub image:', res)
程序5:图像格式转换
功能:图像的格式有png jpg等,把路径里的png转成jpg或者 JPEG转成jpg等,需要怎么转换,可以自己定义。
注意:dirName是图像存在的路径,newname[-1] == "png"是需要转换的格式(转换之前),newname[-1] = "jpg"是想要转换的格式(转换之后)
下面脚本实现的是把.png转成.jpg格式(注意:脚本只是修改文件扩展名,并不会对图像数据重新编码)。
代码:
# 把某种类型的图片改为.jpg格式
import os
import string
# NOTE: this only changes the file extension; the image data is not re-encoded.
dirName = './image/'
li = os.listdir(dirName)
for filename in li:
    newname = filename
    newname = newname.split(".")
    # A name without a dot has no extension. Without this guard a file that
    # is literally called "png" would be renamed to "jpg".
    if len(newname) < 2:
        continue
    if newname[-1] == "png":  # extension before the change
        newname[-1] = "jpg"  # extension after the change
        newname = ".".join(newname)
        oldpath = os.path.join(dirName, filename)
        newpath = os.path.join(dirName, newname)
        # os.rename may silently replace an existing file, so never rename
        # onto a name that is already taken.
        if os.path.exists(newpath):
            print(newpath, "already exists, skipped")
            continue
        os.rename(oldpath, newpath)
        print(newpath, "updated successfully")
程序6:测试图像尺寸是否是指定大小
功能:测试文件夹下图像尺寸是否是指定大小,将尺寸不正确的图像名输出
代码:
import os
import cv2
# Print the name of every image whose size differs from the expected one.
im_path = 'myData/VOC2007/JPEGImages'
x = 1920  # expected width in pixels
y = 1080  # expected height in pixels
ims = os.listdir(im_path)
ims.sort()
for name in ims:
    im = cv2.imread(os.path.join(im_path, name))
    if im is None:
        # cv2.imread returns None for files it cannot decode; report the
        # file instead of crashing on im.shape.
        print(name, '(unreadable)')
        continue
    # shape is (rows, columns, ...), i.e. (height, width, ...).
    n, m = im.shape[:2]
    if n != y or m != x:
        print(name)
print('over')
程序7:测试图像的ground truth
功能:测试使用LabelImg打过标签后的标注情况,查看标签是否正确,程序运行后,按任意键(例如空格)继续查看下一张。
代码:
# -*- coding: utf-8 -*-
import os
import random
import cv2 as cv
import matplotlib.pyplot as plt
# Class names of the dataset. The visible code declares this name global but
# never reads it.
labels = ["TA"]
# Box colours as BGR tuples; the class index of a label selects the entry.
color_list = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (0, 255, 255)]
# Directory holding the images to check.
img_dir = "myData/VOC2007/JPEGImages"
# Directory holding the YOLO-format .txt label files (same base name as the image).
yolo_txt_dir = "myData/VOC2007/labels"
# result_dst_dir = "/home/youyheng/DJIdata/robomaster_Final_Tournament/check_label_result"
# Size of the displayed image as a percentage of the original size.
scale_percent = 80
# rates that represent the imgs of all datasets
# 1 for all imgs, 0.5 for half of the imgs
check_rate = 1
# When True the image list is shuffled before checking.
random_check = False
def cv_imread(file_path):
    """Read an image with matplotlib and return it in OpenCV's BGR order.

    ``plt.imread`` yields RGB(A) or single-channel data, so the channels are
    reordered (or expanded) to the 3-channel BGR layout that the OpenCV
    drawing and display calls in this script work with.

    Args:
        file_path: Path of the image file.

    Returns:
        The image as a BGR array, or None when the file cannot be opened
        (mirroring ``cv.imread`` so callers can test for None).
    """
    try:
        img = plt.imread(file_path)
    except OSError:
        return None
    if img.ndim == 2:
        # Grayscale file: expand to three identical channels.
        return cv.cvtColor(img, cv.COLOR_GRAY2BGR)
    if img.shape[2] == 4:
        # Drop the alpha channel while reordering.
        return cv.cvtColor(img, cv.COLOR_RGBA2BGR)
    return cv.cvtColor(img, cv.COLOR_RGB2BGR)
def my_line(img, start, end):
    """Draw a black line of thickness 2 on ``img`` from ``start`` to ``end``."""
    black = (0, 0, 0)
    cv.line(img, start, end, black, thickness=2, lineType=8)
# Draw one labelled box (taken from a YOLO label line) and the image name.
def draw_label_rec(img, label_index, label_info_list, img_name):
    """Draw a YOLO-format bounding box and the image name onto ``img``.

    Args:
        img: Image array to draw on; its first two dimensions are read as
            (height, width).
        label_index: Class index of the box; selects the box colour from the
            module-level ``color_list`` (wrapping around when the index
            exceeds the number of colours).
        label_info_list: ``[x, y, w, h]`` where x/y are the box centre and
            w/h the box size, all as fractions of the image width/height.
            Values may be strings; they are converted with ``float``.
        img_name: Text written near the top-left corner of the image.

    Returns:
        None. ``img`` is passed to ``cv.rectangle`` and ``cv.putText``.
    """
    img_height, img_width = img.shape[:2]
    x, y, w, h = (float(value) for value in label_info_list[:4])

    x_center = x * img_width
    y_center = y * img_height
    half_w = w * img_width / 2
    # The vertical extent must use the box height h, not the width w.
    half_h = h * img_height / 2

    xmin = int(x_center - half_w)
    xmax = int(x_center + half_w)
    ymin = int(y_center - half_h)
    ymax = int(y_center + half_h)

    # Wrap the index so a class id beyond the palette still gets a colour.
    color = color_list[int(label_index) % len(color_list)]

    cv.rectangle(img,
                 (xmin, ymin),  # top-left corner
                 (xmax, ymax),  # bottom-right corner
                 color,         # BGR colour
                 2)             # line thickness
    # TODO: the class name itself is not drawn yet, only the image name.
    cv.putText(img, str(img_name), (5, 50), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
def main():
    """Show each labelled image with its YOLO boxes drawn, one key press per image.

    Reads the module-level settings ``img_dir``, ``yolo_txt_dir``,
    ``check_rate``, ``random_check`` and ``scale_percent``. Images without a
    matching ``.txt`` label file are skipped. The function stops early,
    without the final "check over" message, when an image cannot be opened.
    """
    origin_window = "Origin Window"

    # Only JPEG files are checked; filter first so that check_rate refers to
    # the images themselves rather than to every directory entry.
    img_name_list = sorted(
        name for name in os.listdir(img_dir)
        if name.lower().endswith(('.jpg', '.jpeg'))
    )
    if random_check is True:
        random.shuffle(img_name_list)
    check_max_times = int(check_rate * len(img_name_list))

    try:
        for img_name in img_name_list[:check_max_times]:
            img_path = os.path.join(img_dir, img_name)
            print("**check img : {0} **".format(img_path))

            # Look for the label file before decoding the image so that
            # unlabelled images cost nothing.
            label_path = os.path.join(
                yolo_txt_dir, img_name.rpartition(".")[0] + ".txt")
            if not os.path.exists(label_path):
                continue

            src_image = cv_imread(img_path)
            if src_image is None:
                print("Open image Error")
                return

            # Display size as a percentage of the original size.
            src_height = src_image.shape[0]
            src_width = src_image.shape[1]
            width = int(src_width * scale_percent / 100)
            height = int(src_height * scale_percent / 100)
            dim = (width, height)

            # Each label line holds: class index, x, y, w, h.
            with open(label_path, "r") as file_reader:
                for line in file_reader:
                    label_info_list = line.split()
                    if len(label_info_list) < 5:
                        # Blank or truncated line: nothing to draw.
                        continue
                    label_index = int(label_info_list[0])
                    draw_label_rec(src_image, label_index, label_info_list[1:5], img_name)

            resized_src = cv.resize(src_image, dim, interpolation=cv.INTER_CUBIC)

            # Show the result and wait for a key press before continuing.
            cv.imshow(origin_window, resized_src)
            cv.waitKey(0)
    finally:
        # Close the preview window on every exit path.
        cv.destroyAllWindows()

    print("**check over**")


if __name__ == "__main__":
    main()
注意事项
首先要安装需要的库,其次注意在命令行输入时不要多输入、漏输入空格。
这些脚本可以二次开发,只需按照自己的要求修改,就可以实现对其他文件的类似操作
------------------------------------------------------------------------------------------------------------------------------------
2020-6-11更新:增加图像剪切、图像重命名、图像格式转换功能、尺寸测试、xml测试,同时修改标题、引言部分。