图像扩充用于图像目标检测

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/wei_guo_xd/article/details/74199729
常用的图像扩充方式有:
水平翻转,裁剪,视角变换,jpeg压缩,尺度变换,颜色变换,旋转
当用于分类数据集时,这些变换方法可以全部被使用,然而考虑到目标检测标注框的变换,我们选择如下几种方式用于目标检测数据集扩充:
jpeg压缩,尺度变换,颜色变换
这里,我们介绍一个图像变换包。
这是项目主页,里面介绍了用于图像变换的基本方法,以及如何组合它们可以得到最好的效果,项目主页里同时带python程序。

里面的图像变换程序如下(用于windows下,用于目标检测时,做了一些修改):


  
  
  1. import os, sys, pdb, numpy
  2. from PIL import Image,ImageChops,ImageOps,ImageDraw
  3. #parameters used for the CVPR paper
  4. NCROPS = 10
  5. NHOMO = 8
  6. JPG=[ 70, 50, 30]
  7. ROTS = [ 3, 6, 9, 12, 15]
  8. SCALES=[ 1.5** 0.5, 1.5, 1.5** 1.5, 1.5** 2, 1.5** 2.5]
  9. #parameters computed on ILSVRC10 dataset
  10. lcolor = [ 381688.61379382 , 4881.28307136, 2316.10313483]
  11. pcolor = [[ -0.57848371, -0.7915924, 0.19681989],
  12. [ -0.5795621 , 0.22908373, -0.78206676],
  13. [ -0.57398987 , 0.56648223 , 0.59129816]]
  14. #pre-generated gaussian values
  15. alphas = [[ 0.004894 , 0.153527, -0.012182],
  16. [ -0.058978, 0.114067, -0.061488],
  17. [ 0.002428, -0.003576, -0.125031]]
  18. def gen_colorimetry(i):
  19. p1r = pcolor[ 0][ 0]
  20. p1g = pcolor[ 1][ 0]
  21. p1b = pcolor[ 2][ 0]
  22. p2r = pcolor[ 0][ 1]
  23. p2g = pcolor[ 1][ 1]
  24. p2b = pcolor[ 2][ 1]
  25. p3r = pcolor[ 0][ 2]
  26. p3g = pcolor[ 1][ 2]
  27. p3b = pcolor[ 2][ 2]
  28. l1 = numpy.sqrt(lcolor[ 0])
  29. l2 = numpy.sqrt(lcolor[ 1])
  30. l3 = numpy.sqrt(lcolor[ 2])
  31. if i<= 3:
  32. alpha = alphas[i]
  33. else:
  34. numpy.random.seed(i* 3)
  35. alpha = numpy.random.randn( 3, 0, 0.01)
  36. a1 = alpha[ 0]
  37. a2 = alpha[ 1]
  38. a3 = alpha[ 2]
  39. return (a1*l1*p1r + a2*l2*p2r + a3*l3*p3r,
  40. a1*l1*p1g + a2*l2*p2g + a3*l3*p3g,
  41. a1*l1*p1b + a2*l2*p2b + a3*l3*p3b)
  42. def gen_crop(i,w,h):
  43. numpy.random.seed( 4*i)
  44. x0 = numpy.random.random()*(w/ 4)
  45. y0 = numpy.random.random()*(h/ 4)
  46. x1 = w - numpy.random.random()*(w/ 4)
  47. y1 = h - numpy.random.random()*(h/ 4)
  48. return (int(x0),int(y0),int(x1),int(y1))
  49. def gen_homo(i,w,h):
  50. if i== 0:
  51. return ( 0, 0,int( 0.125*w),h,int( 0.875*w),h,w, 0)
  52. elif i== 1:
  53. return ( 0, 0,int( 0.25*w),h,int( 0.75*w),h,w, 0)
  54. elif i== 2:
  55. return ( 0,int( 0.125*h), 0,int( 0.875*h),w,h,w, 0)
  56. elif i== 3:
  57. return ( 0,int( 0.25*h), 0,int( 0.75*h),w,h,w, 0)
  58. elif i== 4:
  59. return (int( 0.125*w), 0, 0,h,w,h,int( 0.875*w), 0)
  60. elif i== 5:
  61. return (int( 0.25*w), 0, 0,h,w,h,int( 0.75*w), 0)
  62. elif i== 6:
  63. return ( 0, 0, 0,h,w,int( 0.875*h),w,int( 0.125*h))
  64. elif i== 7:
  65. return ( 0, 0, 0,h,w,int( 0.75*h),w,int( 0.25*h))
  66. else:
  67. assert False
  68. def rot(image,angle,fname):
  69. white = Image.new( 'L',image.size, "white")
  70. wr = white.rotate(angle,Image.NEAREST,expand= 0)
  71. im = image.rotate(angle,Image.BILINEAR,expand= 0)
  72. try:
  73. image.paste(im,wr)
  74. except ValueError:
  75. print >>sys.stderr, 'error: image do not match '+fname
  76. return image
  77. def gen_corner(n, w, h):
  78. x0 = 0
  79. x1 = w
  80. y0 = 0
  81. y1 = h
  82. rat = 256 - 227
  83. if n == 0: #center
  84. x0 = (rat*w)/( 2* 256.0)
  85. y0 = (rat*h)/( 2* 256.0)
  86. x1 = w - (rat*w)/( 2* 256.0)
  87. y1 = h - (rat*h)/( 2* 256.0)
  88. elif n == 1:
  89. x0 = (rat*w)/ 256.0
  90. y0 = (rat*h)/ 256.0
  91. elif n == 2:
  92. x1 = w - (rat*w)/ 256.0
  93. y0 = (rat*h)/ 256.0
  94. elif n == 3:
  95. x1 = w - (rat*w)/ 256.0
  96. y1 = h - (rat*h)/ 256.0
  97. else:
  98. assert n== 4
  99. x0 = (rat*w)/ 256.0
  100. y1 = h - (rat*h)/ 256.0
  101. return (int(x0),int(y0),int(x1),int(y1))
  102. #the main fonction to call
  103. #takes a image input path, a transformation and an output path and does the transformation
  104. def gen_trans(imgfile,trans,outfile):
  105. for trans in trans.split( '*'):
  106. image = Image.open(imgfile)
  107. w,h = image.size
  108. if trans== "plain":
  109. image.save(outfile, "JPEG",quality= 100)
  110. elif trans== "flip":
  111. ImageOps.mirror(image).save(outfile, "JPEG",quality= 100)
  112. elif trans.startswith( "crop"):
  113. c = int(trans[ 4:])
  114. image.crop(gen_crop(c,w,h)).save(outfile, "JPEG",quality= 100)
  115. elif trans.startswith( "homo"):
  116. c = int(trans[ 4:])
  117. image.transform((w,h),Image.QUAD,
  118. gen_homo(c,w,h),
  119. Image.BILINEAR).save(outfile, "JPEG",quality= 100)
  120. elif trans.startswith( "jpg"):
  121. image.save(outfile,quality=int(trans[ 3:]))
  122. elif trans.startswith( "scale"):
  123. scale = SCALES[int(trans.replace( "scale", ""))]
  124. image.resize((int(w/scale),int(h/scale)),Image.BILINEAR).save(outfile, "JPEG",quality= 100)
  125. elif trans.startswith( 'color'):
  126. (dr,dg,db) = gen_colorimetry(int(trans[ 5]))
  127. table = numpy.tile(numpy.arange( 256),( 3))
  128. table[ : 256]+= (int)(dr)
  129. table[ 256: 512]+= (int)(dg)
  130. table[ 512: ]+= (int)(db)
  131. image.convert( "RGB").point(table).save(outfile, "JPEG",quality= 100)
  132. elif trans.startswith( 'rot-'):
  133. angle =int(trans[ 4:])
  134. for i in range(angle):
  135. image = rot(image, -1,outfile)
  136. image.save(outfile, "JPEG",quality= 100)
  137. elif trans.startswith( 'rot'):
  138. angle =int(trans[ 3:])
  139. for i in range(angle):
  140. image = rot(image, 1,outfile)
  141. image.save(outfile, "JPEG",quality= 100)
  142. elif trans.startswith( 'corner'):
  143. i = int(trans[ 6:])
  144. image.crop(gen_corner(i,w,h)).save(outfile, "JPEG",quality= 100)
  145. else:
  146. assert False, "Unrecognized transformation: "+trans
  147. imgfile = outfile # in case we iterate
  148. #Our 41 transformations used in the CVPR paper
  149. def get_all_trans():
  150. # transformations = (["plain","flip"]
  151. # # +["crop%d"%i for i in range(NCROPS)]
  152. # # +["homo%d"%i for i in range(NHOMO)]
  153. # +["jpg%d"%i for i in JPG]
  154. # +["scale0","scale1","scale2","scale3","scale4"]
  155. # +["color%d"%i for i in range(3)]
  156. # # +["rot-%d"%i for i in ROTS]
  157. # # +["rot%d"%i for i in ROTS]
  158. # )+["scale0","scale1","scale2","scale3","scale4"]
  159. transformations=([ "plain"]
  160. + [ "jpg%d" % i for i in JPG]
  161. + [ "scale0", "scale1", "scale2", "scale3", "scale4"]
  162. + [ "color%d" % i for i in range( 3)])
  163. return transformations
  164. #transformations used at test time in deep architectures
  165. def get_deep_trans():
  166. return [ 'corner0', 'corner1', 'corner2', 'corner3', 'corner4', 'corner0*flip', 'corner1*flip', 'corner2*flip', 'corner3*flip', 'corner4*flip']
  167. if __name__== "__main__":
  168. inputpath = sys.argv[ 1]
  169. name = [name for name in os.listdir(inputpath) if os.path.isfile(os.path.join(inputpath,name))]
  170. #img_input = sys.argv[1]
  171. outpath = sys.argv[ 2]
  172. if len(sys.argv)>= 4:
  173. trans = sys.argv[ 3]
  174. if not trans.startswith( "["):
  175. trans = [trans]
  176. else:
  177. trans = eval(trans)
  178. else:
  179. trans = get_all_trans()
  180. print "Generating transformations and storing in %s"%(outpath)
  181. for k in name:
  182. for t in trans:
  183. img_input=inputpath+ '\\'+k
  184. gen_trans(img_input,t,outpath+ '\\%s_%s.jpg'%( ".".join(img_input.split( "\\")[ -1].split( ".")[: -1]),t))
  185. #gen_trans(k, t, outpath + '\\%s_%s.jpg' % (".".join(k.split(".")[:-1]), t))
  186. print "Finished. Transformations generated: %s"%( " ".join(trans))
这是变换前的图片:1_7_17.jpg

变换后的图片如下:
1_7_17_color0.jpg

1_7_17_color1.jpg

1_7_17_color2.jpg

1_7_17_jpg30.jpg

1_7_17_jpg50.jpg

1_7_17_jpg70.jpg

1_7_17_scale0.jpg

1_7_17_scale1.jpg

1_7_17_scale2.jpg

1_7_17_scale3.jpg

1_7_17_scale4.jpg

用于目标检测时:XML标注文件也需要做相应修改,主要是针对尺度变换:
修改前的xml文件如下(1_7_17.jpg):

   
   
  1. <annotation>
  2. <folder>spl </folder>
  3. <filename>1_7_17.jpg </filename>
  4. <source>
  5. <database>The spl Database </database>
  6. <annotation>The spl Database </annotation>
  7. <image>spl </image>
  8. <flickrid>0 </flickrid>
  9. </source>
  10. <owner>
  11. <flickrid>spl </flickrid>
  12. <name>xiaovv </name>
  13. </owner>
  14. <size>
  15. <width>800 </width>
  16. <height>800 </height>
  17. <depth>3 </depth>
  18. </size>
  19. <segmented>0 </segmented>
  20. <object>
  21. <name>aeroplane </name>
  22. <pose>Unspecified </pose>
  23. <truncated>0 </truncated>
  24. <difficult>0 </difficult>
  25. <bndbox>
  26. <xmin>151 </xmin>
  27. <ymin>357 </ymin>
  28. <xmax>212 </xmax>
  29. <ymax>399 </ymax>
  30. </bndbox>
  31. </object>
  32. <object>
  33. <name>aeroplane </name>
  34. <pose>Unspecified </pose>
  35. <truncated>0 </truncated>
  36. <difficult>0 </difficult>
  37. <bndbox>
  38. <xmin>134 </xmin>
  39. <ymin>593 </ymin>
  40. <xmax>193 </xmax>
  41. <ymax>654 </ymax>
  42. </bndbox>
  43. </object>
  44. </annotation>
修改后的xml文件如下(1_7_17_scale4.jpg):

   
   
  1. <?xml version="1.0" encoding="utf-8"?> <annotation>
  2. <folder>spl </folder>
  3. <filename>1_7_17_scale4.jpg </filename>
  4. <source>
  5. <database>The spl Database </database>
  6. <annotation>The spl Database </annotation>
  7. <image>spl </image>
  8. <flickrid>0 </flickrid>
  9. </source>
  10. <owner>
  11. <flickrid>spl </flickrid>
  12. <name>xiaovv </name>
  13. </owner>
  14. <size>
  15. <width>290 </width>
  16. <height>290 </height>
  17. <depth>3 </depth>
  18. </size>
  19. <segmented>0 </segmented>
  20. <object>
  21. <name>aeroplane </name>
  22. <pose>Unspecified </pose>
  23. <truncated>0 </truncated>
  24. <difficult>0 </difficult>
  25. <bndbox>
  26. <xmin>54 </xmin>
  27. <ymin>129 </ymin>
  28. <xmax>76 </xmax>
  29. <ymax>144 </ymax>
  30. </bndbox>
  31. </object>
  32. <object>
  33. <name>aeroplane </name>
  34. <pose>Unspecified </pose>
  35. <truncated>0 </truncated>
  36. <difficult>0 </difficult>
  37. <bndbox>
  38. <xmin>48 </xmin>
  39. <ymin>215 </ymin>
  40. <xmax>70 </xmax>
  41. <ymax>237 </ymax>
  42. </bndbox>
  43. </object>
  44. </annotation>
修改xml文件的程序如下:

   
   
  1. # -*- coding=utf-8 -*-
  2. import os
  3. import sys
  4. import shutil
  5. from xml.dom.minidom import Document
  6. from xml.etree.ElementTree import ElementTree,Element
  7. import xml.dom.minidom
  8. JPG=[ 70, 50, 30]
  9. SCALES=[ 1.5** 0.5, 1.5, 1.5** 1.5, 1.5** 2, 1.5** 2.5]
  10. #产生变换后的xml文件
  11. def gen_xml(xml_input,trans,outfile):
  12. for trans in trans.split( '*'):
  13. if trans== "plain" or trans.startswith( "jpg") or trans.startswith( 'color'): #如果是这几种变换,直接修改xml文件名就好
  14. dom = xml.dom.minidom.parse(xml_input)
  15. root = dom.documentElement
  16. filenamelist = root.getElementsByTagName( 'filename')
  17. filename = filenamelist[ 0]
  18. c = str(filename.firstChild.data)
  19. d = ".".join(outfile.split( "\\")[ -1].split( ".")[: -1]) + '.jpg'
  20. filename.firstChild.data = d
  21. f = open(outfile, 'w')
  22. dom.writexml(f, encoding= 'utf-8')
  23. elif trans.startswith( "scale"): #对于尺度变换,xml文件信息也需要改变
  24. scale = SCALES[int(trans.replace( "scale", ""))]
  25. dom=xml.dom.minidom.parse(xml_input)
  26. root=dom.documentElement
  27. filenamelist=root.getElementsByTagName( 'filename')
  28. filename=filenamelist[ 0]
  29. c=str(filename.firstChild.data)
  30. d= ".".join(outfile.split( "\\")[ -1].split( ".")[: -1])+ '.jpg'
  31. filename.firstChild.data=d
  32. heightlist = root.getElementsByTagName( 'height')
  33. height = heightlist[ 0]
  34. a = int(height.firstChild.data)
  35. b = str(int(a / scale))
  36. height.firstChild.data = b
  37. widthlist=root.getElementsByTagName( 'width')
  38. width=widthlist[ 0]
  39. a = int(width.firstChild.data)
  40. b = str(int(a / scale))
  41. width.firstChild.data=b
  42. objectlist=root.getElementsByTagName( 'xmin')
  43. for object in objectlist:
  44. a=int(object.firstChild.data)
  45. b=str(int(a/scale))
  46. object.firstChild.data=b
  47. objectlist = root.getElementsByTagName( 'ymin')
  48. for object in objectlist:
  49. a = int(object.firstChild.data)
  50. b = str(int(a / scale))
  51. object.firstChild.data = b
  52. objectlist = root.getElementsByTagName( 'xmax')
  53. for object in objectlist:
  54. a = int(object.firstChild.data)
  55. b = str(int(a / scale))
  56. object.firstChild.data = b
  57. objectlist = root.getElementsByTagName( 'ymax')
  58. for object in objectlist:
  59. a = int(object.firstChild.data)
  60. b = str(int(a / scale))
  61. object.firstChild.data = b
  62. f=open(outfile, 'w')
  63. dom.writexml(f,encoding= 'utf-8')
  64. else:
  65. assert False, "Unrecognized transformation: "+trans
  66. #产生各种变换名
  67. def get_all_trans():
  68. transformations=([ "plain"]
  69. + [ "jpg%d" % i for i in JPG]
  70. + [ "scale0", "scale1", "scale2", "scale3", "scale4"]
  71. + [ "color%d" % i for i in range( 3)])
  72. return transformations
  73. if __name__== "__main__":
  74. inputpath = sys.argv[ 1]
  75. name = [name for name in os.listdir(inputpath) if os.path.isfile(os.path.join(inputpath,name))]
  76. outpath = sys.argv[ 2]
  77. if len(sys.argv)>= 4:
  78. trans = sys.argv[ 3]
  79. if not trans.startswith( "["):
  80. trans = [trans]
  81. else:
  82. trans = eval(trans)
  83. else:
  84. trans = get_all_trans()
  85. print "Generating transformations and storing in %s"%(outpath)
  86. for k in name:
  87. for t in trans:
  88. xml_input=inputpath+ '\\'+k
  89. gen_xml(xml_input,t,outpath+ '\\%s_%s.xml'%( ".".join(xml_input.split( "\\")[ -1].split( ".")[: -1]),t))





版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/wei_guo_xd/article/details/74199729

猜你喜欢

转载自blog.csdn.net/qq_31511955/article/details/85322777