(ILSVRC2012)imagenet2012数据集下载与处理

1.训练集的数据量已经足够了,所以我只下载了训练集,下载地址:

 
2.数据集中图片类别是用wordnet编码进行命名的,wordnet编码与实际的语义映射,参考以下教程:
 
3.对下载好的数据集图片进行缩小,划分训练/验证/测试集
import glob
import os
import random
import shutil
import subprocess

from PIL import Image

# Directory that receives the extracted (and later resized) class folders.
uncompress_path = 'imagenet2012'
# os.makedirs with exist_ok=True needs no shell and does not complain when the
# script is re-run and the directory is already there.
os.makedirs(uncompress_path, exist_ok=True)

def archive_stem(tar_file):
    """Return the archive's file name without directory and final extension.

    Unlike ``tar_file.split('.')`` unpacked into two names, this also works
    when the name contains more than one dot ('a.b.tar' -> 'a.b').
    """
    return os.path.splitext(os.path.basename(tar_file))[0]


# Extract every *.tar archive found in the current directory into its own
# sub-folder of uncompress_path, then shrink each extracted image in place.
all_tars = glob.glob('*.tar')
for tar_file in all_tars:
    print('uncompress '+tar_file+' ...')
    s1 = archive_stem(tar_file)
    class_dir = os.path.join(uncompress_path, s1)
    os.makedirs(class_dir, exist_ok=True)
    # Argument list instead of a shell string: file names containing spaces or
    # shell metacharacters are passed through untouched, and check=True stops
    # the script when tar fails instead of silently continuing.
    subprocess.run(['tar', '-xf', tar_file, '-C', class_dir], check=True)

    # Resize every extracted file to 84x84 and overwrite it.
    all_images = glob.glob(os.path.join(class_dir, '*'))
    for image_file in all_images:
        # The context manager closes the source file handle before the
        # resized copy is written back to the same path.
        with Image.open(image_file) as im:
            resized = im.resize((84, 84), resample=Image.LANCZOS)
        # TODO: rename image_file
        resized.save(image_file)

# Split the extracted class folders, by class, into train/val/test
# sub-directories of uncompress_path: 64% / 16% / the remainder.
split_names = ('train', 'val', 'test')
# Only real class folders take part in the split. Skipping the split
# directories themselves (and any stray files) keeps a re-run from moving
# 'train', 'val' or 'test' into one another.
all_classes = [
    path for path in glob.glob(uncompress_path+'/*')
    if os.path.isdir(path) and os.path.basename(path) not in split_names
]
all_classes_num = len(all_classes)
trian_classes_num = int(all_classes_num*0.64)
val_classes_num = int(all_classes_num*0.16)
test_classes_num = all_classes_num - trian_classes_num - val_classes_num

# (directory name, label used in the progress message, number of classes).
# The 'trian' spelling of the first label is kept so the printed output is
# unchanged.
split_plan = (
    ('train', 'trian', trian_classes_num),
    ('val', 'val', val_classes_num),
    ('test', 'test', test_classes_num),
)
for split_name, split_label, split_size in split_plan:
    split_dir = os.path.join(uncompress_path, split_name)
    os.makedirs(split_dir, exist_ok=True)
    # Draw this split's classes at random from those not yet assigned.
    for class_dir in random.sample(all_classes, split_size):
        print('mv '+class_dir+' to '+split_label+'_directory...')
        # shutil.move needs no shell, so unusual folder names are safe, and
        # it raises on failure instead of being silently ignored.
        shutil.move(class_dir, split_dir)
        all_classes.remove(class_dir)

# Pack the whole output tree into '<uncompress_path>.zip' in the current
# directory, with archive entries prefixed by uncompress_path.
print('compress result...')
# shutil.make_archive does not depend on an external 'zip' binary and raises
# on failure, so the success message below is only printed when the archive
# was actually written.
shutil.make_archive(uncompress_path, 'zip', root_dir=os.curdir, base_dir=uncompress_path)

# Reached only if every step above completed.
print('proc success!!!')
 

猜你喜欢

转载自www.cnblogs.com/hiram-zhang/p/10107632.html