MS-Celeb-1M数据集
MS-Celeb-1M | Clean | 100,000 | 5,084,127 | - | - | Google Drive |
MS-Celeb-1M | Align_112x112 | 85,742 | 5,822,653 | - | - | Google Drive |
解压数据集(以下脚本尚未经过测试):
import base64
import csv
import os
# Default input TSV shard and extraction root. Raw strings keep the Windows
# backslashes literal instead of relying on unrecognised escape sequences
# such as "\c" or "\d", which newer Python versions warn about.
filename = r"K:\celib\dataset\MsCelebV1-Faces-Aligned.part.04.tsv"
outputDir = r"K:\celib"

# Largest limit csv.field_size_limit() accepts on every platform: it must fit
# in a C long, which is 32 bits wide on Windows.
_MAX_CSV_FIELD_SIZE = 2**31 - 1


def extract_images(tsv_path, output_dir, report_every=1000):
    """Decode the base64 images stored in a TSV file into per-MID folders.

    For every non-empty row, column 0 is used as the MID (sub-directory
    name), column 1 as the image search rank, column 4 as the face ID and
    the last column as the base64-encoded image bytes. Each image is written
    to ``<output_dir>/<MID>/<rank>-<faceID>.jpg``.

    Args:
        tsv_path: Path of the tab-separated input file.
        output_dir: Root directory for the extracted images; it and the
            per-MID sub-directories are created when missing.
        report_every: Print a progress line after this many images;
            ``0`` or ``None`` disables progress output.

    Returns:
        The number of images written.

    Raises:
        OSError: If the input cannot be read or an image cannot be written.
        IndexError: If a non-empty row has fewer than five columns.
        binascii.Error: If the last column is not valid base64.
    """
    # A base64 image payload can exceed csv's default 131072-character field
    # limit, so raise it for the duration of the run and restore it after
    # (the limit is process-wide state).
    previous_limit = csv.field_size_limit(_MAX_CSV_FIELD_SIZE)
    extracted = 0
    try:
        # newline="" is what the csv module expects from its input file.
        # Only ASCII columns are consumed, so undecodable bytes in the other
        # columns are replaced rather than aborting the whole extraction.
        with open(tsv_path, "r", newline="", encoding="utf-8",
                  errors="replace") as tsv_file:
            reader = csv.reader(tsv_file, delimiter="\t")
            for row in reader:
                if not row:
                    # csv yields [] for blank lines; nothing to extract.
                    continue
                mid, img_search_rank, face_id = row[0], row[1], row[4]
                data = base64.b64decode(row[-1])

                save_dir = os.path.join(output_dir, mid)
                # Creates missing parents too and is safe when it exists.
                os.makedirs(save_dir, exist_ok=True)
                save_path = os.path.join(
                    save_dir, "{}-{}.jpg".format(img_search_rank, face_id)
                )
                with open(save_path, "wb") as image_file:
                    image_file.write(data)

                extracted += 1
                if report_every and extracted % report_every == 0:
                    print("Extracted {} images.".format(extracted))
    finally:
        csv.field_size_limit(previous_limit)
    return extracted


if __name__ == "__main__":
    extract_images(filename, outputDir)