# 使用 Python 3 快速找出文件夹中所有相同的文件

import glob
import hashlib
from time import perf_counter

def get_file_md5(file, chunk_size=1 << 20):
    """Return the hexadecimal MD5 digest of a file's contents.

    The file is read in fixed-size chunks so that large files can be hashed
    without loading them entirely into memory.

    Args:
        file: Path of the file to hash.
        chunk_size: Number of bytes read per iteration; must be positive.

    Returns:
        The MD5 digest as a 32-character lowercase hexadecimal string.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer.
        OSError: If the file cannot be opened or read.
    """
    if chunk_size <= 0:
        # read(0) would end the loop immediately and read(-1) would load the
        # whole file, so reject both rather than silently misbehave.
        raise ValueError("chunk_size must be a positive integer")
    md5 = hashlib.md5()
    with open(file, 'rb') as fp:
        # iter() with a sentinel keeps calling read() until it returns b''.
        for chunk in iter(lambda: fp.read(chunk_size), b''):
            md5.update(chunk)
    return md5.hexdigest()

if __name__ == '__main__':
    # Maps each MD5 digest to the list of file paths that share that content.
    all_md5 = dict()
    file_dir = r'E:\images\*.png'
    start = perf_counter()
    for file in glob.iglob(file_dir):
        # Group paths in a list per digest instead of building one long string.
        all_md5.setdefault(get_file_md5(file), []).append(file)
    end = perf_counter()
    # Elapsed time of the hashing pass ("耗时" means "time taken").
    print(f"耗时:{end-start}")
    # Number of distinct file contents found.
    print(len(all_md5))
    # Report every group of identical files, one path per line, with a blank
    # line separating the groups.
    for files in all_md5.values():
        if len(files) > 1:
            print()
            print('\n'.join(files))

猜你喜欢

# 转载自 blog.csdn.net/qq523176585/article/details/88184745