Python 大文件查找

新博客链接

前言

我的 Win10 上没有安装 360:一是现在 360 的体验不太好,一不小心就会装上全家桶;二是 Win10 自带安全中心。但 Win10 自带的磁盘清理没有查找大文件的功能,清理起来有些麻烦,所以干脆用 Python 写了一个查找大文件的脚本(当然,也可以直接用 Windows 的高级搜索功能来查找大文件)。

功能

1.支持指定最小文件大小
2.支持指定最大显示数量
3.支持文件名正则表达式筛选

截图

大文件查找

Python 代码

#coding: utf-8
import os
import re
import sys
import time
from os.path import getsize,join,abspath
# Collected (size_in_bytes, path) tuples; search() keeps them in descending order.
data = []
# Maximum number of files to keep/list; -1 means no limit.
maxShow = 100
# Byte-size units.
KB = 1 << 10
MB = KB ** 2
GB = KB ** 3
# Minimum file size, expressed in MB, for a file to be collected.
minSize = 100
def sort():
    """Bubble the last element of ``data`` forward into descending order.

    This is a single insertion-sort step: the final element is swapped with
    its predecessor for as long as it compares greater. If everything before
    the last element is already in descending order, the whole list is in
    descending order afterwards.
    """
    pos = len(data) - 1
    while pos > 0 and data[pos] > data[pos - 1]:
        data[pos - 1], data[pos] = data[pos], data[pos - 1]
        pos -= 1
def search(d,patterns):
    """Walk directory ``d`` and collect large files whose names match a pattern.

    Each file found under ``d`` whose size is at least ``minSize`` MB and whose
    bare file name matches at least one regex in ``patterns`` (``re.search``
    semantics) is appended to the module-level ``data`` list as a
    ``(size_in_bytes, full_path)`` tuple. After every insertion ``sort()`` is
    called to restore descending order and, unless ``maxShow`` is -1, the
    smallest entry is dropped once the list grows beyond ``maxShow``.

    Args:
        d: Root directory to scan.
        patterns: Iterable of regular expressions tested against file names.

    Raises:
        re.error: If one of ``patterns`` is not a valid regular expression.
            (Previously this was swallowed and silently produced no results.)
    """
    # Compile once instead of once per file; this also surfaces bad patterns
    # immediately instead of hiding them.
    compiled = [re.compile(p) for p in patterns]
    threshold = minSize * MB
    for root, _dirs, files in os.walk(d):
        for name in files:
            path = join(root, name)
            try:
                size = getsize(path)
            except OSError:
                # Best effort: skip entries whose size cannot be read
                # (permission denied, file vanished, broken link, ...).
                continue
            if size < threshold:
                continue
            if not any(rx.search(name) for rx in compiled):
                continue
            data.append((size, path))
            sort()
            if maxShow != -1 and len(data) > maxShow:
                data.pop()
def printAns():
    """Print the collected files, largest first, with human-readable sizes.

    Writes to stdout: the number of entries in ``data``, a header line, and
    then one ``path<TABs>size`` line per file. At most ``maxShow`` files are
    listed; a negative ``maxShow`` lists all of them. Sizes are shown in B
    (below 1 KB), KB (below 5 MB), MB with two decimals (below 100 MB) or
    whole MB.

    The global ``maxShow`` is only read, never modified, so calling this
    function repeatedly yields the same output.
    """
    print(len(data))
    # A negative limit means "no limit"; otherwise never report more than we have.
    limit = len(data) if maxShow < 0 else min(maxShow, len(data))
    print('The files with size not less than %d MB are listed below, %d in all'%(minSize,limit))
    threshold = minSize*MB
    for size, path in sorted(data, reverse=True)[:limit]:
        if size < threshold:
            # Entries are in descending order, so everything after is smaller too.
            return
        if size < KB:
            print('%s\t\t\t\t%d B'%(path,size))
        elif size < 5*MB:
            print('%s\t\t\t\t%d KB'%(path,size//KB))
        elif size < 100*MB:
            print('%s\t\t\t\t%.2f MB'%(path,float(size)/MB))
        else:
            print('%s\t\t\t\t%d MB'%(path,size//MB))
def main():
    """Command-line entry point: parse ``sys.argv``, scan and print results.

    Expected arguments: ``directory minSize maxShow [re-patterns...]``.
    ``minSize`` and ``maxShow`` are stored in the module-level globals of the
    same name; when no pattern is given, ``'.*'`` (match every name) is used.
    The directory is made absolute, scanned with ``search()`` and the result
    is printed with ``printAns()``.

    Exits with status 1 after printing the usage text when fewer than three
    arguments are supplied or when ``minSize``/``maxShow`` are not integers.
    """
    global minSize,maxShow
    usage = '''
        Usage:\t\t\tpython **.py directory minSize maxShow [re-patterns]
        minSize\t\t\tint , size in MB
        maxShow\t\t\tint , show the biggest 'maxShow' files with size bigger than 'minSize', -1 means no limitation
        [re-patterns]\t\tregular rule[s] that the file name should obey , not necessary    

        Example:\t\tpython main.py C:/ 100 10 .mp3$ .mp4$ 
        '''
    if len(sys.argv)<4:
        print(usage)
        # sys.exit is always available; the bare exit() helper is injected by
        # the site module and is meant for interactive use only.
        sys.exit(1)
    try:
        # Parse both values before touching the globals so a bad argument
        # leaves the module state unchanged.
        min_size=int(sys.argv[2])
        max_show=int(sys.argv[3])
    except ValueError:
        print('minSize and maxShow must be integers')
        print(usage)
        sys.exit(1)
    minSize,maxShow=min_size,max_show
    # Default to a match-everything pattern when none is supplied.
    ps=sys.argv[4:] or ['.*']
    search(abspath(sys.argv[1]),ps)
    print()
    printAns()
if __name__ == '__main__':
    # time.clock() was removed in Python 3.8; time.perf_counter() is the
    # documented replacement for measuring elapsed time.
    s=time.perf_counter()
    main()
    print('time used: %.2f s'%(time.perf_counter()-s))

猜你喜欢

转载自blog.csdn.net/HelloMyPeople/article/details/80376979