txt合并

今天闲来无事,写了个txt合并的小脚本,或许以后再做个有关网络小说的词库。

# -*- coding: utf-8 -*-
"""
Created on Mon Oct  1 11:08:56 2018

@author: 96jie
"""

import os

# Root directory handed to getfile() when this file is run as a script.
file_dir = 'XIAOSHUO_txt/'
# Size threshold, in MB, for each generated txt file (intended cap: 100 MB).
SIZE_M = 100

def getfile(file_dir, size_m=None):
    """Merge the text files under *file_dir* into ``newN.txt`` volumes.

    *file_dir* is scanned one level deep: every sub-directory is visited and
    each regular file inside it is appended to the current output volume
    (``new0.txt``, ``new1.txt``, ... created in the working directory).
    Each book is preceded by a heading built from a per-volume counter and
    the title taken from the file name (``1__Title.txt`` -> ``Title``).

    Once the current volume has grown past the size threshold, the next book
    starts a new volume and the heading counter restarts at 1.  A book is
    never split, so a volume may exceed the threshold by up to one book.

    Args:
        file_dir: Directory whose sub-directories contain the source texts.
            A trailing path separator is optional.
        size_m: Size threshold per volume in MiB.  ``None`` (the default)
            uses the module-level ``SIZE_M``.

    Returns:
        None.  The result is the set of ``newN.txt`` files on disk.
    """
    limit = (SIZE_M if size_m is None else size_m) * 1048576
    num = 0
    epi = 1
    out_path = 'new' + str(num) + '.txt'
    new_text = open(out_path, 'w', encoding='utf-8')
    try:
        # Sorted listings make the merge order reproducible; os.listdir()
        # returns entries in arbitrary order.
        for idx in sorted(os.listdir(file_dir)):
            sec_dir = os.path.join(file_dir, idx)
            if not os.path.isdir(sec_dir):
                # Stray files next to the sub-directories are not books.
                continue
            for obj in sorted(os.listdir(sec_dir)):
                src_path = os.path.join(sec_dir, obj)
                if not os.path.isfile(src_path):
                    continue

                # Roll over *before* writing the next book so that no empty
                # trailing volume is left behind.  Flush first: getsize()
                # only sees bytes that already reached the file.
                new_text.flush()
                if os.path.getsize(out_path) > limit:
                    new_text.close()
                    print('-------------finish'+str(num)+'-------------------' )
                    num += 1
                    epi = 1
                    out_path = 'new' + str(num) + '.txt'
                    new_text = open(out_path, 'w', encoding='utf-8')

                # Files are named like 1__XXX.txt; keep everything after the
                # first '__' and fall back to the bare stem when it is absent.
                stem = os.path.splitext(obj)[0]
                name = stem.partition('__')[2] or stem
                print('-------正在写入' + name + '-----')
                new_text.write('\n\n' + '第' + str(epi) + '本  ' + name + '\n\n')
                epi += 1

                # errors='ignore' drops undecodable bytes so that a badly
                # encoded source file cannot abort the whole merge.
                with open(src_path, encoding='UTF-8', errors='ignore') as f:
                    # Stream line by line instead of loading the whole book.
                    for line in f:
                        new_text.write(line)
                new_text.write('\n')
    finally:
        # Always release the current volume, including the last one.
        new_text.close()
if __name__ == "__main__":
    # Script entry point: merge everything under the module-level file_dir.
    getfile(file_dir)
    
                 
               
            
           
            




            

猜你喜欢

转载自blog.csdn.net/i96jie/article/details/82917839
txt