# 今天闲来无事,写了个txt合并的小脚本,或许以后再做个有关网络小说的词库。
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 1 11:08:56 2018
@author: 96jie
"""
import os
# Root folder that getfile() scans for novels to merge.
file_dir = "XIAOSHUO_txt/"

# Size threshold, in MiB, for each generated txt file (intended cap: 100 MiB).
SIZE_M = 100
def getfile(file_dir):
    """Merge the text files found under *file_dir* into ``newN.txt`` files.

    Every sub-folder of *file_dir* is scanned, and each file inside it is
    appended to the current output file (``new0.txt``, ``new1.txt``, ... in
    the current working directory), preceded by a numbered heading line.
    Once the output file grows past ``SIZE_M`` MiB, it is closed and the next
    numbered output file is started, with the book counter reset to 1.

    Args:
        file_dir: Path of the root folder; a trailing separator is optional.

    Returns:
        None. Results are written to disk and progress is printed.

    Raises:
        OSError: If *file_dir* or one of its entries cannot be read, or an
            output file cannot be written.
    """
    limit_bytes = SIZE_M * 1048576
    num = 0
    epi = 1
    out_path = 'new' + str(num) + '.txt'
    new_text = open(out_path, 'w', encoding='utf-8')
    try:
        # Sorted so that book order and numbering are reproducible;
        # os.listdir() alone returns entries in arbitrary order.
        for idx in sorted(os.listdir(file_dir)):
            sec_dir = os.path.join(file_dir, idx)
            if not os.path.isdir(sec_dir):
                # Stray files next to the sub-folders are not novels to merge.
                continue
            for obj in sorted(os.listdir(sec_dir)):
                src_path = os.path.join(sec_dir, obj)
                if not os.path.isfile(src_path):
                    continue
                # Source files are expected to be named like "1__XXX.txt";
                # fall back to the whole stem when the "__" marker is absent.
                stem = os.path.splitext(obj)[0]
                _, marker, title = stem.partition('__')
                name = title if marker else stem
                print('-------正在写入' + name + '-----')
                new_text.write(
                    '\n\n' + '第' + str(epi) + '本 ' + name + '\n\n')
                epi += 1
                # errors='ignore' drops undecodable bytes so that a badly
                # encoded source file cannot abort the whole merge.
                with open(src_path, encoding='UTF-8', errors='ignore') as f:
                    new_text.write(f.read() + '\n')
                # Flush first: getsize() only sees bytes already on disk.
                new_text.flush()
                if os.path.getsize(out_path) > limit_bytes:
                    new_text.close()
                    print('-------------finish' + str(num)
                          + '-------------------')
                    num += 1
                    epi = 1
                    out_path = 'new' + str(num) + '.txt'
                    new_text = open(out_path, 'w', encoding='utf-8')
    finally:
        # Always release the current output file, even on error.
        new_text.close()
# Run the merge only when this file is executed as a script.
if __name__ == '__main__':
    getfile(file_dir)