def split_file(source=r'E:\p2pinvestment\nn.txt', limit=26091):
    '''Split a text file into numbered chunk files of ``limit`` lines each.

    Chunks are written to the current working directory as ``0.txt``,
    ``1.txt``, ... in reading order.  The last line of every *full* chunk is
    written through ``str.strip()``, so a full chunk does not end with a
    newline; a trailing partial chunk is written verbatim.

    Args:
        source: Path of the file to split.  Defaults to the path that used
            to be hard-coded in this function.
        limit: Number of lines per chunk.  The default (26091) splits a
            260910-line file into exactly ten parts.

    Raises:
        ValueError: If ``limit`` is smaller than 1.
    '''
    if limit < 1:
        raise ValueError('limit must be a positive integer')
    file_count = 0
    url_list = []
    with open(source) as f:
        for line in f:
            url_list.append(line)
            if len(url_list) < limit:
                continue
            # A full chunk has been collected: flush it to its own file.
            file_name = str(file_count) + '.txt'
            with open(file_name, 'w') as file:
                file.writelines(url_list[:-1])
                # Strip the final line so the chunk has no trailing newline.
                file.write(url_list[-1].strip())
            url_list = []
            file_count += 1
    if url_list:
        # Left-over lines that did not fill a whole chunk.
        file_name = str(file_count) + '.txt'
        with open(file_name, 'w') as file:
            file.writelines(url_list)
    print('done')
def splittxt_to_twotxt(file: 'str', ratio: 'float' = 0.5):
    '''Split a text file into two files by line-count ratio.

    The first ``int(len(lines) * ratio)`` lines go to ``<stem>_one.txt`` and
    the remaining lines to ``<stem>_two.txt``, where ``<stem>`` is the part
    of the file's base name before its first dot.  Both outputs are created
    in the current working directory.

    Args:
        file: Path of the text file to split.
        ratio: Fraction of lines placed in the first file (default 0.5).

    Raises:
        ValueError: If ``ratio`` is outside ``[0, 1]``.
    '''
    if not 0 <= ratio <= 1:
        raise ValueError('ratio must be between 0 and 1')
    # Context managers make sure every handle is closed (and flushed).
    with open(file, 'r') as src:
        lines = src.readlines()
    split_at = int(len(lines) * ratio)
    file_name = os.path.basename(file).split('.')[0]
    parts = (('_one.txt', lines[:split_at]), ('_two.txt', lines[split_at:]))
    for suffix, part in parts:
        with open(str(file_name) + suffix, 'w') as out:
            out.write(''.join(part))
def merge_twotxt(file_one: 'str', file_two: 'str'):
    '''Append the whole content of ``file_one`` to the end of ``file_two``.

    The copy is done in binary mode, so bytes are appended unchanged.
    ``file_two`` is created if it does not exist; ``file_one`` is left
    untouched.

    Args:
        file_one: Path of the file whose content is appended.
        file_two: Path of the file that receives the content.

    Raises:
        FileNotFoundError: If ``file_one`` does not exist.
        ValueError: If both paths refer to the same file.
    '''
    import shutil

    # Appending a file to itself while reading it could grow without bound.
    if os.path.exists(file_two) and os.path.samefile(file_one, file_two):
        raise ValueError('file_one and file_two must be different files')
    # Pure-Python copy: portable, and safe for paths containing spaces or
    # shell metacharacters (no shell command is built from the arguments).
    with open(file_one, 'rb') as src, open(file_two, 'ab') as dst:
        shutil.copyfileobj(src, dst)
def write_datetime(path):
    '''Append today's date, centred in a 100-character field, to ``path``.

    The date is formatted as ``YYYY-MM-DD``.  The file is opened in append
    mode and no newline is written after the padded date.
    TODO (from the original author): switch this to line-by-line writing.
    '''
    stamp = datetime.datetime.now().strftime('%Y-%m-%d')
    banner = stamp.center(100)
    with open(path, 'a') as out:
        out.write(banner)
def delete_threeday_file(path: 'str'):
    '''Delete every file under ``path`` last modified more than three days ago.

    The directory tree is walked recursively.  A file's age is the number of
    whole calendar days between today's local date and the local date of
    its last modification.  Files modified today (age 0) are kept.

    Args:
        path: Root directory to clean up.
    '''
    today = datetime.date.today()
    for root, _dirs, files in os.walk(path):
        for name in files:
            file_path = os.path.join(root, name)
            modified = datetime.date.fromtimestamp(os.path.getmtime(file_path))
            if (today - modified).days > 3:
                try:
                    # os.remove is portable and handles paths with spaces.
                    os.remove(file_path)
                except OSError:
                    # Best-effort cleanup: one undeletable file must not
                    # stop the rest of the tree from being processed.
                    continue
def get_filetime(filename: 'str'):
    '''Return the file's last-modification date as a ``YYYY-MM-DD`` string.

    The timestamp comes from the file's mtime and is converted to local time.
    '''
    modified_at = time.localtime(os.stat(filename).st_mtime)
    return time.strftime('%Y-%m-%d', modified_at)
def filetext_todict(filename: 'str', exchange: 'str' = 'key'):
    '''Build a dictionary from a two-column, whitespace-separated text file.

    For a file containing::

        猫 cat
        狗 dog
        猪 pig

    the default result is ``{'猫': 'cat', '狗': 'dog', '猪': 'pig'}``; with
    ``exchange='value'`` keys and values are swapped, giving
    ``{'cat': '猫', 'dog': '狗', 'pig': '猪'}``.

    Blank (or whitespace-only) lines are skipped.

    Args:
        filename: Path of the text file to read.
        exchange: ``'value'`` to return the inverted mapping; any other
            value returns first-column -> second-column.

    Raises:
        ValueError: If a non-blank line does not contain exactly two fields.
    '''
    file_dict = {}
    with open(filename, 'r') as file:
        contents = file.read().splitlines()
    for line_no, content in enumerate(contents, 1):
        fields = content.split()
        if not fields:
            # Tolerate empty lines, e.g. a trailing blank line.
            continue
        if len(fields) != 2:
            raise ValueError(
                '{}:{}: expected two whitespace-separated fields, got {!r}'.format(
                    filename, line_no, content))
        data_a, data_b = fields
        file_dict[data_a] = data_b
    if exchange == 'value':
        file_dict = {value: key for key, value in file_dict.items()}
    return file_dict
def filetext_tolist(filename: 'str'):
    '''Read a text file and return its lines as a list, without line endings.'''
    with open(filename, 'r') as handle:
        text = handle.read()
    return text.splitlines()
def get_oneraw(path: 'str'):
    '''Return the first line of the file, without its trailing newline.

    An empty file yields an empty string.
    '''
    with open(path, 'r') as handle:
        first = handle.readline()
    # readline() returns at most one newline, always at the end of the line.
    return first.rstrip('\n')
def get_filesize(path: 'str'):
    '''Return the size of the file at ``path`` in bytes.'''
    return os.path.getsize(path)
def delete_defineraw(path: 'str', key: 'str'):
    '''Remove, in place, every line of the file that starts with ``key``.

    The file is rewritten through ``fileinput`` in in-place mode: while the
    loop runs, ``print`` output goes into the file instead of the console.

    NOTE(review): every line that is kept is written back with leading and
    trailing whitespace stripped, so indentation is lost.  Confirm this is
    intended.  An empty ``key`` matches every line and empties the file.

    Args:
        path: Path of the text file to edit.
        key: Prefix identifying the lines to delete.
    '''
    # The context manager guarantees the file is closed and sys.stdout is
    # restored even if an error interrupts the loop.
    with fileinput.input(path, inplace=True) as lines:
        for line in lines:
            if line.startswith(key):
                continue  # drop the matching line
            print(line.strip())
def delete_oneraw(path: 'str'):
    '''Remove the first line of a text file, in place.

    The file is rewritten through ``fileinput`` in in-place mode: while the
    loop runs, ``print`` output goes into the file instead of the console.
    Every remaining line is written back followed by a newline.

    Args:
        path: Path of the text file to edit.
    '''
    # The context manager guarantees the file is closed and sys.stdout is
    # restored even if an error interrupts the loop.
    with fileinput.input(path, inplace=True) as lines:
        for line in lines:
            if not lines.isfirstline():
                print(line.replace('\n', ''))
def deal_file(file: 'str', method: 'str', folder: 'str' = None, rename: 'str' = None):
    '''Remove, move or copy a file (best effort).

    Args:
        file: Path of the file to operate on.
        method: ``'remove'`` deletes the file, ``'move'`` moves it into
            ``folder``, ``'copy'`` copies it into ``folder``.  Any other
            value does nothing.
        folder: Destination for ``'move'`` and ``'copy'``.
        rename: Only used with ``'copy'``: when truthy, the copied file is
            renamed by prefixing its file name with this string.

    Returns:
        The path of the renamed copy when ``method == 'copy'`` and ``rename``
        is given; ``None`` in every other case, including when the
        operation failed.
    '''
    try:
        if method == 'remove':
            os.remove(file)  # delete the file
        elif method == 'move':
            shutil.move(file, folder)  # move the file into another folder
        elif method == 'copy':
            path = shutil.copy(file, folder)  # copy the file into another folder
            if rename:
                # Prefix only the final path component; a plain str.replace
                # on the whole path would also rewrite directory names that
                # happen to contain the file name.
                directory, name = os.path.split(path)
                rename_path = os.path.join(directory, rename + name)
                os.rename(path, rename_path)
                return rename_path
    except Exception:
        # Deliberately best-effort: failures are swallowed and reported to
        # the caller only through the ``None`` return value.
        # logger.error('[OCCURE FILE ERROR]:%s',traceback.print_exc())
        return None
    # logger.info('[DEAL FILE SUCCESSE]')
    return None
def deal_path(path: 'str' = '.', scope: 'str' = 'behind'):
    '''Return the leading or the trailing part of a path.

    Example with
    ``path='https://www.cnblogs.com/kaituorensheng/archive/2013/03/18/2965766.html'``:

    * ``scope='behind'`` -> ``'2965766.html'``
    * ``scope='front'``  -> ``'https://www.cnblogs.com/kaituorensheng/archive/2013/03/18'``

    Any other ``scope`` value returns ``None``.
    '''
    if scope == 'front':
        return os.path.dirname(path)
    if scope == 'behind':
        return os.path.basename(path)
    return None
def display_allfile(path: 'str' = '.'):
    '''Print the directory tree below ``path`` using the shell's ``tree/f``.

    ``tree /f`` is the Windows command that lists folders together with the
    files they contain.  The command is run with ``path`` as the working
    directory; the caller's working directory is restored afterwards.

    Args:
        path: Directory whose content is displayed (default: current one).
    '''
    previous = os.getcwd()
    os.chdir(path)
    try:
        os.system('tree/f')
    finally:
        # Do not leak the directory change into the rest of the program.
        os.chdir(previous)
def read_file(filename: 'str' = r'./root', container: 'list' = ['r', 'txt', 'read'], field: 'str' = None):
    '''Read a JSON or plain-text file, optionally deleting one JSON entry.

    ``container`` is ``[mode, file_type, read_style]`` (it is only read,
    never mutated):

    * If the last eight characters of ``filename`` contain no dot,
      ``'.' + container[-2]`` is appended as the extension.
    * JSON (``'json' in container[1]``):
        - list content: a truthy ``field`` is removed from the list, the
          file is rewritten and ``filename`` is returned; otherwise mode
          ``'r1'`` returns the first element and anything else the list.
        - dict content: mode ``'del'`` deletes key ``field``, rewrites the
          file and returns ``filename``; mode ``'r1'`` returns
          ``data[field]``; anything else returns the dict.
        - any other JSON value is returned as is.
    * Text (everything else): the file is decoded by trying ``gb2312``,
      ``gb18030`` and ``utf8`` in that order, then ``gb18030`` ignoring
      undecodable bytes.  The whole text is returned when ``'read'`` is in
      ``container[-1]``, otherwise a list of lines.

    Raises:
        ValueError / KeyError: If the JSON element to delete is missing.
        OSError: If the file cannot be opened.
    '''
    if '.' not in filename[-8:]:
        filename = filename + '.' + container[-2]
    if 'json' in container[1]:
        return _read_json(filename, container[0], field)
    return _read_text(filename, 'read' in container[-1])


def _read_json(filename, mode, field):
    '''Load a JSON file; return the requested data or rewrite it after a deletion.'''
    with open(filename) as folder:
        data = json.load(folder)
    modified = False
    if isinstance(data, list):
        if bool(field):
            data.remove(field)  # delete one element from the list
            modified = True
        elif 'r1' in mode:
            return data[0]  # first element only
    elif isinstance(data, dict):
        if 'del' in mode:
            del data[field]  # delete one key from the dict
            modified = True
        elif 'r1' in mode:
            return data[field]  # a single value
    if not modified:
        return data  # mode 'r2' (or anything else): the whole document
    # An element was deleted: persist the updated document.
    with open(filename, 'w') as folder:
        json.dump(data, folder, ensure_ascii=False)
    # logger.info('[SETUPED JSON]:%s', filename)
    return filename


def _read_text(filename, whole):
    '''Decode a text file with the first encoding that works.'''
    def load(**open_kwargs):
        with open(filename, **open_kwargs) as file:
            return file.read() if whole else file.readlines()

    for encoding in ('gb2312', 'gb18030', 'utf8'):
        try:
            return load(encoding=encoding)
        except UnicodeError:
            # Wrong guess: try the next encoding.  Other errors (missing
            # file, Ctrl-C, ...) are no longer swallowed.
            continue
    # Last resort: decode leniently, dropping undecodable bytes.
    return load(encoding='gb18030', errors='ignore')
def setup_file(filename: 'str' = r'./root', container: 'list' = ['w', 'txt', 'boy and girl'], exists=1):
    '''Create a file and/or write content to it; return the final file name.

    ``container`` is ``[mode, file_type, content]``:

    * ``filename`` is first passed through ``PNG_RE.sub('.png', ...)``; if
      its last eight characters contain no dot, ``'.' + container[-2]`` is
      appended.
    * With ``exists == 1`` an already existing file is left untouched and
      its name is returned immediately; any other value continues.
    * mode ``'c'`` creates (or truncates) an empty file.
    * mode ``'w'`` with ``'json'`` in ``container`` dumps ``container[-1]``
      as JSON (pass an ordered dict or a list).
    * mode ``'a'`` appends every item of ``container[-1]`` on its own line.
    * any other mode opens the file with that mode and writes
      ``container[-1]`` as is.

    TODO (from the original author): keep improving this function.
    '''
    filename = PNG_RE.sub('.png', filename)
    if '.' not in filename[-8:]:
        filename = '.'.join((filename, container[-2]))
    if exists == 1 and os.path.exists(filename):
        return filename

    mode = container[0]
    if mode == 'c':
        # Create an empty file.
        open(filename, "w+").close()
        return filename

    payload = container[-1]
    if mode == 'w' and 'json' in container:
        with open(filename, 'w') as handle:
            json.dump(payload, handle, ensure_ascii=False)
        # logger.info('[SETUPED JSON]:%s',filename)
        return filename

    with open(filename, mode) as handle:
        if mode == 'a':
            handle.writelines(item + '\n' for item in payload)
        else:
            handle.write(payload)
    # logger.info('[SETUPED FILE]:%s', filename)
    return filename
def search_files(path: 'str' = None, postfix: 'str' = ''):
    '''Recursively find files with the given extension below ``path``.

    Args:
        path: Directory to search.  ``None`` (the default) means the current
            working directory.
        postfix: File extension without the leading dot, e.g. ``'txt'``.

    Returns:
        A list of matching paths, each prefixed with ``path``.
    '''
    # Formatting None into the pattern would search a directory literally
    # named "None".
    base = '.' if path is None else path
    return glob.glob("{}/**/*.{}".format(base, postfix), recursive=True)
def delete_folder(path: 'str()' = r'./root'):
    '''Delete a folder and everything in it; by default ``./root``.

    Nothing happens when ``path`` does not exist.  Backslashes in a path
    literal are escape characters, so write Windows paths as raw strings
    (``r'...'``).
    '''
    if not os.path.exists(path):
        # logger.error('[NOT FOUND FILE ERROR]: %s',path)
        return
    shutil.rmtree(path)
    # logger.info('[DELETED FILE]: %s',path)
def setup_folder(path: 'str' = r'./root'):
    '''Create a folder (by default ``./root``) if needed and return its path.

    The path is first passed through ``transform_data.correct_encode``
    (project helper; presumably normalises the path's encoding — confirm).
    Missing parent directories are created as well.  Backslashes in a path
    literal are escape characters, so write Windows paths as raw strings
    (``r'...'``).

    Returns:
        The (possibly re-encoded) path of the folder.
    '''
    path = transform_data.correct_encode(path)
    if not os.path.exists(path):
        # makedirs also creates intermediate directories; exist_ok covers
        # the folder being created by someone else between check and call.
        os.makedirs(path, exist_ok=True)
    return path