# Python 自動化之處理文件(持续更新)【内向即失败--王奕君】

def split_file(source: 'str' = r'E:\p2pinvestment\nn.txt', limit: 'int' = 26091, out_dir: 'str' = '.'):
    '''Split a text file into numbered chunk files of ``limit`` lines each.

    Chunks are written as ``0.txt``, ``1.txt``, ... inside ``out_dir``.
    The defaults reproduce the original hard-coded job: a 260910-line
    input cut into 10 files of 26091 lines, written to the current
    directory.

    The last line of every full chunk is written with surrounding
    whitespace stripped, so a full chunk does not end with a newline.
    A trailing partial chunk is written verbatim.

    :param source: path of the text file to split.
    :param limit: number of lines per chunk; must be at least 1.
    :param out_dir: existing directory that receives the chunk files.
    :raises ValueError: if ``limit`` is smaller than 1.
    :raises OSError: if ``source`` cannot be read or a chunk cannot be written.
    '''
    import os  # local import: no module-level import block is visible here

    if limit < 1:
        raise ValueError('limit must be a positive line count')

    file_count = 0
    url_list = []
    with open(source) as f:
        for line in f:
            url_list.append(line)
            if len(url_list) < limit:
                continue
            file_name = os.path.join(out_dir, str(file_count) + '.txt')
            with open(file_name, 'w') as chunk:
                chunk.writelines(url_list[:-1])
                # Drop the terminator of the chunk's final line.
                chunk.write(url_list[-1].strip())
            url_list = []
            file_count += 1
    if url_list:
        # Left-over lines that did not fill a whole chunk.
        file_name = os.path.join(out_dir, str(file_count) + '.txt')
        with open(file_name, 'w') as chunk:
            chunk.writelines(url_list)
    print('done')

def splittxt_to_twotxt(file: 'str', ratio: 'float' = 0.5):
    '''Split a text file into two files by line-count ratio.

    The first ``int(len(lines) * ratio)`` lines go to ``<name>_one.txt``
    and the remaining lines to ``<name>_two.txt``. ``<name>`` is the base
    name of ``file`` up to its first dot. Both outputs are created in the
    current working directory.

    :param file: path of the text file to split.
    :param ratio: share of lines for the first file, between 0 and 1
        (default 0.5, i.e. 50%).
    :raises ValueError: if ``ratio`` is outside ``[0, 1]``.
    '''
    if not 0 <= ratio <= 1:
        raise ValueError('ratio must be between 0 and 1')
    # Context managers close every handle, including on error.
    with open(file, 'r') as source:
        lines = source.readlines()
    lines_for_b = int(len(lines) * ratio)
    file_name = os.path.basename(file).split('.')[0]
    with open(file_name + '_one.txt', 'w') as first:
        first.writelines(lines[:lines_for_b])
    with open(file_name + '_two.txt', 'w') as second:
        second.writelines(lines[lines_for_b:])


def merge_twotxt(file_one: 'str', file_two: 'str'):
    '''Append the whole content of ``file_one`` to the end of ``file_two``.

    The copy is done in Python on raw bytes instead of shelling out to
    the Windows ``type`` command, so it works on any platform and with
    paths that contain spaces or shell metacharacters. ``file_two`` is
    created if it does not exist; ``file_one`` is left untouched.

    :param file_one: path of the file whose content is appended.
    :param file_two: path of the file that receives the content.
    :raises OSError: if ``file_one`` cannot be read or ``file_two``
        cannot be written.
    '''
    # Read first, so a missing source never creates an empty target and
    # merging a file into itself cannot loop forever.
    with open(file_one, 'rb') as source:
        payload = source.read()
    with open(file_two, 'ab') as target:
        target.write(payload)


def write_datetime(path):
    '''Append today's date, centred in a 100-character field, to ``path``.

    The date is formatted as ``YYYY-MM-DD`` in local time. The file is
    opened in append mode, so the banner is the first line only when the
    file is new or empty. No newline is written after it.

    TODO (from the original author): switch this to line-based writing.
    '''
    stamp = datetime.date.today().strftime('%Y-%m-%d')
    banner = stamp.center(100)
    with open(path, 'a') as handle:
        handle.write(banner)


def delete_threeday_file(path: 'str'):
    '''Delete every file under ``path`` last modified more than three days ago.

    The tree is walked recursively. A file's age is the number of whole
    calendar days between today and its modification date (local time).
    Files modified today are kept; the original ``day == 0`` test removed
    them, which contradicted the stated purpose.

    Removal uses ``os.remove`` rather than the Windows ``del`` shell
    command, so paths with spaces work and nothing goes through a shell.
    Files that cannot be inspected or removed are skipped.

    :param path: root directory to clean.
    '''
    today = datetime.date.today()
    for root, _dirs, files in os.walk(path):
        for name in files:
            file_path = os.path.join(root, name)
            try:
                modified = datetime.date.fromtimestamp(os.path.getmtime(file_path))
                if (today - modified).days > 3:
                    os.remove(file_path)
            except OSError:
                # Best-effort cleanup: a locked or vanished file must not
                # abort the rest of the sweep (``del`` did not either).
                continue


def get_filetime(filename: 'str'):
    '''Return the last-modification date of ``filename`` as ``YYYY-MM-DD``.

    The value comes from ``os.path.getmtime`` (modification time, not
    creation time) and is rendered in local time.
    '''
    modified_at = os.path.getmtime(filename)
    return time.strftime('%Y-%m-%d', time.localtime(modified_at))


def filetext_todict(filename: 'str', exchange: 'str' = 'key'):
    '''Build a dict from a text file holding two whitespace-separated columns.

    Given a file containing::

        猫 cat
        狗 dog
        猪 pig

    the default call returns ``{'猫': 'cat', '狗': 'dog', '猪': 'pig'}``
    (first column -> second column). With ``exchange='value'`` the mapping
    is inverted: ``{'cat': '猫', 'dog': '狗', 'pig': '猪'}``.

    Blank and whitespace-only lines are skipped, so a trailing empty line
    no longer aborts the load.

    :param filename: path of the text file to read.
    :param exchange: ``'value'`` swaps keys and values; any other value
        keeps the file order.
    :raises ValueError: if a non-blank line does not hold exactly two fields.
    '''
    file_dict = {}
    with open(filename, 'r') as file:
        for content in file.read().splitlines():
            if not content.strip():
                continue
            data_a, data_b = content.split()
            file_dict[data_a] = data_b
    if exchange == 'value':
        file_dict = {value: key for key, value in file_dict.items()}
    return file_dict


def filetext_tolist(filename: 'str'):
    '''Return the lines of ``filename`` as a list without line terminators.'''
    with open(filename, 'r') as handle:
        text = handle.read()
    return text.splitlines()


def get_oneraw(path: 'str', ):
    '''Return the first line of ``path`` with its newline removed.

    An empty file yields an empty string.
    '''
    with open(path, 'r') as handle:
        first_line = handle.readline()
    return first_line.replace('\n', '')


def get_filesize(path: 'str'):
    '''Return the size of ``path`` in bytes.'''
    return os.path.getsize(path)


def delete_defineraw(path: 'str', key: 'str'):
    '''Remove, in place, every line of ``path`` that starts with ``key``.

    All other lines are written back with their content intact; only the
    line terminator is normalised, and every kept line ends with a
    newline. The original stripped leading and trailing whitespace from
    kept lines, which silently altered data that should only have been
    filtered.

    :param path: file to rewrite.
    :param key: prefix that marks a line for deletion.
    '''
    # With inplace=True, fileinput redirects stdout into the file being
    # rewritten. The context manager restores stdout and closes the file
    # even if the loop raises.
    with fileinput.input(path, inplace=True) as stream:
        for line in stream:
            if line.startswith(key):
                continue
            print(line.rstrip('\n'))


def delete_oneraw(path: 'str'):
    '''Drop the first line of the text file ``path``, rewriting it in place.

    While ``fileinput`` runs in in-place mode, ``print`` output goes into
    the file, so each printed line is kept and the skipped one is removed.
    '''
    for text in fileinput.input(path, inplace=1):
        if fileinput.isfirstline():
            continue
        print(text.replace('\n', ''))


def deal_file(file: 'str', method: 'str', folder: 'str' = None, rename: 'str' = None):
    '''Remove, move or copy a file, on a best-effort basis.

    :param file: path of the file to operate on.
    :param method: ``'remove'`` deletes ``file``; ``'move'`` moves it into
        ``folder``; ``'copy'`` copies it into ``folder``. Any other value
        does nothing.
    :param folder: destination for ``'move'`` and ``'copy'``.
    :param rename: for ``'copy'`` only, a prefix added to the copied
        file's name.
    :return: the path of the renamed copy when ``method == 'copy'`` and
        ``rename`` is given; ``None`` in every other case, including
        failure.

    Failures never propagate. They are reported through the ``logging``
    module instead of being dropped without a trace.
    '''
    import logging  # local import: no module-level import block is visible here

    try:
        if method == 'remove':
            os.remove(file)
        elif method == 'move':
            shutil.move(file, folder)
        elif method == 'copy':
            path = shutil.copy(file, folder)
            if rename:
                name = os.path.split(path)[-1]
                # Prefix only the final component. str.replace would also
                # rewrite any directory part that contains the file name.
                rename_path = path[:len(path) - len(name)] + rename + name
                os.rename(path, rename_path)
                return rename_path
    except Exception:
        # Deliberately broad: callers rely on this helper never raising.
        logging.getLogger(__name__).exception(
            'deal_file failed: method=%r file=%r', method, file)
    return None


def deal_path(path: 'str' = '.', scope: 'str' = 'behind'):
    '''Return one side of ``path``, split at its last separator.

    For ``path='https://www.cnblogs.com/kaituorensheng/archive/2013/03/18/2965766.html'``:

    * ``scope='behind'`` gives ``2965766.html``
    * ``scope='front'`` gives ``https://www.cnblogs.com/kaituorensheng/archive/2013/03/18``

    Any other ``scope`` yields ``None``.
    '''
    if scope == 'front':
        return os.path.dirname(path)
    if scope == 'behind':
        return os.path.basename(path)
    return None


def display_allfile(path: 'str' = '.'):
    '''Print the directory tree below ``path`` using the shell command ``tree/f``.

    The command is run with ``path`` as the working directory. The
    previous working directory is restored afterwards, so calling this
    helper no longer changes where later relative paths resolve.

    NOTE(review): ``tree/f`` looks like the Windows ``tree /F`` command;
    on other platforms the shell call is expected to fail. Confirm the
    target platform.

    :param path: directory whose content is listed.
    :raises OSError: if ``path`` cannot be entered.
    '''
    previous = os.getcwd()
    os.chdir(path)
    try:
        os.system('tree/f')
    finally:
        os.chdir(previous)


def _read_json(filename, mode, field):
    '''Load a JSON file and read from it or delete one entry, as ``mode`` says.'''
    with open(filename) as handle:
        data = json.load(handle)

    changed = False
    if isinstance(data, list):
        if field:
            # For a list, any truthy ``field`` means "remove this element".
            data.remove(field)
            changed = True
        elif 'r1' in mode:
            return data[0]
    elif isinstance(data, dict):
        if 'del' in mode:
            del data[field]
            changed = True
        elif 'r1' in mode:
            return data[field]

    if not changed:
        return data
    # An entry was removed: write the remaining data back to the file.
    with open(filename, 'w') as handle:
        json.dump(data, handle, ensure_ascii=False)
    return filename


def _read_text(filename, whole):
    '''Read a text file, trying several encodings before a lossy fallback.'''
    for encoding in ('gb2312', 'gb18030', 'utf8'):
        try:
            with open(filename, encoding=encoding) as handle:
                return handle.read() if whole else handle.readlines()
        except UnicodeError:
            # Wrong guess; try the next encoding. Other errors (missing
            # file, Ctrl-C) propagate instead of being swallowed.
            continue
    # Nothing decoded cleanly: read as gb18030 and drop undecodable bytes.
    with open(filename, encoding='gb18030', errors='ignore') as handle:
        return handle.read() if whole else handle.readlines()


def read_file(filename: 'str' = r'./root', container: 'list' = ['r', 'txt', 'read'], field: 'str' = None):
    '''Read a JSON or text file according to ``container``.

    ``container`` is ``[mode, kind, read_style]`` and is never modified:

    * ``container[-2]`` is appended as the extension when the last eight
      characters of ``filename`` contain no dot.
    * If ``container[1]`` contains ``'json'`` the file is parsed as JSON:

      - list data: a truthy ``field`` removes that element, rewrites the
        file and returns ``filename``. Otherwise a mode containing
        ``'r1'`` returns the first element and anything else returns the
        whole list.
      - dict data: a mode containing ``'del'`` deletes key ``field``,
        rewrites the file and returns ``filename``. ``'r1'`` returns
        ``data[field]``; anything else returns the whole dict.
      - any other JSON value is returned unchanged.

    * Otherwise the file is read as text, trying ``gb2312``, ``gb18030``
      and ``utf8`` in that order, then ``gb18030`` ignoring errors. The
      result is one string when ``container[-1]`` contains ``'read'``,
      else a list of lines.

    NOTE(review): the GB encodings are tried before UTF-8, so a UTF-8
    file that happens to decode as GB18030 is returned mis-decoded. The
    order is kept as the original had it; confirm whether it is intended.

    :raises OSError: if the file cannot be opened.
    :raises ValueError, KeyError, IndexError: if the requested JSON
        element does not exist.
    '''
    if '.' not in filename[-8:]:
        filename = filename + '.' + container[-2]
    if 'json' in container[1]:
        return _read_json(filename, container[0], field)
    return _read_text(filename, 'read' in container[-1])


def setup_file(filename: 'str' = r'./root', container: 'list' = ['w', 'txt', 'boy and girl'], exists=1):
    '''Create a file and write ``container[-1]`` into it; return its path.

    ``container`` is ``[mode, kind, payload]`` and is never modified.
    The name is first passed through ``PNG_RE.sub('.png', ...)``
    (``PNG_RE`` is defined elsewhere; presumably it normalises image
    suffixes, which is not visible here). ``container[-2]`` is appended
    as the extension when the last eight characters contain no dot.

    * ``exists == 1`` and the file already exists: nothing is written.
    * mode ``'c'``: an empty file is created (truncating any old one).
    * mode ``'w'`` with ``'json'`` among the container items: the payload
      is dumped as JSON.
    * mode ``'a'``: every item of the payload is appended on its own line.
    * any other mode: the payload is written as-is using that open mode.

    TODO (from the original author): keep improving this helper.
    '''
    target = PNG_RE.sub('.png', filename)
    if '.' not in target[-8:]:
        target = target + '.' + container[-2]
    if os.path.exists(target) and exists == 1:
        return target

    mode = container[0]
    if mode == 'c':
        open(target, "w+").close()
        return target
    if mode == 'w' and 'json' in container:
        with open(target, 'w') as handle:
            json.dump(container[-1], handle, ensure_ascii=False)
        return target

    with open(target, mode) as handle:
        if mode == 'a':
            handle.writelines(item + '\n' for item in container[-1])
        else:
            handle.write(container[-1])
    return target


def search_files(path: 'str' = None, postfix: 'str' = ''):
    '''Recursively list the paths under ``path`` whose name ends in ``.postfix``.

    :param path: root directory to search. ``None`` (the default) means
        the current directory; it was previously formatted into the
        literal directory name ``"None"``.
    :param postfix: file extension, with or without the leading dot
        (``'txt'`` and ``'.txt'`` are equivalent). An empty value keeps
        the original pattern ``*.``, which only matches names ending in
        a dot.
    :return: list of matching paths as produced by ``glob.glob``.
    '''
    root = '.' if path is None else path
    suffix = postfix[1:] if postfix.startswith('.') else postfix
    return glob.glob("{}/**/*.{}".format(root, suffix), recursive=True)


def delete_folder(path: 'str' = r'./root'):
    '''Remove the directory tree at ``path``; do nothing if it is absent.

    The default target is ``./root``. Pass Windows paths as raw strings
    (``r'...'``) so backslashes are not treated as escape sequences.
    '''
    if not os.path.exists(path):
        return
    shutil.rmtree(path)


def setup_folder(path: 'str' = r'./root'):
    '''Create the directory ``path`` if it does not exist and return it.

    The path is first passed through ``transform_data.correct_encode``
    (defined elsewhere; presumably it fixes the path's encoding —
    confirm). Missing parent directories are created too, and a
    directory that appears between the check and the creation is not
    treated as an error.

    Pass Windows paths as raw strings (``r'...'``) so backslashes are
    not treated as escape sequences.

    :param path: directory to create; defaults to ``./root``.
    :return: the path after ``correct_encode`` has been applied.
    '''
    path = transform_data.correct_encode(path)
    if not os.path.exists(path):
        # makedirs handles nested paths; exist_ok covers a concurrent create.
        os.makedirs(path, exist_ok=True)
    return path


 

# 转载自 blog.csdn.net/qq_37995231/article/details/79279143