# python处理大数据文件,set、enumerate使用,python传参,获取文件路径

 
 
###   enumerate(ad_set,start=1)
###   start=1:指定索引起始值
#-*- coding:utf-8 -*-
#ad转家庭、政企ad
#编写者:zhangqm
#日期:20170810
#功能:分别找出家庭,政企用户,家庭的ad转jzad,政企ad重新编号且相同ad编号相同
######################################################################
# 备注:
#       File_target_zhengq_temp(临时文件)是处理的文件中的政企数据
#
# 调用方式:python Jzad_jiat_zhengq.py /data/u_lx_data/zhangqm/处理的文件.txt
#
# 注意:
#      此程序只适用于ad在File_deal文件的第一列,如果不在第一列,请酌情修改程序
####################################################################### 

from datetime import datetime
from sys import argv
import os

# Configuration file: tab-separated mapping from a household ad (column 1)
# to the value that replaces it (column 2).
File_source = "/data/u_lx_data/tangran/Jz/JZ_ad_lzy.txt"

# Directory that receives the output files: the current working directory.
# NOTE: this name shadows the builtin dir().
dir = os.getcwd()+"/"

# The file to process is the first command-line argument; fail with a usage
# message instead of a bare IndexError when it is missing.
if len(argv) < 2:
        raise SystemExit("usage: python %s <file-to-process>" % argv[0])

# Keep the path exactly as given so the input can be opened no matter which
# directory the script is started from.
File_deal = argv[1]

# Output file names are built from the base name only, so results always land
# in the working directory even when the input lives elsewhere.
_deal_name = os.path.basename(File_deal)

# Temporary file: the government/enterprise rows of the processed file
File_target_zhengq_temp = dir+"政企原始数据.txt"

# Government/enterprise result file
File_target_zhengq = dir+"zq_"+_deal_name

# Household result file
File_target_jiating = dir+"jt_"+_deal_name

print ("程序已开始执行!!!")
print (datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

# Mapping loaded from File_source: first column -> second column.
dist_source = {}
# Scratch buffer for government/enterprise rows.
# NOTE: this name shadows the builtin list().
list = []
# Distinct ads that are not keys of dist_source.
ad_set = set()
# ad -> sequence number assigned to that ad.
dist2 = {}

def Read_File_source():
        """Load the tab-separated mapping in File_source into dist_source.

        For every line, the first column becomes the key and the second column
        the value.  Surrounding whitespace is stripped before splitting, and
        columns after the second are ignored.  Lines with fewer than two
        columns (for example a trailing blank line) are skipped.
        """
        with open(File_source, "r") as ff:
                for line in ff:
                        fields = line.strip().split("\t")
                        if len(fields) < 2:
                                # Blank or malformed line: nothing to map.
                                continue
                        dist_source[fields[0]] = fields[1]

def Main_zhengqi():
        """Extract the government/enterprise rows of File_deal and renumber their ads.

        A row belongs to this group when its first tab-separated column is not
        a key of dist_source.  Each such row is written (whitespace-stripped)
        to File_target_zhengq_temp, and written to File_target_zhengq with the
        first column replaced by a sequence number.

        Numbers start at 1 and are handed out in order of first appearance, so
        identical ads always share one number and repeated runs over the same
        input produce the same numbering.  ad_set and dist2 are updated as a
        side effect.  Rows are streamed one at a time, so memory use grows with
        the number of distinct ads rather than with the number of rows.
        """
        with open(File_deal, "r") as f_in, \
                        open(File_target_zhengq_temp, "w") as f_temp, \
                        open(File_target_zhengq, "w") as f_out:
                for line in f_in:
                        fields = line.strip().split("\t")
                        ad = fields[0]
                        if ad in dist_source:
                                # Household row: not part of this output.
                                continue
                        f_temp.write("\t".join(fields) + "\n")
                        ad_set.add(ad)
                        if ad not in dist2:
                                # First time this ad is seen: give it the next number.
                                dist2[ad] = len(dist2) + 1
                        f_out.write(str(dist2[ad]) + "\t" + "\t".join(fields[1:]) + "\n")
        print("政企已完成!!!")
        print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

def Main_jiating():
        """Write the household rows of File_deal to File_target_jiating.

        A row is a household row when its first tab-separated column is a key
        of dist_source.  That column is replaced by the mapped value and the
        row is written out; every other row is dropped.  A completion message
        and the current timestamp are printed at the end.
        """
        with open(File_target_jiating, "w") as writer, open(File_deal, "r") as reader:
                for raw in reader:
                        fields = raw.strip().split("\t")
                        ad = fields[0]
                        if ad in dist_source:
                                fields[0] = dist_source[ad]
                                writer.write("\t".join(fields) + "\n")
        print("家庭已完成!!!")
        print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

if __name__ == "__main__":
        # The mapping is loaded first because both processing passes look ads
        # up in it; the household pass then runs before the government/enterprise pass.
        for step in (Read_File_source, Main_jiating, Main_zhengqi):
                step()
        print("程序已完成!!!")
        print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

# 版权声明:原创文章,未经允许不得转载,谢谢。

# 猜你喜欢

# 转载自blog.csdn.net/iboyman/article/details/79443150
# 今日推荐