python----使用re正则表达式筛选数据,去重,列表,取特定行数据(适用于web的html回包数据提取)

python—-使用re正则表达式筛选数据,去重,列表,取特定行数据(适用于web的html回包数据提取)

环境配置:对目标服务器(192.168.4.27)的日志文件筛选特定数据
/usr/local/tomcat_corp/logs/catalina.out

python脚本必须在该服务器上运行

1、筛选银行卡字段bankCode=

python代码:
[root@cdn tmp]# ls
findbankid_back_before.py  findbankid.py  findemail.py  findidno.py  findmobile.py  findreadlname.py
[root@cdn tmp]# 

[root@cdn tmp]# cat findbankid_back_before.py 
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print each distinct excerpt of the Tomcat log that surrounds ``bankCode=``."""
import re

LOG_PATH = "/usr/local/tomcat_corp/logs/catalina.out"
# Capture the 75 characters before ``bankCode=`` and the 100 characters after
# it; ``.`` never matches a newline, so an excerpt stays within one log line.
FINDWORD = r'.{75}bankCode=.{100}'


def find_unique_matches(text, findword=FINDWORD):
    """Return the set of distinct substrings of ``text`` that match ``findword``.

    Collecting the matches into a set drops repeated excerpts; the iteration
    order of the result is unspecified.
    """
    return set(re.findall(findword, text))


def main():
    """Read the whole log, decode it as UTF-8 and print every distinct excerpt."""
    # Binary mode plus an explicit decode behaves the same on Python 2 and 3,
    # and the ``with`` block closes the file handle once the data is read.
    with open(LOG_PATH, "rb") as logfile:
        temp = logfile.read().decode("utf8")
    for excerpt in find_unique_matches(temp):
        print(excerpt)


if __name__ == "__main__":
    main()
[root@cdn tmp]
脚本运行情况:
[root@cdn tmp]# python findbankid_back_before.py 
..............................
.............................
bjectDTO [t=[com.dinpay.dpp.domain.system.config.BankGateway@*****[id=3,bankCode=CCB,bankAccount=62148502********,rate=0.0,name=建设银行,status=1,remark=<null>,defaultFlag=0,maxLimitAmo
uency=0], com.dinpay.dpp.domain.system.config.BankGateway@*****[id=1002,bankCode=SPABANK,bankAccount=01120004********,rate=0.0,name=深圳平安银企直连代付,status=1,remark=<null>,defaultFlag=0,
tDTO [t=[com.dinpay.dpp.domain.system.config.PayChannel@*****[id=<null>,bankCode=GDB,chargeType=<null>,rate=<null>,dinpayRate=<null>,name=广东发展银行,status=<null>,remark=<null>,remark2=

2、筛选email邮箱地址

python代码:
[root@cdn tmp]# cat findemail.py 
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print each distinct excerpt of the Tomcat log that surrounds ``bindEmail``."""
import re

LOG_PATH = "/usr/local/tomcat_corp/logs/catalina.out"
# Capture the 100 characters before ``bindEmail`` and the 90 characters after
# it; ``.`` never matches a newline, so an excerpt stays within one log line.
FINDWORD = r'.{100}bindEmail.{90}'


def find_unique_matches(text, findword=FINDWORD):
    """Return the set of distinct substrings of ``text`` that match ``findword``.

    Collecting the matches into a set drops repeated excerpts; the iteration
    order of the result is unspecified.
    """
    return set(re.findall(findword, text))


def main():
    """Read the whole log, decode it as UTF-8 and print every distinct excerpt."""
    # Binary mode plus an explicit decode behaves the same on Python 2 and 3,
    # and the ``with`` block closes the file handle once the data is read.
    with open(LOG_PATH, "rb") as logfile:
        temp = logfile.read().decode("utf8")
    for excerpt in find_unique_matches(temp):
        print(excerpt)


if __name__ == "__main__":
    main()
[root@cdn tmp]#
代码运行情况:
[root@cdn tmp]# python findemail.py 
anageController toFindPayPwdByCard memberObjectResponse:MemberDetailResponse [memberId=137****1580, bindEmail=null, bindMobile=137*****1580, companyName=李*, certificationType=1, createDate=Tue Dec 19
ankCardController toBankCardManage memberObjectResponse:MemberDetailResponse [memberId=186****3214, bindEmail=null, bindMobile=186*****3214, companyName=聂*平, certificationType=1, createDate=Thu May 0
eController toAccountManage memberObjectResponse:MemberDetailResponse [memberId=*****@163.com, bindEmail=ssh*****.com, bindMobile=137*****4764, companyName=沈*, certificationType=1, createDate=Tu

3、筛选身份证号码

python代码:
[root@cdn tmp]# cat findidno.py 
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print each distinct excerpt of the Tomcat log that surrounds ``certNum``."""
import re

LOG_PATH = "/usr/local/tomcat_corp/logs/catalina.out"
# Capture the 100 characters before ``certNum`` and the 20 characters after
# it; ``.`` never matches a newline, so an excerpt stays within one log line.
FINDWORD = r'.{100}certNum.{20}'


def find_unique_matches(text, findword=FINDWORD):
    """Return the set of distinct substrings of ``text`` that match ``findword``.

    Collecting the matches into a set drops repeated excerpts; the iteration
    order of the result is unspecified.
    """
    return set(re.findall(findword, text))


def main():
    """Read the whole log, decode it as UTF-8 and print every distinct excerpt."""
    # Binary mode plus an explicit decode behaves the same on Python 2 and 3,
    # and the ``with`` block closes the file handle once the data is read.
    with open(LOG_PATH, "rb") as logfile:
        temp = logfile.read().decode("utf8")
    for excerpt in find_unique_matches(temp):
        print(excerpt)


if __name__ == "__main__":
    main()
[root@cdn tmp]#
代码运行情况:
[root@cdn tmp]# python findidno.py 
l, address=null, supportBalance=1, bankCode=CCB, auditStatus=null, authStatus=null, isEnterprise=1, certNum=4*****************3
l, address=null, supportBalance=1, bankCode=ABC, auditStatus=null, authStatus=null, isEnterprise=1, certNum=4****************2]
l, address=null, supportBalance=1, bankCode=ABC, auditStatus=null, authStatus=null, isEnterprise=1, certNum=4*****************3
, address=null, supportBalance=1, bankCode=ICBC, auditStatus=null, authStatus=null, isEnterprise=0, certNum=4******************
l, address=null, supportBalance=1, bankCode=CMB, auditStatus=null, authStatus=null, isEnterprise=1, certNum=4****************X]
, address=null, supportBalance=1, bankCode=ICBC, auditStatus=null, authStatus=null, isEnterprise=1, certNum=4****************X]

4、筛选手机号码

python代码:
[root@cdn tmp]# cat  findmobile.py 
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print each distinct excerpt of the Tomcat log that surrounds ``bindMobile``."""
import re

LOG_PATH = "/usr/local/tomcat_corp/logs/catalina.out"
# Capture the 100 characters before ``bindMobile`` and the 65 characters after
# it; ``.`` never matches a newline, so an excerpt stays within one log line.
FINDWORD = r'.{100}bindMobile.{65}'


def find_unique_matches(text, findword=FINDWORD):
    """Return the set of distinct substrings of ``text`` that match ``findword``.

    Collecting the matches into a set drops repeated excerpts; the iteration
    order of the result is unspecified.
    """
    return set(re.findall(findword, text))


def main():
    """Read the whole log, decode it as UTF-8 and print every distinct excerpt."""
    # Binary mode plus an explicit decode behaves the same on Python 2 and 3,
    # and the ``with`` block closes the file handle once the data is read.
    with open(LOG_PATH, "rb") as logfile:
        temp = logfile.read().decode("utf8")
    for excerpt in find_unique_matches(temp):
        print(excerpt)


if __name__ == "__main__":
    main()
[root@cdn tmp]# 
代码运行情况:
[root@cdn tmp]# python findmobile.py 
oller setMemberExtInfo:MemberDetailResponse [memberId=*****@163.com, bindEmail=464*****.com, bindMobile=null, companyName=聂*平, certificationType=1, createDate=Thu Jun 2
er toAccountManage memberObjectResponse:MemberDetailResponse [memberId=131****8888, bindEmail=null, bindMobile=131*****8888, companyName=陈*荣2, certificationType=1, createDate=
-MemberLoginController setMemberExtInfo:MemberDetailResponse [memberId=131****8888, bindEmail=null, bindMobile=861*****1066, companyName=陈*荣, certificationType=1, createDate=S
-MemberLoginController setMemberExtInfo:MemberDetailResponse [memberId=131****8888, bindEmail=null, bindMobile=153*****6761, companyName=陈*荣, certificationType=0, createDate=S

5、筛选姓名

python代码:
[root@cdn tmp]# cat findreadlname.py 
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print each distinct excerpt of the Tomcat log that surrounds ``realName=``."""
import re

LOG_PATH = "/usr/local/tomcat_corp/logs/catalina.out"
# Capture the 100 characters before ``realName=`` and the 90 characters after
# it; ``.`` never matches a newline, so an excerpt stays within one log line.
FINDWORD = r'.{100}realName=.{90}'


def find_unique_matches(text, findword=FINDWORD):
    """Return the set of distinct substrings of ``text`` that match ``findword``.

    Collecting the matches into a set drops repeated excerpts; the iteration
    order of the result is unspecified.
    """
    return set(re.findall(findword, text))


def main():
    """Read the whole log, decode it as UTF-8 and print every distinct excerpt."""
    # Binary mode plus an explicit decode behaves the same on Python 2 and 3,
    # and the ``with`` block closes the file handle once the data is read.
    with open(LOG_PATH, "rb") as logfile:
        temp = logfile.read().decode("utf8")
    for excerpt in find_unique_matches(temp):
        print(excerpt)


if __name__ == "__main__":
    main()
[root@cdn tmp]# 
代码运行情况:

[root@cdn tmp]# python findreadlname.py 
,rgeRecordVO [rechargeDateStr=2017-11-20 16:35:18, dealDateStr=2017-11-20 16:35:18, transferType=充值, realName=陈*荣, memberId=q******[email protected], getSerialno()=21686, getAccountId()=35700*****, getRechar
rgeRecordVO [rechargeDateStr=2018-01-17 11:53:41, dealDateStr=2018-01-17 11:53:41, transferType=充值, realName=聂*平, memberId=j**********[email protected], getSerialno()=22012, getAccountId()=25800*****, getRec
rgeRecordVO [rechargeDateStr=2018-04-23 15:39:57, dealDateStr=2018-04-23 15:39:57, transferType=充值, realName=徐*波, memberId=b***********[email protected], getSerialno()=22191, getAccountId()=10000000*****, 
rgeRecordVO [rechargeDateStr=2017-04-26 16:54:14, dealDateStr=2017-04-26 16:54:14, transferType=充值, realName=田*君, memberId=b******[email protected], getSerialno()=19996, getAccountId()=10100*****, getRecharg
rgeRecordVO [rechargeDateStr=2017-11-17 09:39:10, dealDateStr=2017-11-17 09:39:10, transferType=充值, realName=深*店, memberId=5*******[email protected], getSerialno()=21616, getAccountId()=10000000*****, getRec
ordVO [rechargeDateStr=2017-09-19 17:15:32, dealDateStr=2017-09-19 17:15:32, transferType=Recharge, realName=聂*平, memberId=j**********[email protected], getSerialno()=21239, getAccountId()=100000000*****, g
ordVO [rechargeDateStr=2017-11-20 16:17:49, dealDateStr=2017-11-20 16:17:49, transferType=Recharge, realName=深*店, memberId=q******[email protected], getSerialno()=21683, getAccountId()=35700*****, getRechar

猜你喜欢

转载自blog.csdn.net/xwbk12/article/details/80696334