# Python -- log analysis (日志分析)

import datetime
import re
#def convert_time(timestr):
#    return datetime.datetime.strptime(timestr, "%d/%b/%Y:%H:%M:%S %z")

#def convert_request(request:str):
#    return dict(zip(('method','url','protocol'),request.split()))

# Matches the leading fields of one access-log line.  Anything that follows the
# quoted user agent is not inspected.  Compiled once so repeated calls to
# extract() do not have to look the pattern up again.
_LOG_LINE_RE = re.compile(
    r'(?P<http_cdn_src_ip>[\d\.]{7,}) - - '
    r'\[(?P<time_local>[^\[\]]+)\] '
    r'"(?P<request>[^"]+)" '
    r'(?P<status>\d+) (?P<body_bytes_sent>\d+) '
    r'"-" "(?P<http_user_agent>[^"]+)"'
)


def extract(line):
    """Pull the named fields out of a single access-log line.

    The pattern is applied with ``match``, so it must match from the very
    first character of ``line``.

    Args:
        line: One log line as a string.

    Returns:
        A dict mapping ``http_cdn_src_ip``, ``time_local``, ``request``,
        ``status``, ``body_bytes_sent`` and ``http_user_agent`` to the matched
        text (all values are strings).  An empty dict is returned when the
        line does not match, so callers can always iterate the result.
    """
    mat = _LOG_LINE_RE.match(line)
    if mat is None:
        # Previously this path fell through to ``return d`` with ``d`` never
        # assigned, which raised UnboundLocalError.
        print("re.match() returned None")
        return {}
    d = mat.groupdict()
    print(d)
    return d

# Field names, one per entry.  Presumably these mirror the columns of the
# access-log format in order -- TODO confirm; nothing in the visible code
# reads this list.
names = [
    'http_cdn_src_ip',
    '-',
    'remote_user',
    'time_local',
    'request',
    'status',
    'body_bytes_sent',
    'http_referer',
    'http_user_agent',
    'http_x_forwarded_for',
    'remote_addr',
    'request_time',
    'upstream_addr',
    'upstream_response_time',
]

def _parse_time_local(timestr):
    """Turn a ``07/Dec/2017:15:55:07 +0800`` style stamp into a datetime."""
    return datetime.datetime.strptime(timestr, "%d/%b/%Y:%H:%M:%S %z")


def _parse_request(request):
    """Split a request line on whitespace into method, url and protocol."""
    parts = request.split()
    # zip() stops at the shorter input, so missing trailing parts are simply
    # absent from the result and extra parts are ignored.
    return dict(zip(('method', 'url', 'protocol'), parts))


# Per-field converters applied to the raw string captured for that field.
# Fields without an entry here are left as strings by the caller.
ops = {
    'time_local': _parse_time_local,
    'request': _parse_request,
    'status': int,
    'body_bytes_sent': int,
}


# Sample access-log line.  The trailing backslashes inside the literal are line
# continuations, so the value is a single line with no embedded newlines.
log_data='''1.24.17.6 - - [07/Dec/2017:15:55:07 +0800] "GET /tch/ApchReprt/getAllon HTTP/1.1" 200 113 "-" "Mozilla/5.0 (Linux; Android 6.0.1; OPPO R9s Build/MMB29M; wv)\
AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043015 \
Safari/537.36 V1_AND_SQ_6.6.9_482_YYB_D QQ/6.6.9.3060 NetType/4G WebP/0.3.0 Pixel/1080" - 68.20.3.21 0.058 127.0.0.1:1111 0.014'''

# Convert each extracted field with its entry in ``ops``; fields that have no
# converter keep the raw string captured by extract().
d = {}
for k, v in extract(log_data).items():
    convert = ops.get(k)
    d[k] = convert(v) if convert is not None else v
print(d)

  

# 猜你喜欢 (You may also like)

# 转载自 www.cnblogs.com/kuku0223/p/12177083.html (reposted from this URL)