基于上面的结构图,写个 Python 脚本来分析 binlog 日志:
#_*_ coding:utf8 _*_
import os
import sys
import re
import time
import datetime
import re
# binlog_name = sys.argv[1]
# Directory that is searched for the binlog index file.
binlog_path = '/mysqlbinlog/'

# Accepted command lines:
#   <binlog_name> <start-datetime> <stop-datetime>   (binlog_name must contain 'bin')
#   <start-datetime> <stop-datetime>                 (binlogs are taken from the index file)
# Anything else gets the usage text and a non-zero exit status instead of
# continuing with undefined start/stop values.
if len(sys.argv) == 4 and 'bin' in sys.argv[1]:
    binlog_name = sys.argv[1]
    start_datetime = sys.argv[2]
    stop_datetime = sys.argv[3]
elif len(sys.argv) == 3:
    start_datetime = sys.argv[1]
    stop_datetime = sys.argv[2]
else:
    print('INPUT <binlog_name><start-datetime(format:YYYY-MM-DD HH24:MI:SS)><stop-datetime(format:YYYY-MM-DD HH24:MI:SS)>')
    sys.exit(1)

# Find the *.index file directly under binlog_path.  The pattern is quoted so
# the shell hands it to find(1) unchanged instead of expanding it against the
# current working directory.
rx = os.popen('find ' + binlog_path + " -mindepth 1 -maxdepth 1 -name '*.index' ")
try:
    # First line of the find output; '' when nothing matched.
    binlog_index = rx.read().split('\n')[0]
finally:
    rx.close()

# ---- state shared with get_data() through `global` declarations ----
sql_text = ''                  # Rows_query statement text collected so far
flag_rows_query = 0            # parser state: 0 idle, 1 reading SQL text,
                               # 2 waiting for the row event, 3 counting rows
dml_cnt = 0                    # row changes counted for the current statement
one_trans = {}                 # transaction being assembled
all_trans = {}                 # gtid -> finished transaction dict
trans_sql = {}                 # statement being assembled: {'sql': ..., 'dml': ...}
sql_list = []                  # statements of the transaction being assembled
gtid_x = ''                    # gtid of the transaction being assembled
dml_dic = {}                   # gtid -> total row changes of the transaction
dur_dic = {}                   # gtid -> duration of the transaction in seconds
binlog_rows_query_events = 0   # becomes 1 once a Rows_query event has been seen
def get_dateformat(dt):
    """Convert a loosely formatted binlog timestamp into a datetime.

    The input is expected to hold a ``YYMMDD`` date followed by a
    ``H:MM:SS`` or ``HH:MM:SS`` time.  The fields may be separated or
    surrounded by any amount of whitespace, so a space-padded single-digit
    hour such as ``'190822  2:45:50'`` is accepted.

    :param dt: timestamp text, e.g. ``'190822  2:45:50 '``
    :return: the corresponding ``datetime.datetime``
    :raises ValueError: if the text does not contain a date and a time field,
        or the fields do not match ``%y%m%d %H:%M:%S``
    """
    # Splitting on arbitrary whitespace avoids the empty field that a
    # single-space split produces when the hour is padded with a blank.
    fields = dt.split()
    if len(fields) < 2:
        raise ValueError('expected "<yymmdd> <h:mm:ss>", got %r' % (dt,))
    date_part, time_part = fields[0], fields[1]
    # zfill turns '2:45:50' into '02:45:50' so that %H always sees two digits.
    return datetime.datetime.strptime(date_part + ' ' + time_part.zfill(8), '%y%m%d %H:%M:%S')
def is_chinese(uchar):
    """Return True when the unicode character lies in U+4E00..U+9FA5, else False."""
    return u'\u4e00' <= uchar <= u'\u9fa5'
def fill_text(text, width):
    """Right-pad ``text`` with spaces up to ``width`` display columns.

    Every character accepted by ``is_chinese`` is counted as two columns, so
    it receives one column less padding than an ordinary character.  When the
    text is already wider than ``width`` it is returned without padding.

    :param text: any value; it is converted with ``str()`` first
    :param width: target width in display columns
    :return: ``str(text)`` followed by the padding spaces
    """
    stext = str(text)
    # On Python 2 ``str`` is a UTF-8 byte string and has to be decoded before
    # characters can be counted; on Python 3 it is already text and has no
    # ``decode`` method.
    utext = stext.decode("utf-8") if isinstance(stext, bytes) else stext
    cn_count = sum(1 for u in utext if is_chinese(u))
    return stext + (width - cn_count - len(utext)) * " "
def cut_text(text, lenth):
    """Cut ``text`` into consecutive pieces of at most ``lenth`` characters.

    Pieces are taken by position, so every character (newlines included)
    ends up in exactly one piece and in the original order.  The last piece
    may be shorter than ``lenth``.  Empty text yields ``['']`` so that callers
    printing one row per piece still print one row.

    :param text: the string to cut
    :param lenth: maximum piece length, must be positive
    :return: list of pieces; never empty
    :raises ValueError: if ``lenth`` is not positive
    """
    if lenth <= 0:
        raise ValueError('lenth must be positive, got %r' % (lenth,))
    pieces = [text[start:start + lenth] for start in range(0, len(text), lenth)]
    return pieces or ['']
# print(cut_text('123456789abcdefg', 3))
# Event-header markers.  The patterns accept any run of whitespace between the
# words, so they match both a single space and a tab.
# NOTE(review): mysqlbinlog is believed to separate these fields with tabs --
# confirm against the output of the server version in use.
_QUERY_EVENT_RE = re.compile(r'Query\s+thread_id')
_GTID_EVENT_RE = re.compile(r'GTID\s+last_committed=')
# Line prefixes that are counted as one row change each.
_ROW_CHANGE_MARKERS = ('### INSERT', '### UPDATE', '### DELETE')


def _event_timestamp(line):
    """Parse the timestamp that precedes "server id" on an event header line."""
    stamp = line.replace("#", '').split("server id")[0]
    # Collapse whitespace so a space-padded hour still reaches
    # get_dateformat() as exactly "<date> <time>".
    return get_dateformat(' '.join(stamp.split()))


def _count_row_changes(line, label_statement):
    """Add the row-change markers found in ``line`` to the current statement.

    With ``label_statement`` set, the marker itself is stored as the statement
    text (used when the binlog carries no Rows_query events).
    """
    global dml_cnt
    for marker in _ROW_CHANGE_MARKERS:
        if marker in line:
            dml_cnt += 1
            trans_sql['dml'] = dml_cnt
            if label_statement:
                trans_sql['sql'] = marker


def _finish_statement():
    """Move the statement being built into the current transaction."""
    global dml_cnt, flag_rows_query, trans_sql
    # Always provide both keys so later consumers can index them safely even
    # when no row change was counted for this statement.
    trans_sql.setdefault('sql', '')
    trans_sql.setdefault('dml', 0)
    sql_list.append(trans_sql)
    trans_sql = {}
    dml_cnt = 0
    flag_rows_query = 0


def _finish_transaction():
    """Record the current transaction under its gtid and reset the builders."""
    global one_trans, sql_list, gtid_x
    one_trans['sql'] = sql_list
    dml_sum = sum(stmt.get('dml', 0) for stmt in sql_list)
    one_trans['dml_sum'] = dml_sum

    # The requested time window can cut a transaction in half, leaving one of
    # the two timestamps unseen; fall back to the one that is known.
    start = one_trans.get('start_time')
    end = one_trans.get('end_time')
    if start is None:
        start = end
    if end is None:
        end = start
    one_trans['start_time'] = start
    one_trans['end_time'] = end

    if start is None:
        duration = 0
    else:
        # total_seconds() keeps whole days, which timedelta.seconds drops;
        # a negative span is reported as 0.
        duration = max(0, int((end - start).total_seconds()))
    one_trans['dur'] = duration
    dur_dic[gtid_x] = duration
    dml_dic[gtid_x] = dml_sum
    all_trans[gtid_x] = one_trans

    one_trans = {}
    sql_list = []
    gtid_x = ''


def _parse_binlog_lines(lines):
    """Feed decoded mysqlbinlog output lines through the transaction parser.

    ``flag_rows_query`` holds the per-statement state: 0 idle, 1 collecting
    the Rows_query text, 2 waiting for the row event, 3 counting row changes.
    """
    global sql_text, flag_rows_query, gtid_x, binlog_rows_query_events
    for raw_line in lines:
        line = raw_line.rstrip('\n')

        # gtid of the transaction
        if "SET @@SESSION.GTID_NEXT=" in line:
            quoted = line.split("'")
            if len(quoted) > 1:
                gtid_x = quoted[1]

        # transaction start time
        if _QUERY_EVENT_RE.search(line):
            one_trans['start_time'] = _event_timestamp(line)
            continue
        # transaction end time
        if _GTID_EVENT_RE.search(line):
            one_trans['end_time'] = _event_timestamp(line)
            continue
        # original SQL text of the statement follows
        if "Rows_query" in line:
            flag_rows_query = 1
            binlog_rows_query_events = 1
            continue

        if binlog_rows_query_events == 1:
            if flag_rows_query == 1:
                if '# at' in line:
                    # The next event starts: the statement text is complete.
                    flag_rows_query = 2
                    trans_sql['sql'] = sql_text.rstrip('\n')
                    sql_text = ''
                    continue
                # Keep everything after the leading '#', including any
                # further '#' characters that belong to the SQL itself.
                _, hash_mark, statement = line.partition('#')
                if hash_mark:
                    sql_text += statement.strip() + '\n'
            elif flag_rows_query == 2:
                if 'flags: STMT_END_F' in line:
                    flag_rows_query = 3
                    continue
            elif flag_rows_query == 3:
                _count_row_changes(line, label_statement=False)
                if '# at' in line:
                    _finish_statement()
        else:
            # No Rows_query events seen so far: label each statement with the
            # kind of row change instead of its SQL text.
            if 'flags: STMT_END_F' in line:
                flag_rows_query = 3
                continue
            if flag_rows_query == 3:
                _count_row_changes(line, label_statement=True)
                if '# at' in line:
                    _finish_statement()

        if 'COMMIT/*!*/;' in line:
            _finish_transaction()


def get_data(binlog_name, start_datetime, stop_datetime,
             mysqlbinlog_bin='/home/db/mysql/product/bin/mysqlbinlog'):
    """Decode one binlog file and add its transactions to the module state.

    Runs ``mysqlbinlog -vv --base64-output=decode-rows`` for the given time
    window and parses the output line by line.  Results are stored in the
    module-level ``all_trans``, ``dml_dic`` and ``dur_dic`` dictionaries,
    keyed by gtid; nothing is returned.

    :param binlog_name: path of the binlog file; blank names are ignored
    :param start_datetime: value for ``--start-datetime``
    :param stop_datetime: value for ``--stop-datetime``
    :param mysqlbinlog_bin: path of the mysqlbinlog executable
    """
    import subprocess

    binlog_name = (binlog_name or '').strip()
    if not binlog_name:
        # e.g. the empty entry after the last newline of an index file
        return

    # Shell-style rendering of the command, printed for the operator only.
    cmd = mysqlbinlog_bin + " -vv --base64-output='decode-rows' " + binlog_name + ' --start-datetime=' + "'" + start_datetime + "'" + ' --stop-datetime=' + "'" + stop_datetime + "'"
    print(cmd)

    # The process is started from an argument list, so no shell interprets
    # the file name or the datetime values.
    argv = [mysqlbinlog_bin, '-vv', '--base64-output=decode-rows', binlog_name,
            '--start-datetime=' + start_datetime,
            '--stop-datetime=' + stop_datetime]
    try:
        proc = subprocess.Popen(argv, stdout=subprocess.PIPE, universal_newlines=True)
    except OSError as exc:
        sys.stderr.write('cannot run %s: %s\n' % (mysqlbinlog_bin, exc))
        return

    try:
        # Stream the output instead of loading the whole decode into memory.
        _parse_binlog_lines(proc.stdout)
    finally:
        proc.stdout.close()
        exit_code = proc.wait()
    if exit_code != 0:
        sys.stderr.write('mysqlbinlog exited with status %d for %s\n' % (exit_code, binlog_name))
# print(binlog_index)
def _top_gtids(per_gtid, limit=10):
    """Return up to ``limit`` gtids ordered by descending value.

    The sort is stable, so gtids with equal values keep the dictionary's
    iteration order.  ``per_gtid`` is not modified.
    """
    return sorted(per_gtid, key=per_gtid.get, reverse=True)[:limit]


def _border(widths):
    """Return a horizontal rule whose segments line up with the table rows."""
    return '+ ' + ' + '.join('-' * width for width in widths) + ' +'


def _percent(part, whole):
    """Format ``part`` as a percentage of ``whole`` rounded to three decimals."""
    return str(round(float(part) * 100 / whole, 3)) + '%'


# ---- collect the data ----
if len(sys.argv) == 3:
    # No binlog named on the command line: analyse every file in the index.
    if not binlog_index:
        sys.exit('no *.index file found under ' + binlog_path)
    with open(binlog_index, 'r') as index_file:
        for index_line in index_file:
            x = index_line.strip()
            if not x:
                continue
            print(x)
            get_data(x, start_datetime, stop_datetime)
elif len(sys.argv) == 4 and 'bin' in sys.argv[1]:
    # A single binlog file was named on the command line.
    get_data(binlog_name, start_datetime, stop_datetime)

# Column widths used for the screen output.
a0 = 43
a1 = 10
a2 = 15
a3 = 15
a4 = 15
a5 = 10
a6 = 10

print('显示事务量前10事务gtid信息:')

# gtids of the 10 longest transactions (computed, currently not displayed)
gtid_dur_list = _top_gtids(dur_dic)
# gtids of the 10 transactions with the most row changes
gtid_dml_list = _top_gtids(dml_dic)

# Totals; the *_num variants replace 0 by -1 so that they can be used as
# divisors.
tol_dur = sum(dur_dic.values())
tol_dur_num = -1 if tol_dur == 0 else tol_dur
tol_dml = sum(dml_dic.values())
tol_dml_num = -1 if tol_dml == 0 else tol_dml

# ---- summary table: one row per transaction ----
summary_widths = (a0, a1, a5, a2, a3, a6, a4)
fmt = "| {0} | {1} | {2} | {3} | {4} | {5} | {6} |"
print(_border(summary_widths))
print(fmt.format(fill_text("gtid", a0),
                 fill_text("事务量", a1),
                 fill_text("总事务量", a5),
                 fill_text("事务占比", a2),
                 fill_text("执行时长", a3),
                 fill_text("总时长", a6),
                 fill_text("时长占比", a4)))
print(_border(summary_widths))
for gtid_dml in gtid_dml_list:
    trans = all_trans[gtid_dml]
    print(fmt.format(fill_text(gtid_dml, a0),
                     fill_text(trans['dml_sum'], a1),
                     fill_text(tol_dml, a5),
                     fill_text(_percent(trans['dml_sum'], tol_dml_num), a2),
                     fill_text(trans['dur'], a3),
                     fill_text(tol_dur, a6),
                     fill_text(_percent(trans['dur'], tol_dur_num), a4)))
print(_border(summary_widths))

# ---- detail table: one row per statement, long SQL wrapped over rows ----
print('显示事务量前10的事务的详细信息:')
a3 = 40
a4 = 20
detail_widths = (a0, a3, a1, a2, a4, a4)
fmt = "| {0} | {1} | {2} | {3} | {4} | {5} |"
print(_border(detail_widths))
print(fmt.format(fill_text("gtid", a0),
                 fill_text("sql", a3),
                 fill_text("总事务量", a1),
                 fill_text("事务占比", a2),
                 fill_text("开始时间", a4),
                 fill_text("结束时间", a4)))
print(_border(detail_widths))
for gtid_dml in gtid_dml_list:
    trans = all_trans[gtid_dml]
    for stmt_no, sql in enumerate(trans['sql']):
        # A line break inside the SQL would break the table row.
        flat_sql = sql['sql'].replace('\n', ' ')
        for chunk_no, sql_cut in enumerate(cut_text(flat_sql, a3)):
            if chunk_no and not sql_cut:
                # empty remainder after an exact multiple of the column width
                continue
            first_chunk = chunk_no == 0
            # gtid and times appear only on the first row of a transaction;
            # counts appear on the first row of each statement.
            first_row = first_chunk and stmt_no == 0
            print(fmt.format(
                fill_text(gtid_dml if first_row else '', a0),
                fill_text(sql_cut, a3),
                fill_text(sql['dml'] if first_chunk else '', a1),
                fill_text(_percent(sql['dml'], tol_dml_num) if first_chunk else '', a2),
                fill_text(trans['start_time'] if first_row else '', a4),
                fill_text(trans['end_time'] if first_row else '', a4)))
print(_border(detail_widths))
# print(all_trans)
显示结果如下:
显示事务量前10事务gtid信息:
+ ------------------------------------------- + ---------- + ---------- + --------------- + --------------- + ----------+ --------------- +
| gtid | 事务量 | 总事务量 | 事务占比 | 执行时长 | 总时长 | 时长占比 |
+ ------------------------------------------- + ---------- + ----------+ --------------- + --------------- + ---------- + --------------- +
| 360bfb62-db39-11e8-b641-000c29beb35b:2006 | 1000 | 2017 | 49.579% | 2 | 55 | 3.636% |
| 360bfb62-db39-11e8-b641-000c29beb35b:2010 | 10 | 2017 | 0.496% | 2 | 55 | 3.636% |
| 360bfb62-db39-11e8-b641-000c29beb35b:2009 | 3 | 2017 | 0.149% | 6 | 55 | 10.909% |
| 360bfb62-db39-11e8-b641-000c29beb35b:2008 | 3 | 2017 | 0.149% | 43 | 55 | 78.182% |
| 360bfb62-db39-11e8-b641-000c29beb35b:1006 | 1 | 2017 | 0.05% | 0 | 55 | 0.0% |
| 360bfb62-db39-11e8-b641-000c29beb35b:1007 | 1 | 2017 | 0.05% | 0 | 55 | 0.0% |
| 360bfb62-db39-11e8-b641-000c29beb35b:1778 | 1 | 2017 | 0.05% | 0 | 55 | 0.0% |
| 360bfb62-db39-11e8-b641-000c29beb35b:1779 | 1 | 2017 | 0.05% | 0 | 55 | 0.0% |
| 360bfb62-db39-11e8-b641-000c29beb35b:1740 | 1 | 2017 | 0.05% | 0 | 55 | 0.0% |
| 360bfb62-db39-11e8-b641-000c29beb35b:1773 | 1 | 2017 | 0.05% | 0 | 55 | 0.0% |
+ ------------------------------------------- + ---------- + ---------- + --------------- + --------------- + ----------+ --------------- +
显示事务量前10的事务的详细信息:
+ ------------------------------------------- + ---------------------------------------- + ---------- + --------------- + -------------------- + -------------------- +
| gtid | sql | 总事务量 | 事务占比 | 开始时间 | 结束时间 |
+ ------------------------------------------- + ---------------------------------------- + ---------- + --------------- + -------------------- + -------------------- +
| 360bfb62-db39-11e8-b641-000c29beb35b:2006 | update test set m = 'haha' | 1000 | 49.579% | 2019-08-22 02:45:50 | 2019-08-22 02:45:52 |
| 360bfb62-db39-11e8-b641-000c29beb35b:2010 | update test set m = 'x' limit 10 | 10 | 0.496% | 2019-08-22 22:21:53 | 2019-08-22 22:21:55 |
| 360bfb62-db39-11e8-b641-000c29beb35b:2009 | insert into test(id) select 10001 | 1 | 0.05% | 2019-08-22 22:21:03 | 2019-08-22 22:21:09 |
| | insert into test(id) select 10002 | 1 | 0.05% | | |
| | insert into test(id) select 10003 | 1 | 0.05% | | |
| 360bfb62-db39-11e8-b641-000c29beb35b:2008 | delete from test limit 1 | 1 | 0.05% | 2019-08-22 21:40:30 | 2019-08-22 21:41:13 |
| | insert into test select 1000,'z' | 1 | 0.05% | | |
| | update test set m = 'Null' where id = 99 | 1 | 0.05% | | |
| | 8 | | | | |
| 360bfb62-db39-11e8-b641-000c29beb35b:1006 | insert into test(id) select 0 | 1 | 0.05% | 2019-08-22 02:45:19 | 2019-08-22 02:45:19 |
| 360bfb62-db39-11e8-b641-000c29beb35b:1007 | insert into test(id) select 1 | 1 | 0.05% | 2019-08-22 02:45:19 | 2019-08-22 02:45:19 |
| 360bfb62-db39-11e8-b641-000c29beb35b:1778 | insert into test(id) select 772 | 1 | 0.05% | 2019-08-22 02:45:24 | 2019-08-22 02:45:24 |
| 360bfb62-db39-11e8-b641-000c29beb35b:1779 | insert into test(id) select 773 | 1 | 0.05% | 2019-08-22 02:45:24 | 2019-08-22 02:45:24 |
| 360bfb62-db39-11e8-b641-000c29beb35b:1740 | insert into test(id) select 734 | 1 | 0.05% | 2019-08-22 02:45:24 | 2019-08-22 02:45:24 |
| 360bfb62-db39-11e8-b641-000c29beb35b:1773 | insert into test(id) select 767 | 1 | 0.05% | 2019-08-22 02:45:24 | 2019-08-22 02:45:24 |
+ ------------------------------------------- + ---------------------------------------- + ---------- + --------------- + --------------------+ -------------------- +
Process finished with exit code 0