mysql binlog分析脚本及binlog结构内容

基于上面的结构图,写个python脚本来分析binlog日志:

#_*_ coding:utf8 _*_
import os
import sys
import  re
import  time
import  datetime
import re
# binlog_name = sys.argv[1]

# Directory that is searched for the binlog *.index file.
binlog_path = '/mysqlbinlog/'

# Accepted command lines:
#   <binlog_name> <start-datetime> <stop-datetime>   (binlog_name must contain 'bin')
#   <start-datetime> <stop-datetime>
# binlog_name is only defined for the three-argument form.
if len(sys.argv) == 4 and 'bin' in sys.argv[1]:
    binlog_name = sys.argv[1]
    start_datetime = sys.argv[2]
    stop_datetime = sys.argv[3]
elif len(sys.argv) == 3:
    start_datetime = sys.argv[1]
    stop_datetime = sys.argv[2]
else:
    # Without a time window there is nothing to analyse: show the usage and
    # stop instead of continuing with undefined start/stop values.
    print('INPUT <binlog_name><start-datetime(format:YYYY-MM-DD HH24:MI:SS)><stop-datetime(format:YYYY-MM-DD HH24:MI:SS)>')
    print('   or <start-datetime(format:YYYY-MM-DD HH24:MI:SS)><stop-datetime(format:YYYY-MM-DD HH24:MI:SS)>')
    sys.exit(1)

# print(binlog_name)
# print(start_datetime)
# print(stop_datetime)
# binlog_name='/mysqlbinlog/master-bin.000008'
# Locate the binlog index file directly under binlog_path.  The pattern is
# quoted so the shell hands '*.index' to find unchanged instead of expanding
# it against the current working directory.
rx = os.popen('find ' + binlog_path + " -mindepth 1 -maxdepth 1 -name '*.index' ")
try:
    # First line of find's output; an empty string when nothing matched.
    binlog_index = rx.read().split('\n')[0]
finally:
    rx.close()

# Module-level parser state; get_data() rebinds these through `global`.

# Text accumulators: SQL lines of the current Rows_query event, current GTID.
sql_text = gtid_x = ''

# Counters / flags:
#   flag_rows_query          - parser state (0..3) for the current statement
#   dml_cnt                  - row changes counted for the current statement
#   binlog_rows_query_events - set to 1 once a Rows_query event has been seen
flag_rows_query = dml_cnt = binlog_rows_query_events = 0

# Statement currently being parsed ('sql' text and 'dml' row count).
trans_sql = {}
# Statements collected for the transaction currently being parsed.
sql_list = []
# Transaction currently being parsed.
one_trans = {}
# gtid -> finished transaction.
all_trans = {}
# gtid -> number of row changes in the transaction.
dml_dic = {}
# gtid -> transaction duration in seconds.
dur_dic = {}

def get_dateformat(dt):
    """Parse a loosely formatted 'YYMMDD H:MM:SS' string into a datetime.

    A double space is collapsed to a single one, the first two
    space-separated fields are taken as date and time, and the time field is
    zero-padded to eight characters before parsing.

    Raises IndexError when fewer than two fields are present and ValueError
    when the fields do not match '%y%m%d %H:%M:%S'.
    """
    fields = dt.replace('  ', ' ').split(' ')
    stamp = '{0} {1}'.format(fields[0], fields[1].zfill(8))
    return datetime.datetime.strptime(stamp, '%y%m%d %H:%M:%S')

def is_chinese(uchar):
    """Return True when the unicode character lies in U+4E00..U+9FA5."""
    return u'\u4e00' <= uchar <= u'\u9fa5'


def fill_text(text, width):
    """Return ``str(text)`` padded on the right with spaces to *width* columns.

    Every character accepted by is_chinese() is counted as two columns, so
    the padding is reduced by one extra space per such character.  When the
    text is already wider than *width* it is returned without padding.

    :param text: any value; it is converted with ``str()`` first.
    :param width: target display width in columns.
    :return: the padded string.
    """
    stext = str(text)
    # On Python 2 ``str`` is a byte string and has to be decoded before its
    # characters can be counted; on Python 3 ``str`` is already text and has
    # no ``decode`` method.
    utext = stext.decode("utf-8") if isinstance(stext, bytes) else stext
    cn_count = sum(1 for u in utext if is_chinese(u))
    return stext + (width - cn_count - len(utext)) * " "


def cut_text(text, lenth):
    """Split *text* into display chunks of at most *lenth* characters.

    The text is split on newlines first, so a chunk never contains a line
    break, and each line is then sliced into pieces of *lenth* characters.
    Empty lines produce no chunk, and no empty chunk is emitted when a line's
    length is an exact multiple of *lenth*.

    :param text: the string to split.
    :param lenth: maximum chunk length; must be positive.
    :return: list of chunks; ``['']`` when there is nothing to show, so that
        callers printing one table row per chunk still print one row.
    """
    pieces = []
    for row in text.split('\n'):
        pieces.extend(row[pos:pos + lenth] for pos in range(0, len(row), lenth))
    return pieces or ['']

# print(cut_text('123456789abcdefg', 3))


def _dml_marker(line):
    """Return the row-change marker *line* starts with, or None."""
    for marker in ('### INSERT', '### UPDATE', '### DELETE'):
        if line.startswith(marker):
            return marker
    return None


def get_data(binlog_name,start_datetime,stop_datetime):
    """Decode one binlog file with mysqlbinlog and collect transaction statistics.

    Runs ``mysqlbinlog -vv --base64-output='decode-rows'`` restricted to the
    given time window and scans its output line by line.  For every
    transaction closed by a ``COMMIT/*!*/;`` line the module-level
    dictionaries are updated:

    * ``all_trans[gtid]`` - dict with ``start_time``, ``end_time``, ``dur``
      (seconds), ``dml_sum`` and ``sql`` (list of ``{'sql': ..., 'dml': ...}``)
    * ``dml_dic[gtid]``   - number of row changes in the transaction
    * ``dur_dic[gtid]``   - duration of the transaction in seconds

    Transactions for which no start or end timestamp was seen are skipped.

    :param binlog_name: binlog file passed to mysqlbinlog; a blank name is
        ignored.
    :param start_datetime: value for ``--start-datetime``.
    :param stop_datetime: value for ``--stop-datetime``.
    """
    global sql_text
    global flag_rows_query
    global dml_cnt
    global one_trans
    global all_trans
    global trans_sql
    global sql_list
    global gtid_x
    global dml_dic
    global dur_dic
    global binlog_rows_query_events

    # A blank name (e.g. the empty last line of an index file) would start
    # mysqlbinlog without any input file.
    if not binlog_name or not binlog_name.strip():
        return

    cmd = "/home/db/mysql/product/bin/mysqlbinlog -vv --base64-output='decode-rows' "  + binlog_name + ' --start-datetime=' + "'" + start_datetime + "'" + ' --stop-datetime=' + "'" + stop_datetime +"'"
    print(cmd)
    result = os.popen(cmd)

    try:
        # Parse the decoded binlog, streaming it instead of loading it whole.
        for raw_line in result:
            line = raw_line.rstrip('\n')

            # GTID of the transaction
            if "SET @@SESSION.GTID_NEXT=" in line:
                gtid_x = line.split("'")[1]

            # Transaction start time (header of a Query event)
            if "Query\tthread_id" in line:
                one_trans['start_time'] = get_dateformat(line.replace("#",'').split("server id")[0])
                continue

            # Transaction end time (header of the GTID event)
            if "GTID\tlast_committed=" in line:
                one_trans['end_time'] = get_dateformat(line.replace("#",'').split("server id")[0])
                continue

            # Start of the original SQL text (Rows_query event)
            if "Rows_query" in line:
                flag_rows_query = 1
                binlog_rows_query_events = 1
                continue

            # Event boundaries are lines of the form '# at <position>'.
            # Matching the line start keeps SQL text or row values that
            # merely contain '# at' from being mistaken for a boundary.
            at_marker = line.startswith('# at ')

            if binlog_rows_query_events == 1:
                if flag_rows_query == 1:
                    if at_marker:
                        # The SQL text of the statement is complete.
                        flag_rows_query = 2
                        trans_sql['sql'] = sql_text.rstrip('\n')
                        sql_text = ''
                        continue
                    # Drop only the leading comment marker so that a '#'
                    # inside the statement itself is preserved.
                    text = line[1:] if line.startswith('#') else line
                    sql_text += text.strip() + '\n'
                elif flag_rows_query == 2 and 'flags: STMT_END_F' in line:
                    flag_rows_query = 3
                    continue
            elif 'flags: STMT_END_F' in line:
                # No Rows_query event seen so far: row changes are counted
                # after every STMT_END_F header.
                flag_rows_query = 3
                continue

            if flag_rows_query == 3:
                # Count the row changes of the current statement.
                marker = _dml_marker(line)
                if marker is not None:
                    dml_cnt += 1
                    trans_sql['dml'] = dml_cnt
                    if binlog_rows_query_events != 1:
                        # No SQL text is available; record the kind of change.
                        trans_sql['sql'] = marker

                if at_marker:
                    # Next event reached: the statement is finished.
                    if trans_sql:
                        # Later code reads both keys of every entry.
                        trans_sql.setdefault('sql', '')
                        trans_sql.setdefault('dml', 0)
                        sql_list.append(trans_sql)
                    dml_cnt = 0
                    flag_rows_query = 0
                    trans_sql = {}

            if 'COMMIT/*!*/;' in line:
                if 'start_time' in one_trans and 'end_time' in one_trans:
                    dml_sum = sum(x.get('dml', 0) for x in sql_list)
                    # total_seconds() keeps the day component that the
                    # timedelta attribute .seconds would drop.
                    duration = int((one_trans['end_time'] - one_trans['start_time']).total_seconds())

                    one_trans['sql'] = sql_list
                    one_trans['dml_sum'] = dml_sum
                    one_trans['dur'] = duration
                    dur_dic[gtid_x] = duration
                    dml_dic[gtid_x] = dml_sum
                    all_trans[gtid_x] = one_trans
                one_trans = {}
                sql_list = []
                gtid_x = ''
    finally:
        result.close()

# print(binlog_index)
if len(sys.argv) == 3:
    # Only a time window was given: analyse every binlog listed in the index.
    if not binlog_index:
        sys.exit('no binlog index file (*.index) found under ' + binlog_path)
    with open(binlog_index, 'r') as f:
        binlog_files = [entry.strip() for entry in f.read().split('\n')]
    for x in binlog_files:
        # The index file ends with a newline, which yields an empty entry.
        if not x:
            continue
        print(x)
        get_data(x, start_datetime, stop_datetime)
elif len(sys.argv) == 4 and 'bin' in sys.argv[1]:
    # A single binlog file was named on the command line.
    get_data(binlog_name, start_datetime, stop_datetime)

       #  get_data(file,start_datetime,stop_datetime)

# dml_cnt 降序列表
dml_list = []
# dur    降序列表
dur_list = []
# dur    降序列表 对应的 gtid
gtid_dur_list = []
# dml_cmt  降序列表 对应的 gtid
gtid_dml_list = []
# 总dur
tol_dur=0
# 总 dml_cnt
tol_dml = 0
# 屏幕输出字符宽度定义
a0 = 43
a1 = 10
a2 = 15
a3 = 15
a4 = 15
a5 = 10
a6 = 10

print('显示事务量前10事务gtid信息:')
# 取事务量前10的事务gtid,并排序,显示事务量,显示事务量占比,显示执行时长
# 按时长排序,获取前10的gtid
dur_dic1 = dur_dic
tol_dur = sum(dur_dic.values())

tol_dur_num = -1 if tol_dur == 0  else tol_dur


for dur in sorted(dur_dic.values(),reverse=True)[:10]:
    for key,value in dur_dic.items():
        if value == dur :
            gtid_dur_list.append(key)
            del dur_dic[key]
            break

# 按事务量排许,获取前10的gtid
dml_dic1 = dml_dic
tol_dml = sum(dml_dic.values())
tol_dml_num = -1 if tol_dml == 0  else tol_dml
for dml in sorted(dml_dic.values(),reverse=True)[:10]:
    for key,value in dml_dic.items():
        if value == dml :
            gtid_dml_list.append(key)
            del dml_dic[key]
            break



print(("+ {0:{0}<%d} + {0:{0}<%d} + {0:{0}<%d} + {0:{0}<%d} + {0:{0}<%d} + {0:{0}<%d}+ {0:{0}<%d} +" % (a0, a1,a5, a2, a3,a6, a4)).format('-'))
fmt = "| {0} | {1} | {2} | {3} | {4} | {5} | {6} |"
print(fmt.format(fill_text("gtid", a0), fill_text("事务量", a1), fill_text("总事务量", a1), fill_text("事务占比", a2), fill_text("执行时长", a3), fill_text("总时长", a6),fill_text("时长占比",a4)))
print(("+ {0:{0}<%d} + {0:{0}<%d} + {0:{0}<%d}+ {0:{0}<%d} + {0:{0}<%d} + {0:{0}<%d} + {0:{0}<%d} +" % (a0, a1,a5, a2, a3,a6, a4)).format('-'))
for gtid_dml in gtid_dml_list:
    print(fmt.format(fill_text(gtid_dml, a0),
                     fill_text(all_trans[gtid_dml]['dml_sum'], a1),
                     fill_text(tol_dml, a5),
                     fill_text(str(round(float(all_trans[gtid_dml]['dml_sum'])*100/tol_dml_num,3))+'%', a2 ),
                     fill_text(all_trans[gtid_dml]['dur'], a3),
                     fill_text(tol_dur, a6),
                     fill_text(str(round(float(all_trans[gtid_dml]['dur'])*100/tol_dur_num,3)  )+'%', a4)  ))
print(("+ {0:{0}<%d} + {0:{0}<%d} + {0:{0}<%d} + {0:{0}<%d} + {0:{0}<%d} + {0:{0}<%d}+ {0:{0}<%d} +" % (a0, a1,a5, a2, a3,a6, a4)).format('-'))


print('显示事务量前10的事务的详细信息:')
# The sql column has a fixed width; longer statements are wrapped onto
# continuation rows by cut_text().
a3 = 40
a4 = 20
detail_border = '+ ' + ' + '.join('-' * w for w in (a0, a3, a1, a2, a4, a4)) + ' +'
print(detail_border)

fmt = "| {0} | {1} | {2} | {3} | {4} | {5} |"
print(fmt.format(fill_text("gtid", a0),
                 fill_text("sql", a3),
                 fill_text("总事务量", a1),
                 fill_text("事务占比", a2),
                 fill_text("开始时间", a4),
                 fill_text("结束时间", a4)))
print(detail_border)
for gtid_dml in gtid_dml_list:
    trans = all_trans[gtid_dml]
    for i, sql in enumerate(trans['sql']):
        dml = sql.get('dml', 0)
        # Guard against an empty total instead of dividing by zero.
        share = round(float(dml) * 100 / tol_dml, 3) if tol_dml else 0.0
        for f, sql_cut in enumerate(cut_text(sql.get('sql', ''), a3)):
            # gtid and timestamps appear once per transaction; the row count
            # and its share appear once per statement; continuation rows
            # carry only the wrapped sql text.
            first_chunk = f == 0
            first_row = first_chunk and i == 0
            print(fmt.format(fill_text(gtid_dml if first_row else '', a0),
                             fill_text(sql_cut, a3),
                             fill_text(dml if first_chunk else '', a1),
                             fill_text(str(share) + '%' if first_chunk else '', a2),
                             fill_text(trans['start_time'] if first_row else '', a4),
                             fill_text(trans['end_time'] if first_row else '', a4)))

print(detail_border)


# print(all_trans)

显示结果如下:

显示事务量前10事务gtid信息:
+ ------------------------------------------- + ---------- + ---------- + --------------- + --------------- + ----------+ --------------- +
| gtid                                        | 事务量     | 总事务量   | 事务占比        | 执行时长        | 总时长     | 时长占比        |
+ ------------------------------------------- + ---------- + ----------+ --------------- + --------------- + ---------- + --------------- +
| 360bfb62-db39-11e8-b641-000c29beb35b:2006   | 1000       | 2017       | 49.579%         | 2               | 55         | 3.636%          |
| 360bfb62-db39-11e8-b641-000c29beb35b:2010   | 10         | 2017       | 0.496%          | 2               | 55         | 3.636%          |
| 360bfb62-db39-11e8-b641-000c29beb35b:2009   | 3          | 2017       | 0.149%          | 6               | 55         | 10.909%         |
| 360bfb62-db39-11e8-b641-000c29beb35b:2008   | 3          | 2017       | 0.149%          | 43              | 55         | 78.182%         |
| 360bfb62-db39-11e8-b641-000c29beb35b:1006   | 1          | 2017       | 0.05%           | 0               | 55         | 0.0%            |
| 360bfb62-db39-11e8-b641-000c29beb35b:1007   | 1          | 2017       | 0.05%           | 0               | 55         | 0.0%            |
| 360bfb62-db39-11e8-b641-000c29beb35b:1778   | 1          | 2017       | 0.05%           | 0               | 55         | 0.0%            |
| 360bfb62-db39-11e8-b641-000c29beb35b:1779   | 1          | 2017       | 0.05%           | 0               | 55         | 0.0%            |
| 360bfb62-db39-11e8-b641-000c29beb35b:1740   | 1          | 2017       | 0.05%           | 0               | 55         | 0.0%            |
| 360bfb62-db39-11e8-b641-000c29beb35b:1773   | 1          | 2017       | 0.05%           | 0               | 55         | 0.0%            |
+ ------------------------------------------- + ---------- + ---------- + --------------- + --------------- + ----------+ --------------- +
显示事务量前10的事务的详细信息:
+ ------------------------------------------- + ---------------------------------------- + ---------- + --------------- + -------------------- + -------------------- + 
| gtid                                        | sql                                      | 总事务量   | 事务占比        | 开始时间             | 结束时间             |
+ ------------------------------------------- + ---------------------------------------- + ---------- + --------------- + -------------------- + -------------------- + 
| 360bfb62-db39-11e8-b641-000c29beb35b:2006   | update test set m = 'haha'               | 1000       | 49.579%         | 2019-08-22 02:45:50  | 2019-08-22 02:45:52  |
| 360bfb62-db39-11e8-b641-000c29beb35b:2010   | update test set m = 'x' limit 10         | 10         | 0.496%          | 2019-08-22 22:21:53  | 2019-08-22 22:21:55  |
| 360bfb62-db39-11e8-b641-000c29beb35b:2009   | insert into test(id) select 10001        | 1          | 0.05%           | 2019-08-22 22:21:03  | 2019-08-22 22:21:09  |
|                                             | insert into test(id) select 10002        | 1          | 0.05%           |                      |                      |
|                                             | insert into test(id) select 10003        | 1          | 0.05%           |                      |                      |
| 360bfb62-db39-11e8-b641-000c29beb35b:2008   | delete from test limit 1                 | 1          | 0.05%           | 2019-08-22 21:40:30  | 2019-08-22 21:41:13  |
|                                             | insert into test select 1000,'z'         | 1          | 0.05%           |                      |                      |
|                                             | update test set m = 'Null' where id = 99 | 1          | 0.05%           |                      |                      |
|                                             | 8                                        |            |                 |                      |                      |
| 360bfb62-db39-11e8-b641-000c29beb35b:1006   | insert into test(id) select 0            | 1          | 0.05%           | 2019-08-22 02:45:19  | 2019-08-22 02:45:19  |
| 360bfb62-db39-11e8-b641-000c29beb35b:1007   | insert into test(id) select 1            | 1          | 0.05%           | 2019-08-22 02:45:19  | 2019-08-22 02:45:19  |
| 360bfb62-db39-11e8-b641-000c29beb35b:1778   | insert into test(id) select 772          | 1          | 0.05%           | 2019-08-22 02:45:24  | 2019-08-22 02:45:24  |
| 360bfb62-db39-11e8-b641-000c29beb35b:1779   | insert into test(id) select 773          | 1          | 0.05%           | 2019-08-22 02:45:24  | 2019-08-22 02:45:24  |
| 360bfb62-db39-11e8-b641-000c29beb35b:1740   | insert into test(id) select 734          | 1          | 0.05%           | 2019-08-22 02:45:24  | 2019-08-22 02:45:24  |
| 360bfb62-db39-11e8-b641-000c29beb35b:1773   | insert into test(id) select 767          | 1          | 0.05%           | 2019-08-22 02:45:24  | 2019-08-22 02:45:24  |
+ ------------------------------------------- + ---------------------------------------- + ---------- + --------------- + --------------------+ -------------------- + 

Process finished with exit code 0
发布了117 篇原创文章 · 获赞 20 · 访问量 33万+

猜你喜欢

转载自blog.csdn.net/u010719917/article/details/99979216