用python制简单报表,输入.docx文件

前言

       之前,公司要求每周做一次用户访问量、活跃用户等统计分析,并以邮件发出。为了快速高效地实现邮件的内容,即word的内容,我写了下面的demo。本demo使用的数据完全是小编杜撰,不具有任何实际意义。运行截图见图1。
在这里插入图片描述

图1 运行结果
数据格式如下:
date	appId	visit	activity
20180801	1001	4988	906
20180801	1002	2209	950
20180801	1003	1323	455
20180801	1004	1361	128
20180801	1005	2117	373
20180801	1006	7982	633
20180801	1007	5073	115
20180802	1001	5119	909
20180802	1002	2057	948
20180802	1003	1004	400
20180802	1004	1461	108
20180802	1005	2204	391
20180802	1006	8199	640
20180802	1007	5126	115

代码

# -*- coding: utf-8 -*-
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
from docx import Document
from docx.shared import Inches

"""
导入数据
"""
file_path = "C:\\Users\\yang\\Desktop\\log.xls"
file_data = pd.read_excel(file_path, sheetname="Sheet1")
date = [datetime.strptime(str(x), '%Y%m%d') for x in file_data['date']]  # 把string转化为日期格式
del file_data['date']  # 从excel读到的date的格式是int,所以删除原来得列,然后在拼接上转化为日期格式后的date
file_data['date'] = date

"""
本周与上周的比较
"""
last_week_begin = '2018-08-01'  # 上周开始日期
last_week_end = '2018-08-07'  # 上周结束日期
this_week_begin = '2018-08-08'  # 本周开始日期
this_week_end = '2018-08-14'  # 本周结束日期
last_week_data_frame = file_data[file_data['date'].isin(pd.date_range(last_week_begin, last_week_end))]  # 截取上周的数据
this_week_data_frame = file_data[file_data['date'].isin(pd.date_range(this_week_begin, this_week_end))]  # 截取本周的数据

partner_map = {'1001': 'BaiDu', '1002': 'AiQiYi', '1003': 'WeChat', '1004': 'YY', '1005': '163mail', '1006': 'QQ',
                '1007': 'DiDi'}
# 本周与上周的所有合作方的平均活跃用户
last_week_activity_mean = last_week_data_frame['activity'].groupby(last_week_data_frame['appId']).mean()
this_week_activity_mean = this_week_data_frame['activity'].groupby(this_week_data_frame['appId']).mean()
this_week_mean_sort = this_week_activity_mean.sort_values(ascending=False)
message_top3 = "本周活跃用户数排名前三的合作方由高到低依次为(合作方名称:均值) "+partner_map[str(this_week_mean_sort.index[0])]+": "+str(int(this_week_mean_sort.values[0]))+", "+partner_map[str(this_week_mean_sort.index[1])]+": "+str(int(this_week_mean_sort.values[1]))+", "+partner_map[str(this_week_mean_sort.index[2])]+": "+str(int(this_week_mean_sort.values[2]))+";"
message_min = "本周活跃用户数最少的合作方为 "+partner_map[str(this_week_mean_sort.index[6])]+": "+str(int(this_week_mean_sort.values[6]))+";"
print(message_top3)
print(message_min)
"""
生成柱状图
"""
x_label = pd.Series(partner_map)
x = list(range(len(x_label)))
total_width, n = 0.8, 2  # 并列的两个柱状图的总宽度
width = total_width / n  # 一个柱状图的宽度
plt.bar(x, last_week_activity_mean, width=width, label='last week', fc='y')
for i in range(len(x)):
    x[i] = x[i] + width
plt.bar(x, this_week_activity_mean, width=width, label='this week', tick_label=x_label, fc='r')
plt.legend()
plt.savefig('C:\\Users\\yang\\Desktop\\demo.png')
# plt.show()

"""
把message和图片写入docx文件
"""
document = Document()
document.add_heading('本周周报', 0)     # 题目
document.add_heading('Heading, level 1', level=1)   # 一级标题
document.add_paragraph(message_top3, style='List Bullet')
document.add_paragraph(message_min, style='List Bullet')

document.add_picture('C:\\Users\\yang\\Desktop\\demo.png', width=Inches(3), height=Inches(2))
document.save('C:\\Users\\yang\\Desktop\\demo.docx')

后续工作

  • 实现插入图片居中、添加图名
  • 优化代码中不合理的地方
  • 美化柱状图,如:添加标题等

参考

python-docx官方文档

猜你喜欢

转载自blog.csdn.net/qq_41228463/article/details/82943539