Pyecharts亚马逊订单可视化

本文数据来源于亚马逊平台一服饰类卖家3个月的订单数据,这里用pyecharts做可视化展示。

导入数据并处理

import pandas as pd
import numpy as np
from pyecharts import options as opts
from pyecharts.globals import ThemeType
from pyecharts.charts import Line,Bar,Map,PictorialBar,Pie,WordCloud,Page
from pyecharts.commons.utils import JsCode
import requests
from collections import Counter
# Load the merchant's order report and print a column / dtype summary.
orders = pd.read_excel(io='亚马逊入驻商订单报表.xlsx')
orders.info()

在这里插入图片描述

# Load the market reference sheet and print a column / dtype summary.
mkt = pd.read_excel(io='市场.xlsx')
mkt.info()

在这里插入图片描述

# Discard rows that carry no order timestamp.
orders.dropna(axis=0, subset=['下单时间'], inplace=True)

# Parse the order timestamp into a datetime column; the day, weekday and
# hour are derived from it further down.
orders['date'] = pd.to_datetime(orders['下单时间'], utc=False)

# Replace non-breaking spaces ('\xa0') in the English state names with
# ordinary spaces.
mkt['美国州名英文'] = mkt['美国州名英文'].replace('\xa0', ' ', regex=True)
# The raw shipping-state column mixes abbreviations and full names;
# normalise every value to the full name listed in ``mkt``.
def states(s):
    """Return the full English state name for a raw shipping-state value.

    The value is stripped, upper-cased and has its dots removed, then it is
    looked up first against the full names in ``mkt.美国州名英文``
    (case-insensitively) and then against the abbreviations in
    ``mkt.州名简写``.

    Args:
        s: Raw cell value, e.g. ``'ca'``, ``'N.Y.'`` or ``'california'``.

    Returns:
        The matching full name as spelled in ``mkt``. Non-string values
        (such as NaN) and values with no match are returned unchanged
        instead of raising.
    """
    if not isinstance(s, str):
        return s

    key = s.strip().upper().replace('.', '')
    full_names = mkt.美国州名英文

    # First try: the value already is a full state name (any letter case).
    by_name = full_names[full_names.str.upper() == key]
    if not by_name.empty:
        return by_name.iloc[0]

    # Second try: the value is a state abbreviation.
    by_abbr = full_names[mkt.州名简写 == key]
    if not by_abbr.empty:
        return by_abbr.iloc[0]

    # Unknown value: keep it rather than aborting the whole ``apply``.
    return s
# Map every shipping state to its full name, then repair the names whose
# second word comes back in lower case.
orders['配送州'] = (
    orders['配送州']
    .apply(states)
    .str.replace('South dakota', 'South Dakota')
    .str.replace('New mexico', 'New Mexico')
    .str.replace('South carolina', 'South Carolina')
    .str.replace('New hampshire', 'New Hampshire')
    .str.replace('New jersey', 'New Jersey')
)

# Assemble the analysis table: one row per order line, with the revenue
# and the time parts (day of month, weekday name, hour) precomputed.
data = pd.DataFrame({
    '订单号': orders['订单ID'],
    '用户': orders['买家姓名'],
    '产品': orders['产品名称'],
    '数量': orders['产品数量'],
    '单价': orders['产品价格'],
    '销售额': orders['产品数量'].mul(orders['产品价格']),
    '日期': orders['date'].dt.day,
    '星期': orders['date'].dt.day_name(),
    # 'date' already holds datetimes, so the hour can be read directly.
    '时间': orders['date'].dt.hour,
    '配送州': orders['配送州'],
})
data.head()

在这里插入图片描述

时间属性

各时间段订单量、客单价

# Custom chart background: a semi-transparent two-stop linear gradient,
# passed to the charts as raw JavaScript through JsCode.
bg_color_js = (
    "new echarts.graphic.LinearGradient(0, 0, 0, 1, [{offset: 0, color: 'rgba(128, 255, 165, 0.2)'}, "
    "{offset: 1, color: 'rgba(1, 191, 236, 0.2)'}], false)"
)

# Series colour: the same two colour stops without an alpha component.
color_js = (
    "new echarts.graphic.LinearGradient(0, 0, 0, 1,\n"
    "    [{offset: 0, color: 'rgba(128, 255, 165)'}, {offset: 1, color: 'rgba(1, 191, 236)'}], false)"
)


# Per-hour totals: number of orders and revenue.
hour_df = data.groupby('时间').agg({'订单号': ['count'], '销售额': ['sum']})
hour_df.columns = ['订单量', '销售额']

# Average order value per hour. It is kept numeric (rounded to 2 decimals,
# like the per-state average) rather than formatted into a string, so that
# sorting and comparisons on the column stay numeric and the chart receives
# real numbers.
hour_df['平均客单价'] = (hour_df['销售额'] / hour_df['订单量']).round(2)
hour_df.head()

在这里插入图片描述

def hour_view():
    """Build the hourly chart: order count as a line, average order value as bars.

    Reads the module-level ``hour_df``, ``bg_color_js`` and ``color_js``.

    Returns:
        The ``Line`` chart with the ``Bar`` chart overlapped onto it. Call
        ``.render_notebook()`` on the result to display it in a notebook.
    """
    # Left axis: order count drawn as a smooth line without point markers.
    line = Line(init_opts=opts.InitOpts(bg_color=JsCode(bg_color_js), chart_id='hour_chart'))
    line.add_xaxis(['{}点'.format(hour) for hour in hour_df.index.tolist()])
    line.add_yaxis(
        '订单量',
        hour_df.订单量.tolist(),
        yaxis_index=0,
        is_smooth=True,
        symbol='circle',
        is_symbol_show=False,
        linestyle_opts=opts.LineStyleOpts(color='#04c1ea', width=3),
        itemstyle_opts=opts.ItemStyleOpts(color='#04c1ea'),
    )

    # Second y axis on the right for the average order value.
    line.extend_axis(
        yaxis=opts.AxisOpts(
            name='平均客单价',
            min_=15,
            position="right",
            axisline_opts=opts.AxisLineOpts(is_show=False),  # hide the axis line
            axistick_opts=opts.AxisTickOpts(is_show=False),  # hide the tick marks
        )
    )
    line.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    line.set_global_opts(
        title_opts=opts.TitleOpts(title='各时间段订单量和客单价', pos_left='center'),
        legend_opts=opts.LegendOpts(is_show=False),
        tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='cross'),
        yaxis_opts=opts.AxisOpts(
            name='订单量',
            axisline_opts=opts.AxisLineOpts(is_show=False),
            axistick_opts=opts.AxisTickOpts(is_show=False),
            splitline_opts=opts.SplitLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(color='#E0E6F1'),
            ),
        ),
    )

    # Bars bound to the second (index 1) y axis.
    bar = Bar()
    bar.add_xaxis(['{}点'.format(hour) for hour in hour_df.index.tolist()])
    bar.add_yaxis(
        '平均客单价',
        hour_df.平均客单价.tolist(),
        yaxis_index=1,
        itemstyle_opts=opts.ItemStyleOpts(color=JsCode(color_js), opacity=0.7),
        label_opts=opts.LabelOpts(is_show=False),
    )

    return line.overlap(bar)


hour_view()

在这里插入图片描述
订单量高峰出现在7点到11点,和国内用户习惯不太一样;
平均客单价最高的三个时间点是13点、6点、5点。

周订单量分布

# Orders per weekday, ordered Monday..Sunday via an ordered categorical
# dtype rather than by plain string sorting.
week_df = data.groupby('星期')['订单号'].count().reset_index()
cat_day_of_week = pd.api.types.CategoricalDtype(
    categories=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
    ordered=True,
)
week_df['星期'] = week_df['星期'].astype(cat_day_of_week)
week_df = week_df.sort_values(by=['星期'])
week_df

在这里插入图片描述

def week_view():
    """Build the filled line chart of order count per weekday.

    Reads the module-level ``week_df``, ``bg_color_js`` and ``color_js``.

    Returns:
        The configured ``Line`` chart. Call ``.render_notebook()`` on the
        result to display it in a notebook.
    """
    line = Line(init_opts=opts.InitOpts(bg_color=JsCode(bg_color_js), chart_id='week_chart'))
    line.add_xaxis(week_df.星期.tolist())
    line.add_yaxis(
        '订单量',
        week_df.订单号.tolist(),
        is_smooth=True,
        symbol='circle',
        is_symbol_show=False,  # hide the point markers
        linestyle_opts=opts.LineStyleOpts(color="#fff"),
        areastyle_opts=opts.AreaStyleOpts(color=JsCode(color_js), opacity=1),
        itemstyle_opts=opts.ItemStyleOpts(color="#5aecbb"),
    )
    line.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    line.set_global_opts(
        title_opts=opts.TitleOpts(title='各周段订单量', pos_left='center'),
        legend_opts=opts.LegendOpts(is_show=False),
        tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='cross'),
        # Without a boundary gap the x axis starts at the origin and ends
        # at the last data point.
        xaxis_opts=opts.AxisOpts(boundary_gap=False),
        yaxis_opts=opts.AxisOpts(
            axisline_opts=opts.AxisLineOpts(is_show=False),
            axistick_opts=opts.AxisTickOpts(is_show=False),
            min_=180,
            splitline_opts=opts.SplitLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(color='#E0E6F1'),
            ),
        ),
    )
    return line


week_view()

在这里插入图片描述
周三、周五是订单的高峰期,周二的订单量则是一周中最低的。

各州订单

# Per-state totals (order count and revenue), largest order count first.
geo_df = data.groupby(['配送州']).agg({'订单号': ['count'], '销售额': ['sum']}).reset_index()
geo_df.columns = ['配送州', '订单量', '销售额']
geo_df.sort_values(['订单量'], ascending=False, inplace=True)

# (state, order count) pairs for the map chart. Zipping the two columns
# replaces the row-by-row ``iterrows`` loop and yields plain Python values.
data_pair = list(zip(geo_df['配送州'].tolist(), geo_df['订单量'].tolist()))

# Cumulative share of all orders, following the descending order above.
geo_df['累计'] = (geo_df['订单量'].cumsum() / geo_df['订单量'].sum()).round(2)
# Average order value per state.
geo_df['平均客单价'] = (geo_df['销售额'] / geo_df['订单量']).round(2)
geo_df.head()

在这里插入图片描述

各州订单及订单累计分布

def pro_ord_view():
    """Build the per-state chart: order count as bars, cumulative share as a line.

    Reads the module-level ``geo_df`` and ``color_js``.

    Returns:
        The ``Bar`` chart with the ``Line`` chart overlapped onto it. Call
        ``.render_notebook()`` on the result to display it in a notebook.
    """
    # Bars on the left axis: order count per state.
    bar = Bar(init_opts=opts.InitOpts(chart_id='pro_ord_chart'))
    bar.add_xaxis(geo_df.配送州.tolist())
    bar.add_yaxis(
        '订单量',
        geo_df.订单量.tolist(),
        yaxis_index=0,
        itemstyle_opts=opts.ItemStyleOpts(color=JsCode(color_js), opacity=0.7),
    )

    # Second y axis on the right for the cumulative share.
    bar.extend_axis(
        yaxis=opts.AxisOpts(
            name='订单累计比',
            position="right",
            axisline_opts=opts.AxisLineOpts(is_show=False),
            axistick_opts=opts.AxisTickOpts(is_show=False),
        )
    )
    bar.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title='各州订单及订单累计分布', pos_left='center'),
        legend_opts=opts.LegendOpts(is_show=False),
        tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='cross'),
        yaxis_opts=opts.AxisOpts(
            name='订单数',
            axisline_opts=opts.AxisLineOpts(is_show=False),
            axistick_opts=opts.AxisTickOpts(is_show=False),
            splitline_opts=opts.SplitLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(color='#E0E6F1'),
            ),
        ),
        # Rotate the state labels on the x axis.
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-45)),
    )

    # Line bound to the second (index 1) y axis.
    line = Line()
    line.add_xaxis(geo_df.配送州.tolist())
    line.add_yaxis(
        '订单累计比',
        geo_df.累计.tolist(),
        is_smooth=True,
        is_symbol_show=False,
        symbol='circle',
        yaxis_index=1,
        linestyle_opts=opts.LineStyleOpts(color='#04c1ea', width=3),
        itemstyle_opts=opts.ItemStyleOpts(color='#04c1ea'),
        label_opts=opts.LabelOpts(is_show=False),
    )

    return bar.overlap(line)


pro_ord_view()

在这里插入图片描述

# Download the USA GeoJSON used to register the custom map. The timeout
# keeps the script from hanging on a stalled connection, and
# raise_for_status() surfaces an HTTP error instead of trying to parse an
# error page as JSON.
_geo_response = requests.get(
    url="https://echarts.apache.org/examples/data/asset/geo/USA.json",
    timeout=30,
)
_geo_response.raise_for_status()
GEO_data = _geo_response.json()

# JavaScript object literal passed as the third argument of
# echarts.registerMap in pro_map_view. It gives a left/top/width placement
# for Alaska, Hawaii and Puerto Rico; the first inline note says Alaska is
# moved to the lower left of the US mainland. The `//` notes inside the
# string are part of the JavaScript text and are left untouched.
area_move = """{
        Alaska: {              // 把阿拉斯加移到美国主大陆左下方
            left: -128,
            top: 25,
            width: 15
        },
        Hawaii: {
            left: -110,        // 夏威夷
            top: 25,
            width: 5
        },
        'Puerto Rico': {       // 波多黎各
            left: -76,
            top: 26,
            width: 2
        }
    }"""
def pro_map_view():
    """Build the map of order count per US state.

    Registers the downloaded ``GEO_data`` GeoJSON as the custom map
    ``'USA'`` (with the ``area_move`` placements) and colours each state
    by the piecewise order-count buckets below.

    Returns:
        The configured ``Map`` chart. Call ``.render_notebook()`` on the
        result to display it in a notebook.
    """
    import json

    # Serialise the GeoJSON with json.dumps so the embedded text is always
    # valid JavaScript; formatting the dict directly would embed its Python
    # repr (True / False / None are not JavaScript).
    register_js = """echarts.registerMap('USA', {}, {});""".format(json.dumps(GEO_data), area_move)

    # Named usa_map so the builtin ``map`` is not shadowed.
    usa_map = (
         Map(init_opts=opts.InitOpts(chart_id='pro_map_chart'))
         .add_js_funcs(register_js)
         .add('订单量',
              data_pair=data_pair,
              maptype='USA',
              is_roam=False,  # no mouse zooming / panning
              is_map_symbol_show=False,  # hide the map symbols
              zoom=1.1,  # zoom factor of the current view
              label_opts=opts.LabelOpts(is_show=False),
              )
        .set_global_opts(
              legend_opts=opts.LegendOpts(is_show=False),
              title_opts=opts.TitleOpts(title="美国各州订单量分布", pos_left='center'),
              visualmap_opts=opts.VisualMapOpts(
                  is_piecewise=True,
                  pos_left='2%',
                  pos_top='65%',
                  range_text=['订单量', ''],  # text at the two ends of the legend
                  pieces=[
                      {'min': 101},
                      {'min': 61, 'max': 100},
                      {'min': 31, 'max': 60},
                      {'min': 11, 'max': 30},
                      {'min': 1, 'max': 10},
                  ],
                  range_color=["#CCD3D9", "#E6B6C2", "#D4587A", "#DC364C"],
              ),
        )
    )
    return usa_map

pro_map_view()

在这里插入图片描述

各州订单价格分布

# Average order value per state, highest first.
pro_price = geo_df[['配送州', '平均客单价']].sort_values(by='平均客单价', ascending=False)
pro_price.head()

在这里插入图片描述

def pro_price_view():
    """Build the bar chart of average order value per state.

    Reads the module-level ``pro_price`` and ``color_js``. The y axis is
    clamped to the integer range just outside the smallest and largest
    average.

    Returns:
        The configured ``Bar`` chart. Call ``.render_notebook()`` on the
        result to display it in a notebook.
    """
    axis_floor = int(pro_price.平均客单价.min() - 1)
    axis_ceiling = int(pro_price.平均客单价.max() + 1)

    bar = Bar(init_opts=opts.InitOpts(chart_id='pro_price_chart'))
    bar.add_xaxis(pro_price.配送州.tolist())
    bar.add_yaxis(
        '平均客单价',
        pro_price.平均客单价.tolist(),
        yaxis_index=0,
        itemstyle_opts=opts.ItemStyleOpts(color=JsCode(color_js)),
    )
    bar.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title='各州平均客单价', pos_left='center'),
        legend_opts=opts.LegendOpts(is_show=False),
        tooltip_opts=opts.TooltipOpts(trigger='axis', axis_pointer_type='shadow'),
        yaxis_opts=opts.AxisOpts(
            min_=axis_floor,
            max_=axis_ceiling,
            axisline_opts=opts.AxisLineOpts(is_show=False),
            axistick_opts=opts.AxisTickOpts(is_show=False),
            splitline_opts=opts.SplitLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(color='#E0E6F1'),
            ),
        ),
        # Rotate the state labels on the x axis.
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-45)),
    )
    return bar


pro_price_view()

商品属性

性别属性

根据商品名称关键词来判断,93%的商品都是女款

# Classify each ordered product as women's (f) or men's (m) by keywords in
# its name; names with neither keyword are not counted.
f, m = 0, 0
for product_name in data['产品']:
    if not isinstance(product_name, str):
        continue  # missing names (e.g. NaN) carry no keyword
    upper_name = product_name.upper()
    # Test the women's keywords first: 'WOMEN' itself contains 'MEN'.
    if 'WOMEN' in upper_name or 'GIRL' in upper_name:
        f += 1
    elif 'MEN' in upper_name:
        m += 1

# Percentage split between the two groups. Fall back to 0 when nothing
# matched, rather than dividing by zero.
_classified = f + m
f_p = round(f / _classified * 100) if _classified else 0
m_p = round(m / _classified * 100) if _classified else 0

# SVG path icons ('path://...') for the pictorial bar chart. gender_view
# pairs symbols[0] with the men's share (m_p) and symbols[1] with the
# women's share (f_p).
symbols = [
    'path://M18.2629891,11.7131596 L6.8091608,11.7131596 C1.6685112,11.7131596 0,13.032145 0,18.6237673 L0,34.9928467 C0,38.1719847 4.28388932,38.1719847 4.28388932,34.9928467 L4.65591984,20.0216948 L5.74941883,20.0216948 L5.74941883,61.000787 C5.74941883,65.2508314 11.5891201,65.1268798 11.5891201,61.000787 L11.9611506,37.2137775 L13.1110872,37.2137775 L13.4831177,61.000787 C13.4831177,65.1268798 19.3114787,65.2508314 19.3114787,61.000787 L19.3114787,20.0216948 L20.4162301,20.0216948 L20.7882606,34.9928467 C20.7882606,38.1719847 25.0721499,38.1719847 25.0721499,34.9928467 L25.0721499,18.6237673 C25.0721499,13.032145 23.4038145,11.7131596 18.2629891,11.7131596 M12.5361629,1.11022302e-13 C15.4784742,1.11022302e-13 17.8684539,2.38997966 17.8684539,5.33237894 C17.8684539,8.27469031 15.4784742,10.66467 12.5361629,10.66467 C9.59376358,10.66467 7.20378392,8.27469031 7.20378392,5.33237894 C7.20378392,2.38997966 9.59376358,1.11022302e-13 12.5361629,1.11022302e-13',
    'path://M28.9624207,31.5315864 L24.4142575,16.4793596 C23.5227152,13.8063773 20.8817445,11.7111088 17.0107398,11.7111088 L12.112691,11.7111088 C8.24168636,11.7111088 5.60080331,13.8064652 4.70917331,16.4793596 L0.149791395,31.5315864 C-0.786976655,34.7595013 2.9373074,35.9147532 3.9192135,32.890727 L8.72689855,19.1296485 L9.2799493,19.1296485 C9.2799493,19.1296485 2.95992025,43.7750224 2.70031069,44.6924335 C2.56498417,45.1567684 2.74553639,45.4852068 3.24205501,45.4852068 L8.704461,45.4852068 L8.704461,61.6700801 C8.704461,64.9659872 13.625035,64.9659872 13.625035,61.6700801 L13.625035,45.360657 L15.5097899,45.360657 L15.4984835,61.6700801 C15.4984835,64.9659872 20.4191451,64.9659872 20.4191451,61.6700801 L20.4191451,45.4852068 L25.8814635,45.4852068 C26.3667633,45.4852068 26.5586219,45.1567684 26.4345142,44.6924335 C26.1636859,43.7750224 19.8436568,19.1296485 19.8436568,19.1296485 L20.3966199,19.1296485 L25.2043926,32.890727 C26.1862111,35.9147532 29.9105828,34.7595013 28.9625083,31.5315864 L28.9624207,31.5315864 Z M14.5617154,0 C17.4960397,0 19.8773132,2.3898427 19.8773132,5.33453001 C19.8773132,8.27930527 17.4960397,10.66906 14.5617154,10.66906 C11.6274788,10.66906 9.24611767,8.27930527 9.24611767,5.33453001 C9.24611767,2.3898427 11.6274788,0 14.5617154,0 L14.5617154,0 Z',
]
def gender_view():
    """Build the pictorial bar comparing men's and women's product shares.

    Two series are stacked on the same two categories: the first carries
    the real percentages (``m_p`` and ``f_p``) and is clipped to that
    height, the second is a translucent full-height (100) copy that acts
    as the background silhouette.

    Returns:
        The configured ``PictorialBar`` chart. Call ``.render_notebook()``
        on the result to display it in a notebook.
    """

    def _figure(value, icon, rgba):
        # One data item with its own icon and its own colour.
        return {
            "value": value,
            "symbol": icon,
            'symbolBoundingData': 100,
            "itemStyle": {"normal": {"color": rgba}},
        }

    pbar = PictorialBar(init_opts=opts.InitOpts(bg_color=JsCode(bg_color_js), chart_id='gender_chart'))
    pbar.add_xaxis([0, 1])

    # Foreground series: the values that are actually displayed.
    pbar.add_yaxis(
        "",
        [
            _figure(m_p, symbols[0], 'rgba(105,204,230)'),
            _figure(f_p, symbols[1], 'rgba(255,130,130)'),
        ],
        label_opts=opts.LabelOpts(
            is_show=True,
            position='inside',
            font_family='Arial',
            font_weight='bolder',
            font_size=40,
            formatter='{c}%',
        ),
        is_symbol_clip=True,
    )

    # Background series: fixed at 100 so the full silhouette stays visible.
    pbar.add_yaxis(
        "",
        [
            _figure(100, symbols[0], 'rgba(105,204,230,0.40)'),
            _figure(100, symbols[1], 'rgba(255,130,130,0.40)'),
        ],
        category_gap='35%',  # gap between the two figures
        label_opts=opts.LabelOpts(is_show=False),
        is_symbol_clip=True,
    )

    pbar.set_global_opts(
        title_opts=opts.TitleOpts(
            title="男款商品 VS 女款商品",
            subtitle='依据订单商品名称中的关键词判断, 如“women”,“girl”等。',
            pos_left='center',
        ),
        tooltip_opts=opts.TooltipOpts(is_show=False),  # no tooltip on hover
        legend_opts=opts.LegendOpts(is_show=False),
        xaxis_opts=opts.AxisOpts(is_show=False),
        yaxis_opts=opts.AxisOpts(is_show=False, max_=100),
    )
    return pbar


gender_view()

在这里插入图片描述

尺码和颜色

哪个尺码的衣服卖得更多?
哪个颜色更受欢迎?

# Tokenise the product names: brackets, commas and non-breaking spaces
# become separators, two spelling variants are unified, then the name is
# split on single spaces.
word_list = []
for item in data['产品']:
    if not isinstance(item, str):
        continue  # missing product names (e.g. NaN) have nothing to split
    words = (
        item.replace('(', ' ')
        .replace(')', ' ')
        .replace(',', ' ')
        .replace('\xa0', ' ')
        .replace('T Shirt', 'T-Shirt')
        .replace("Women's", 'Womens')
        .split(' ')
    )
    word_list.extend(words)

# Frequency of each clothing size. Tokens are matched case-insensitively,
# so they are also counted in upper case: 'xl' and 'XL' are the same size.
size_list = [
    word.upper()
    for word in word_list
    if word.upper() in ['L', 'XL', '2XL', '3XL', 'M', 'S', 'XS', '4XL']
]

c = Counter(size_list)
c

在这里插入图片描述

# Frequency of each colour word among the product-name tokens
# (exact, case-sensitive match).
color_list = [
    word
    for word in word_list
    if word in ['Black', 'Blue', 'Green', 'Grey', 'White', 'Yellow', 'Purple', 'Pink']
]

c1 = Counter(color_list)
c1

在这里插入图片描述

def size_col_view():
    """Build the twin ring charts of size (left) and colour (right) frequencies.

    Reads the module-level counters ``c`` (sizes) and ``c1`` (colours).

    Returns:
        The configured ``Pie`` chart. Call ``.render_notebook()`` on the
        result to display it in a notebook.
    """
    # Drop shadow shared by both rings.
    drop_shadow = {
        'shadowColor': 'rgba(0, 0, 0, .5)',  # shadow colour
        'shadowBlur': 5,  # shadow size
        'shadowOffsetY': 5,  # vertical shadow offset
        'shadowOffsetX': 5,  # horizontal shadow offset
    }

    pie = Pie(init_opts=opts.InitOpts(chart_id='size_col_chart'))

    # Left ring: sizes, drawn slightly translucent.
    pie.add(
        "",
        c.most_common(10),
        radius=["30%", "50%"],
        center=["25%", "50%"],
        label_opts=opts.LabelOpts(is_show=True, formatter='{b}:{d}%'),
        itemstyle_opts={'normal': {**drop_shadow, 'opacity': '0.7'}},
    )

    # Right ring: colours, fully opaque.
    pie.add(
        "",
        c1.most_common(10),
        radius=["30%", "50%"],
        center=["75%", "50%"],
        label_opts=opts.LabelOpts(is_show=True, formatter='{b}:{d}%'),
        itemstyle_opts={'normal': dict(drop_shadow)},
    )

    # One main title plus a caption in the centre of each ring.
    titles = [
        {'text': '商品属性', 'left': 'center', 'top': '5%',
         'textStyle': {'color': '#282828', 'fontSize': 20}},
        {'text': 'SIZE', 'left': '23%', 'top': '48%',
         'textStyle': {'color': '#282828', 'fontSize': 17}},
        {'text': 'COLOR', 'left': '72%', 'top': '48%',
         'textStyle': {'color': '#282828', 'fontSize': 17}},
    ]
    pie.set_global_opts(
        title_opts=titles,
        tooltip_opts=opts.TooltipOpts(is_show=False),
        legend_opts=opts.LegendOpts(is_show=False),
        visualmap_opts=opts.VisualMapOpts(
            is_show=False,
            max_=300,
            range_color=['rgb(1, 191, 236)', 'rgb(128, 255, 165)'],
        ),
    )
    return pie


size_col_view()

在这里插入图片描述

# The 100 most frequent tokens for the word cloud. Blank tokens (produced
# by consecutive separators) are filtered out explicitly instead of
# assuming they always rank first and slicing the first entry away.
c2 = Counter(word for word in word_list if word.strip()).most_common(100)

def cloud_view():
    """Build the word cloud of the most frequent product-name tokens.

    Reads the module-level ``c2`` (word, count) pairs and uses
    ``amazon.jpg`` as the mask image.

    Note (from the original author): the chart shows up blank on the first
    run; running it a second time displays it.

    Returns:
        The configured ``WordCloud`` chart. Call ``.render_notebook()`` on
        the result to display it in a notebook.
    """
    cloud = (
        WordCloud(init_opts=opts.InitOpts(chart_id='cloud_chart'))
        .add(
            '',
            c2,
            mask_image='amazon.jpg',
            width='900px',
            height='900px',
            word_size_range=[10, 50],
            word_gap=10,
        )
    )  # closes the parenthesis the original left open (SyntaxError)
    return cloud


cloud_view()

在这里插入图片描述

# Dashboard title
def title_view(title = '亚马逊订单可视化'):
    """Build an empty chart that only shows the dashboard title.

    Args:
        title: Text to display, centred horizontally and vertically.

    Returns:
        A ``Pie`` chart without any series, carrying only the title. Call
        ``.render_notebook()`` on the result to display it in a notebook.
    """
    heading = opts.TitleOpts(
        title=title,
        title_textstyle_opts=opts.TextStyleOpts(font_size=55),
        pos_left='center',
        pos_top='middle',
    )
    banner = Pie(init_opts=opts.InitOpts(chart_id='title_chart'))
    banner.set_global_opts(title_opts=heading)
    return banner


title_view()

生成大屏

在开发各个子图表时,每个图表的初始化配置项opts.InitOpts里都显式设置了chart_id。
否则pyecharts每次运行都会给图表生成随机的chart_id,与已保存的json配置文件中的"cid"对不上,导致该配置文件无法重复使用!
在这里插入图片描述
用Page函数拖拽组合完大屏,点击页面左上角的Save Config生成chart_config.json文件

import os

# Step 1: render every chart into one draggable page. Open the generated
# HTML, arrange the charts and click "Save Config" to export
# chart_config.json.
page = Page(layout=Page.DraggablePageLayout, page_title="亚马逊订单数据分析")
page.add(title_view(),hour_view(),week_view(),pro_ord_view(),pro_map_view(),pro_price_view(),gender_view(),size_col_view(),cloud_view())
page.render('亚马逊订单拖拽图.html')

# Step 2: apply the saved layout. chart_config.json only exists after the
# manual step above, so the first run skips this instead of failing.
if os.path.exists('chart_config.json'):
    a = page.save_resize_html('亚马逊订单拖拽图.html', cfg_file='chart_config.json', dest='亚马逊订单可视化.html')
else:
    a = None
    print('chart_config.json not found: open 亚马逊订单拖拽图.html, arrange the charts, '
          'click "Save Config", then run this step again.')

在这里插入图片描述

猜你喜欢

转载自blog.csdn.net/qq_45694768/article/details/124829338