pandaboy玩pandas

基于python的三方库pandas的excel表二次开发

import numpy as np
import pandas as pd
import time
from pandas import merge
import openpyxl
import datetime
Date = input('请输入日期:')
def html_to_excel():
    with open(r'C:\Users\g\Desktop\CUX:缺件统计表_160618.xls','rb') as f:
        df_o = pd.read_html(f.read())
        print('数据表正在合并!')
        bb = pd.ExcelWriter('F:\缺件表.xlsx')
        df_o[0].to_excel(bb)
        bb.close()
'''
1.删除掉订单类型是SO的产线且是13开头的数据,再定义SO为紧急要料
2.按物料、产线对实际缺件数求和,并输出所有行>>>df_inner1
'''
# df=pd.DataFrame(pd.read_excel('YC_扬柴主机缺.xlsx'))
# print(df)
# Url = 'C:\Users\g\Desktop\CUX:缺件统计表_140618.xlsx'
# Url = input('请输入文件名:')
def change_excel():
    # df = pd.read_excel(r'C:\Users\g\Desktop\CUX:缺件统计表_150618.xlsx',sheet_name=0,header=7)
    df = pd.read_excel('F:\缺件表.xlsx', sheet_name=0, header=8)
    # df['需求日期'] = df['需求日期'].astype(str)
    df['需求日期'] = pd.to_datetime(df['需求日期'], format='%Y-%m-%d %H:%M:%S',errors='coerce')
    # print(df.head())
    # print(df.info())
    # df['产线'] = df['产线'].astype('object')
    df1 = df[(~df['产线'].isin([130000007.0,130000001.0,130000002.0,130000003.0,130000005.0,130000021.0,130000022.0,130000023.0,130000024.0]))&(df['订单类型']=='SO')]
    df2 = df[df['订单类型']!='SO']
    df = df1.append(df2)
    print('数据表合并已完成!')
    #######################逻辑处理有问题,excel导出有问题########################
    #######################以上BUG已经修复#########################################
    df['产线'] = df['产线'].replace(130000001.0, '485装配线')
    df['产线'] = df['产线'].replace(130000007.0, '随机附件')
    df['产线'] = df['产线'].replace(130000002.0, '4102装配线')
    df['产线'] = df['产线'].replace(130000003.0, 'VM装配线')
    df['产线'] = df['产线'].replace(130000005.0, '整理线')
    df['产线'] = df['产线'].replace(130000021.0, '二号厂装配线')
    df['产线'] = df['产线'].replace(130000022.0, '二号厂试车线')
    df['产线'] = df['产线'].replace(130000023.0, '二号厂成套线')
    df['产线'] = df['产线'].replace(130000024.0, '二号厂随机附件')
    df['产线'] = df['产线'].replace(130000600.0, '外协件销售出库')
    df['产线'] = df['产线'].replace(930000001.0, '制造工程部')
    df['产线'] = df['产线'].replace(930000002.0, '铸造厂')
    df['产线'] = df['产线'].replace(930000003.0, '金加工厂')
    df['产线'] = df['产线'].replace(930000004.0, '金加工厂机体线')
    df['产线'] = df['产线'].replace(930000005.0, '装配厂')
    df['产线'] = df['产线'].replace(930000006.0, '装配厂102线')
    df['产线'] = df['产线'].replace(930000007.0, '装配厂85线')
    df['产线'] = df['产线'].replace(930000008.0, '装配厂VM线')
    df['产线'] = df['产线'].replace(930000009.0, '调试厂产线')
    df['产线'] = df['产线'].replace(930000010.0, '调试厂生产')
    df['产线'] = df['产线'].replace(930000011.0, '调试厂设修')
    df['产线'] = df['产线'].replace(930000012.0, '质量部设修')
    df['产线'] = df['产线'].replace(930000013.0, '质量部市场')
    df['产线'] = df['产线'].replace(930000014.0, '质量部检验')
    df['产线'] = df['产线'].replace(930000015.0, '技术中心')
    df['产线'] = df['产线'].replace(930000016.0, '应用工程部')
    df['产线'] = df['产线'].replace(930000017.0, '产品试验中心')
    df['产线'] = df['产线'].replace(930000018.0, '平台开发')
    df['产线'] = df['产线'].replace(930000019.0, '物流部')
    df['产线'] = df['产线'].replace(930000020.0, '二号工厂机体加工线')
    df['产线'] = df['产线'].replace(930000021.0, '二号工厂缸盖加工线')
    df['产线'] = df['产线'].replace(930000022.0, '二号工厂装配线')
    df['产线'] = df['产线'].replace(930000023.0, '二号工厂试车线')
    df['产线'] = df['产线'].replace(930000024.0, '二号工厂成套线')
    df['产线'] = df['产线'].replace(930000025.0, '二号工厂机体加工线设修')
    df['产线'] = df['产线'].replace(930000026.0, '二号工厂缸盖加工线设修')
    df['产线'] = df['产线'].replace(930000027.0, '二号工厂装配线设修')
    df['产线'] = df['产线'].replace(930000028.0, '二号工厂试车线设修')
    df['产线'] = df['产线'].replace(930000029.0, '二号工厂试车线生产工具')
    df['产线'] = df['产线'].replace(930000030.0, '二号工厂成套线设修')
    df['产线'] = df['产线'].replace(930000031.0, '调试厂校机维修')
    df['产线'] = df['产线'].replace(930000032.0, '配件加工生产')
    df['产线'] = df['产线'].replace(930000033.0, '材料、零部件让售')
    df['产线'] = df['产线'].replace(930000034.0, '装配厂102线改型')
    df['产线'] = df['产线'].replace(930000035.0, '装配厂85线改型')
    df['产线'] = df['产线'].replace(930000036.0, '装配厂VM线改型')
    df['产线'] = df['产线'].replace(930000037.0, '调试厂产线改型')
    df['产线'] = df['产线'].replace(930000038.0, '二号工厂装配线改型')
    df['产线'] = df['产线'].replace(930000039.0, '二号工厂试车线改型')
    df['产线'] = df['产线'].replace(930000040.0, '二号工厂成套线改型')
    df['产线'] = df['产线'].replace(930000041.0, '装配厂102线补废')
    df['产线'] = df['产线'].replace(930000042.0, '装配厂85线补废')
    df['产线'] = df['产线'].replace(930000043.0, '装配厂VM线补废')
    df['产线'] = df['产线'].replace(930000044.0, '调试厂产线补废')
    df['产线'] = df['产线'].replace(930000045.0, '二号工厂装配线补废')
    df['产线'] = df['产线'].replace(930000046.0, '二号工厂试车线补废')
    df['产线'] = df['产线'].replace(930000047.0, '二号工厂成套线补废')
    df['产线'] = df['产线'].replace(930000048.0, '随机附件补充领用')
    df['产线'] = df['产线'].replace(np.nan, '未定义')
    df['保管员'] = df['保管员'].replace(np.nan,'未定义')
    df['订单类型'] = df['订单类型'].replace('MO','一次下架')
    ########################################################
    df['订单类型'] = df['订单类型'].replace('SO','紧急要料')
    ########################################################
    # df_inner = df.groupby('实际缺件数')['物料'].count()
    # 对city字段进行汇总,并分别计算prince的合计和均值
    # df_inner.groupby('city')['price'].agg([len,np.sum, np.mean])
    # df[df['creativeID']<=10000]
    ###########################################################
    # print(df.head())
    # df.to_excel('F:\excel_to_python.xlsx', sheet_name='gg')
    ##############导出操作###################################
    '''
    以上已经完成保管员的数据表
    下一步需要处理
    1.获取物料、产线、订单类型维护汇总的缺件总数(完成)
    2.合并表的内容(完成)
    3.获取指定日期的缺件数()
    '''
    # df_inner1.to_excel('F:\kk.xlsx', sheet_name='gg')
    # df['产线'] = df['产线'].replace('485装配线','485装配线')
    # df['产线'] = df['产线'].replace('随机附件','随机附件')
    # df['产线'] = df['产线'].replace('4102装配线','4102装配线')
    # df['产线'] = df['产线'].replace('VM装配线','VM装配线')
    # df['产线'] = df['产线'].replace('整理线','整理线')
    # df['产线'] = df['产线'].replace('二号厂装配线','二号厂装配线')
    # df['产线'] = df['产线'].replace('二号厂试车线','二号厂试车线')
    # df['产线'] = df['产线'].replace('二号厂成套线','二号厂成套线')
    # df['产线'] = df['产线'].replace('二号厂随机附件','二号厂随机附件')
    # df['产线'] = df['产线'].replace('外协件销售出库','外协件销售出库')
    # df['产线'] = df['产线'].replace('制造工程部','制造工程部')
    # df['产线'] = df['产线'].replace('铸造厂','铸造厂')
    # df['产线'] = df['产线'].replace('金加工厂','金加工厂')
    # df['产线'] = df['产线'].replace('金加工厂机体线','金加工厂')
    # df['产线'] = df['产线'].replace('装配厂','4102装配线')
    # df['产线'] = df['产线'].replace('装配厂102线','4102装配线')
    # df['产线'] = df['产线'].replace('装配厂85线','485装配线')
    # df['产线'] = df['产线'].replace('装配厂VM线','VM装配线')
    # df['产线'] = df['产线'].replace('调试厂产线','整理线')
    # df['产线'] = df['产线'].replace('调试厂生产','整理线')
    # df['产线'] = df['产线'].replace('调试厂设修','整理线')
    # df['产线'] = df['产线'].replace('质量部设修','质量部')
    # df['产线'] = df['产线'].replace('质量部市场','质量部')
    # df['产线'] = df['产线'].replace('质量部检验','质量部')
    # df['产线'] = df['产线'].replace('技术中心','技术中心')
    # df['产线'] = df['产线'].replace('应用工程部','技术中心')
    # df['产线'] = df['产线'].replace('产品试验中心','技术中心')
    # df['产线'] = df['产线'].replace('平台开发','技术中心')
    # df['产线'] = df['产线'].replace('物流部','物流部')
    # df['产线'] = df['产线'].replace('二号工厂机体加工线','二号工厂机体')
    # df['产线'] = df['产线'].replace('二号工厂缸盖加工线','二号工厂缸盖')
    # df['产线'] = df['产线'].replace('二号工厂装配线','二号厂装配线')
    # df['产线'] = df['产线'].replace('二号工厂试车线','二号厂试车线')
    # df['产线'] = df['产线'].replace('二号工厂成套线','二号工厂成套线')
    # df['产线'] = df['产线'].replace('二号工厂机体加工线设修','二号工厂机体')
    # df['产线'] = df['产线'].replace('二号工厂缸盖加工线设修','二号工厂缸盖')
    # df['产线'] = df['产线'].replace('二号工厂装配线设修','二号厂装配线')
    # df['产线'] = df['产线'].replace('二号工厂试车线设修','二号厂试车线')
    # df['产线'] = df['产线'].replace('二号工厂试车线生产工具','二号厂试车线')
    # df['产线'] = df['产线'].replace('二号工厂成套线设修','二号工厂成套线')
    # df['产线'] = df['产线'].replace('调试厂校机维修','校机维修')
    # df['产线'] = df['产线'].replace('配件加工生产','配件加工')
    # df['产线'] = df['产线'].replace('材料、零部件让售','材料、零部件让售')
    # df['产线'] = df['产线'].replace('装配厂102线改型','4102装配线')
    # df['产线'] = df['产线'].replace('装配厂85线改型','485装配线')
    # df['产线'] = df['产线'].replace('装配厂VM线改型','VM装配线')
    # df['产线'] = df['产线'].replace('调试厂产线改型','整理线')
    # df['产线'] = df['产线'].replace('二号工厂装配线改型','二号厂装配线')
    # df['产线'] = df['产线'].replace('二号工厂试车线改型','二号厂试车线')
    # df['产线'] = df['产线'].replace('二号工厂成套线改型','二号工厂成套线')
    # df['产线'] = df['产线'].replace('装配厂102线补废','4102装配线')
    # df['产线'] = df['产线'].replace('装配厂85线补废','485装配线')
    # df['产线'] = df['产线'].replace('装配厂VM线补废','VM装配线')
    # df['产线'] = df['产线'].replace('调试厂产线补废','整理线')
    # df['产线'] = df['产线'].replace('二号工厂装配线补废','二号厂装配线')
    # df['产线'] = df['产线'].replace('二号工厂试车线补废','二号厂试车线')
    # df['产线'] = df['产线'].replace('二号工厂成套线补废','二号工厂成套线')
    # df['产线'] = df['产线'].replace('随机附件补充领用','随机附件')
    df_inner1 = df.groupby(['产线','订单类型','物料','物料说明','保管员'])['实际缺件数'].agg([np.sum]).reset_index()
    df_inner1.rename(columns={'sum':'缺件总数','订单类型':'业务类型'}, inplace=True)
    # 物料         产线      订单类型       缺件总数  ----物料说明 ----保管员
    # 01119248   随机附件    一次下架        190
    # 1000045386 二号厂装配线  一次下架        1
    # 1000047928 485装配线  一次下架           8
    # 1000050574 调试厂产线改型 紧急要料       6
    # 1000069366 485装配线  一次下架          68
    # 1.获取物料、产线、订单类型维护汇总的缺件总数完成
    # df_inner1.to_excel('F:\k1k.xlsx', sheet_name='gg',index=False)
    #############################################################################
    print('数据表逻辑处理已完成!')
    Date_1 = datetime.datetime.strptime(Date, '%Y-%m-%d')
    df4 = df[df['需求日期']==Date_1]
    df_inner2 = df4.groupby(['产线','订单类型','物料','物料说明','保管员'])['实际缺件数'].agg([np.sum]).reset_index()
    df_inner2.rename(columns={'sum':'当日缺件数','订单类型':'业务类型'}, inplace=True)
    # print(df_inner2.head())
    result = pd.merge(df_inner1, df_inner2, how='left', on=['物料'])
    result = result.drop(columns = ['产线_y','业务类型_y','物料说明_y','保管员_y'])
    result.rename(columns={'产线_x':'产线','业务类型_x':'业务类型','物料说明_x':'物料说明','保管员_x':'保管员'}, inplace=True)
    result = result.groupby(['物料','物料说明','保管员','当日缺件数'])['缺件总数'].agg([np.sum]).reset_index()
    result.rename(columns={'sum':'缺件总数'}, inplace=True)
    # result = result.sort(columns='保管员', ascending=False)
    print(result.head())
    print('所有任务均已完成!')
    '''
    1.增加导出表的日期功能(完成)
    2.在缺件表上显示日期
    3.按保管员自动打印
    [加分项]:按产线维度汇总
    [超级加分项]:检测存在缺件时才汇总(涉及到递归算法,难度大,要重写算法)
    '''
    result.to_excel('F:\缺件统计表{!s}.xlsx'.format(Date), sheet_name='缺件总表',index=False)
    return 'The Job is Finished'
if __name__ == '__main__':
    html_to_excel()
    time.sleep(5)
    change_excel()
emmm

猜你喜欢

转载自www.cnblogs.com/pandaboy1123/p/9191976.html
今日推荐