python查询POS后台获取指定时间和状态的订单存入到excel表格中

版权声明:版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/z564359805/article/details/85267444
#!/usr/bin/env python
# coding=utf-8
# 查询pos后台指定时间和状态的订单

import requests
import re
from lxml import etree
from openpyxl import Workbook
import time,os
from requests.packages import urllib3
from datetime import datetime

class POS_crawl(object):
    def __init__(self, username, password):
        # 初始化headers实例
        self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0",
                        "Referer": "https://pos.XXXX.com/login.html"
                        }
        # 核实界面
        self.post_url = "https://pos.XXXXX.com/j_spring_security_check"
        # session会话对象
        self.session = requests.session()
        self.username = username
        self.password = password
        self.data = ['序号','订单号','卖家','客户','订单金额','下单时间','订单来源','订单状态','签收时间']
        self.wb = Workbook()
        # 去掉警告
        urllib3.disable_warnings()

    # 提取表单登录信息(可以和模拟登陆合并成一个)
    def get_login_info(self):
        data = {
            "j_username": self.username,
            "j_password": self.password
        }
        return data
    # 模拟登录
    def login(self):
        data = self.get_login_info()
        headers = {
                   'User-Agent': 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0);',
                   "Referer": self.post_url,
                   "X-Requested-With": "XMLHttpRequest",
                   }
        try:
            login_page = self.session.post(self.post_url, data=data, headers=headers,verify=False)
            # print(login_page.text)
            if "loginerror" in login_page.text:
                print("登录失败,错误的手机号码或密码!")

            if "</span>首页" in login_page.text:
                print("欢迎您'%s',成功登陆POS管理系统!"%self.username)
                self.order_list()
        except Exception as e:
            print(e)
    # 处理订单
    def order_list(self):
        # 获取当天日期
        file_name_date = time.strftime("%Y-%m-%d", time.localtime())
        # 日期格式正则规则
        patt = re.compile(r"\d{4}-\d{1,2}-\d{1,2}")
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0",
            "Referer": "https://pos.XXXX.com/order/tolist.html",
        }
        for state in ['100','40']:
            while True:
                print("\n" + "=" * 75)
                print("开始日期如果不输入则为2015年最早日期,假设为2015-1-1,结束日期如果不输入则为当天日期。")
                print("注意:已完成订单中,如果开始和结束日期都不输入,则把签收日期为空的也会筛选出来。")
                print("=" * 75 + "\n")
                if state == "100":
                    # self.wb.sheetnames显示sheet表名字,防止输错日期循环的时候多次创建ws
                    if '已完成订单列表'not in self.wb.sheetnames:
                        ws = self.wb.active
                        ws.title = '已完成订单列表'  # 修改第一个sheet表名为销售企业数量列表
                    start_date = input("请输入已完成签收的开始日期(格式:2018-11-30):")
                    end_date = input("请输入已完成签收的结束日期(格式:2018-11-30):")
                    ws.column_dimensions['I'].width = 19
                else:
                    if '配送中订单列表'not in self.wb.sheetnames:
                        ws = self.wb.create_sheet('配送中订单列表',index=1)  # 插入新的工作表,放在1位置
                    start_date = input("请输入配送中的下单开始日期(格式:2018-11-30):")
                    end_date = input("请输入配送中的下单结束日期(格式:2018-11-30):")
                re_start_date = patt.findall(start_date)
                re_end_date = patt.findall(end_date)
                if start_date !='':
                    if len(re_start_date)!=0:
                        # 结束日期可以不输入
                        if end_date =='':
                            break
                        else:
                            if len(re_end_date) == 0:
                                print("ERROR:结束日期格式输入有误,请重新输入!")
                                continue
                            else:
                                break
                    else:
                        if end_date != '':
                            if len(re_end_date) == 0:
                                print("ERROR:开始和结束日期格式输入有误,请重新输入!")
                                continue
                        else:
                            print("ERROR:开始日期格式输入有误,请重新输入!")
                            continue
                else:
                    # re_end_date = patt.findall(end_date)
                    if end_date != '':
                        if len(re_end_date) == 0:
                            print("ERROR:结束日期格式输入有误,请重新输入!")
                            continue
                        else:
                            break
                    else:
                        break
            # 先写入表头数据
            for head in range(1, len(self.data) + 1):
                _ = ws.cell(row=1, column=head, value=self.data[head - 1])

            # 我们将查询日期的区间写在第一行第10列
            if end_date == '':
                end_date = file_name_date
            if start_date == '':
                start_date = '2015-1-1'
            if state == "100":
                select_date = "查询签收日期:"+start_date+"至"+end_date
                ws.cell(row=1, column=10, value=select_date)
            else:
                select_date = "查询下单日期:" + start_date + "至" + end_date
                ws.cell(row=1,column=9,value=select_date)
            # 冻结首行
            ws.freeze_panes = 'A2'
            ws.column_dimensions['A'].width = 6
            ws.column_dimensions['B'].width = 10
            ws.column_dimensions['C'].width = 45
            ws.column_dimensions['D'].width = 33
            ws.column_dimensions['E'].width = 10
            ws.column_dimensions['F'].width = 19
            ws.column_dimensions['G'].width = 12
            self.process_product(start_date,end_date,state,ws,headers)
        self.wb.save('订单数量列表'+file_name_date+'.xlsx')
        address = os.path.abspath('订单数量列表'+file_name_date+'.xlsx')
        print("数据处理完毕,表格已经保存到该路径下:%s"%address)
        self.session.post('https://pos.XXXX.com/j_spring_security_logout',headers=headers,verify=False)
        print("退出后台系统....")
    # 获取数据
    def process_product(self,start_date,end_date,state,ws,headers):
        formdata = {
        # "d-5481-p": "1",
        "to.buyer": "",
        "to.endDate": end_date,
        "to.id": "",
        # 订单状态,-1代表全部
        "to.orderSource": "-1",
        "to.seller": "",
        "to.startDate": start_date,
        # 已完成是100,配送中是40
        "to.status": state,
        }
        time.sleep(1)

        # headers = {
        #     "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0",
        #     "Referer": "https://pos.XXXXX.com/order/tolist.html",
        # }
        get_page = self.session.post("https://pos.XXXXX.com/order/tolist.html", data=formdata, headers=headers,verify=False)
        # 利用xpath规则提取查询结果信息
        get_page_html = etree.HTML(get_page.text)
        # 获取查询结果的数据,计算有多少页,是一个列表
        page_list = get_page_html.xpath('//*[@id="signForm"]/span[1]/text()')
        # with open("557.html", "w", encoding="utf-8")as f:
        #     f.write(get_page.text)
        if len(page_list) ==0:
            print("抱歉,没有查询到相应条件的商品!")
        else:
            # 如果只有一条数据会显示One
            if page_list[0].split(" ")[0] != "One":
                total = int(page_list[0].split(" ")[0].replace(",",""))
            else:
                total = 1
            if total % 20 == 0:
                end_page = total // 20
                print('共获取到%d项订单数据,合计%d页' % (total, end_page))
            else:
                end_page = total // 20 + 1
                print('共获取到%d项订单数据,合计%d页' % (total, end_page))
            for page in range(1,end_page+1):
                product_list = []
                order_list_url = "https://pos.XXXX.com/order/tolist.html?d-5481-p="+str(page)+"&to.status=" + state + "&to.orderSource=-1&to.buyer=&to.startDate=" + start_date + "&to.id=&to.seller=&to.endDate=" + end_date
                order_list = self.session.post(order_list_url,headers=headers, verify=False)
                get_order_html = etree.HTML(order_list.text)
                # 获取序号
                number_ = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[1]/text()')
                number_result = []
                [number_result.append(int(number)) for number in number_]
                # 获取订单号
                order_number_ = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[2]/a/text()')
                order_number_result = []
                [order_number_result.append(int(order_number)) for order_number in order_number_]
                # 获取卖家
                order_company_ = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[3]/a/text()')
                order_company_result = []
                [order_company_result.append(order_company.replace("\n", "").replace("\t", "").replace(" ", "").strip()) for
                 order_company in order_company_]
                # 获取客户
                order_client_ = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[4]/text()')
                order_client_result =[]
                [order_client_result.append(order_client.replace("\n", "").replace("\t", "")) for order_client in
                 order_client_]
                # 获取订单金额
                order_money_ = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[5]/text()')
                order_money_result = []
                [order_money_result.append(float(order_money)) for order_money in order_money_]
                # 获取下单时间,只有这个首尾没有空格
                order_time = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[6]/text()')
                # 订单来源
                order_source_ = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[10]/text()')
                order_source_result = []
                [order_source_result.append(order_source.replace("\n", "").replace("\t", "")) for order_source in
                 order_source_]
                # 订单状态
                order_state_ = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[11]/text()')
                order_state_result = []
                [order_state_result.append(order_state.replace("\n", "").replace("\t", "").replace(" ","")) for order_state in
                 order_state_]
                product_list.append(number_result)
                product_list.append(order_number_result)
                product_list.append(order_company_result)
                product_list.append(order_client_result)
                product_list.append(order_money_result)
                product_list.append(order_time)
                product_list.append(order_source_result)
                product_list.append(order_state_result)
                # 存储签收时间,首尾无空格
                sign_time_result = []
                # 这样获取的是xpath对象,没加/text是因为这样也能获取到空的值
                sign_time_element = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[7]')
                [sign_time_result.append(sign_time.text) for sign_time in sign_time_element]
                product_list.append(sign_time_result)
                # 已完成才有签收时间
                # if state == '100':
                #     sign_time = get_order_html.xpath('//*[@id="to"]/tbody/tr/td[7]/text()')
                #     product_list.append(sign_time)
                # else:
                #     sign_time = ("*"*(len(number)-1)).split("*")
                #     product_list.append(sign_time)
                row_start = [int(k) for k in number_][0]+1
                row_end = [int(k) for k in number_][-1]+1
                print("第%d页数据写入表格中...."%page)
                for col in range(1, len(product_list)+1):
                    for row in range(row_start, row_end+1):
                        # print("写入第%d行第%d列数据..." % (row, col))
                        _ = ws.cell(row=row, column=col, value=(product_list[col - 1][row-2-(page-1)*20]))
            print("此项数据处理完成。")
if __name__ == "__main__":
    username = input("请输入登录账号:")
    password = input("请输入登录密码:")
    az = POS_crawl(username, password)
    az.login()
    input("请按回车键退出程序。。")

猜你喜欢

转载自blog.csdn.net/z564359805/article/details/85267444