使用动态代理爬取某房产平台信息并写入Excel(Python)

import requests
from lxml import html
import random
import xlwt
import time
import hashlib
from datetime import datetime

# Pool of User-Agent strings; one entry is picked at random for the request
# headers built below. It must stay non-empty, because random.choice()
# raises IndexError on an empty sequence. Extend the list as needed.
ugList = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0) "
    "Gecko/20100101 Firefox/86.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 "
    "(KHTML, like Gecko) Version/14.0.3 Safari/605.1.15",
]

# Proxy account settings. `secret` is a placeholder and has to be replaced
# with the real value that belongs to `orderno`.
orderno = "DT20210228205219E8iMOzLE"
secret = "XXXXXXXXXXX"
ip = "dynamic.xiongmaodaili.cn"
# Port for pay-per-use orders.
port = "8088"
ip_port = ip + ":" + port
# Unix time in whole seconds, as a string; it is part of the signed payload.
timestamp = str(int(time.time()))
# Alternative: timestamp = str(int(datetime.timestamp(datetime.now())))

# The signature is the upper-cased hex MD5 digest of the comma-separated
# "orderno=...,secret=...,timestamp=..." string.
txt = "orderno=" + orderno + "," + "secret=" + secret + "," + "timestamp=" + timestamp
txt = txt.encode()
md5_string = hashlib.md5(txt).hexdigest()
sign = md5_string.upper()
auth = "sign=" + sign + "&" + "orderno=" + orderno + "&" + "timestamp=" + timestamp + "&change=true"

# Only HTTPS traffic is routed through the proxy.
proxy = {
    "https": "https://" + ip_port,
}

headers = {
    "User-Agent": random.choice(ugList),
    # Send the signature computed above rather than a hard-coded value, so
    # the sign and timestamp always match the current orderno/secret.
    "Proxy-Authorization": auth,
}

# Create the output workbook with a single sheet and fill in the header row.
i = 0
work_book = xlwt.Workbook(encoding="utf-8")
sheet = work_book.add_sheet("巴州二手房信息")

# (column index, header caption) pairs, listed in the order the cells are
# written.
header_cells = (
    (3, "小区名称"),
    (4, "区域1"),
    (5, "区域2"),
    (6, "地址"),
    (7, "总价(万元)"),
    (8, "单价(元/㎡)"),
    (2, "房子大小(㎡)"),
    (1, "房型"),
    (0, "标题"),
)
for column, caption in header_cells:
    sheet.write(0, column, caption)

# Row 0 holds the captions, so data rows start at row 1.
row_num = 1
# Number of consecutive text nodes that make up one listing in the XPath
# result: title, six house-type fragments, size, community name, three
# area/address parts, total price and unit price.
FIELDS_PER_LISTING = 14

# Union of all text nodes collected from a result page.
LISTING_XPATH = ("//div[@class='property-content-title']/h3/text()|//p["
                 "@class='property-content-info-comm-name']/text()|//p[ "
                 "@class='property-content-info-comm-address']//span/text()|//span[ "
                 "@class='property-price-total-num']/text()|//p["
                 "@class='property-price-average']/text()|//p["
                 "@class='property-content-info-text'][1]/text()|//p["
                 "@class='property-content-info-text property-content-info-attribute']//span//text()")


def _fetch_listing_fields(page_url):
    """Download one result page and return its scraped listing text nodes.

    Args:
        page_url: Absolute URL of the result page to fetch.

    Returns:
        A flat list of strings, in the order lxml returns the XPath matches.

    Raises:
        requests.RequestException: On a connection failure, a timeout, or a
            4xx/5xx response.
    """
    # One session per page, closed on exit so its sockets are released.
    with requests.Session() as s:
        # Retries have to be configured on the transport adapter; requests
        # does not read a module-level DEFAULT_RETRIES attribute.
        s.mount("https://", requests.adapters.HTTPAdapter(max_retries=5))
        # NOTE: verify=False disables TLS certificate verification.
        r = s.get(page_url, headers=headers, proxies=proxy, verify=False, timeout=20)
        r.raise_for_status()
        r.encoding = 'utf-8'
        preview_html = html.fromstring(r.text)
    return [str(x) for x in preview_html.xpath(LISTING_XPATH)]


file_name = r"F:\巴州二手房爬取.xls"
try:
    for page in range(1, 51):
        url = "https://bygl.58.com/ershoufang/p" + str(page) + "/"
        try:
            list_title = _fetch_listing_fields(url)
        except requests.RequestException as exc:
            # A single failed page must not abort the run and discard the
            # rows collected so far; report it and move on.
            print("第" + str(page) + "页请求失败,已跳过: " + str(exc))
            time.sleep(1)
            continue

        print("-------------------------第" + str(page) + "页-------------------------------")
        print(list_title)

        # Walk the flat list one listing at a time. The range stops before a
        # trailing partial record, so a result whose length is not a multiple
        # of FIELDS_PER_LISTING cannot raise IndexError.
        for j in range(0, len(list_title) - FIELDS_PER_LISTING + 1, FIELDS_PER_LISTING):
            record = list_title[j:j + FIELDS_PER_LISTING]
            biaoti = record[0]
            house_type = "".join(record[1:7])
            size = record[7].strip()
            title = record[8]
            area1, area2, area3 = record[9:12]
            totalnum = record[12]
            avg = record[13]

            sheet.write(row_num, 3, title)
            sheet.write(row_num, 4, area1)
            sheet.write(row_num, 5, area2)
            sheet.write(row_num, 6, area3)

            sheet.write(row_num, 7, totalnum)
            sheet.write(row_num, 8, avg)
            sheet.write(row_num, 2, size)
            sheet.write(row_num, 1, house_type)
            sheet.write(row_num, 0, biaoti)
            row_num += 1

        # Pause between pages to keep the request rate low.
        time.sleep(1)
finally:
    # Save whatever was collected, even if the loop stopped on an error.
    work_book.save(file_name)

猜你喜欢

转载自blog.csdn.net/weixin_51424938/article/details/114808012