import requests
from lxml import html
import random
import xlwt
import time
import hashlib
from datetime import datetime
# --- Proxy and request-header configuration ---------------------------------

# Pool of User-Agent strings to pick from at random. It is empty here, so no
# custom User-Agent header is sent until entries are added.
ugList = []

# Proxy account identifiers and endpoint.
orderno = "DT20210228205219E8iMOzLE"
secret = "XXXXXXXXXXX"
ip = "dynamic.xiongmaodaili.cn"
port = "8088"
ip_port = ip + ":" + port

# The signature is the upper-cased MD5 hex digest of
# "orderno=<orderno>,secret=<secret>,timestamp=<unix seconds>".
timestamp = str(int(time.time()))
txt = "orderno=" + orderno + "," + "secret=" + secret + "," + "timestamp=" + timestamp
txt = txt.encode()
md5_string = hashlib.md5(txt).hexdigest()
sign = md5_string.upper()
auth = "sign=" + sign + "&" + "orderno=" + orderno + "&" + "timestamp=" + timestamp + "&change=true"

proxy = {"https": "https://" + ip_port}

# Send the signature computed above so that the timestamp is current and the
# "&timestamp=" key is spelled correctly, instead of a pasted literal.
headers = {"Proxy-Authorization": auth}

# random.choice() raises IndexError on an empty sequence, so only add a
# User-Agent when there is at least one candidate to choose from.
if ugList:
    headers["User-Agent"] = random.choice(ugList)
# Module-level counter, initialised to zero.
i = 0

# In-memory workbook with a single sheet that receives the scraped rows.
work_book = xlwt.Workbook(encoding="utf-8")
sheet = work_book.add_sheet("巴州二手房信息")

# Header row (row 0): the position of each caption is its column index.
for col, heading in enumerate((
    "标题",
    "房型",
    "房子大小(㎡)",
    "小区名称",
    "区域1",
    "区域2",
    "地址",
    "总价(万元)",
    "单价(元/㎡)",
)):
    sheet.write(0, col, heading)

# Index of the next sheet row to fill; row 0 holds the captions.
row_num = 1
# Each listing contributes 14 consecutive text nodes to the XPath result:
# 1 title, 6 house-type fragments, 1 size, then 6 further fields.
FIELDS_PER_LISTING = 14

# Union of all text nodes that make up one listing card.
LISTING_XPATH = ("//div[@class='property-content-title']/h3/text()|//p["
                 "@class='property-content-info-comm-name']/text()|//p[ "
                 "@class='property-content-info-comm-address']//span/text()|//span[ "
                 "@class='property-price-total-num']/text()|//p["
                 "@class='property-price-average']/text()|//p["
                 "@class='property-content-info-text'][1]/text()|//p["
                 "@class='property-content-info-text property-content-info-attribute']//span//text()")


def split_listings(fields):
    """Group a flat list of scraped strings into one tuple per listing.

    Args:
        fields: flat list of strings, ``FIELDS_PER_LISTING`` per listing.

    Returns:
        A list of 9-tuples whose positions match the sheet columns 0-8:
        item 0, items 1-6 concatenated, item 7 stripped of surrounding
        whitespace, then items 8-13 unchanged.

    A trailing group with fewer than ``FIELDS_PER_LISTING`` items is ignored
    rather than raising IndexError.
    """
    rows = []
    last_start = len(fields) - FIELDS_PER_LISTING
    for start in range(0, last_start + 1, FIELDS_PER_LISTING):
        chunk = fields[start:start + FIELDS_PER_LISTING]
        rows.append((
            chunk[0],
            "".join(chunk[1:7]),
            chunk[7].strip(),
            chunk[8],
            chunk[9],
            chunk[10],
            chunk[11],
            chunk[12],
            chunk[13],
        ))
    return rows


def fetch_listing_fields(page_number):
    """Download one result page and return its listing text nodes as strings.

    Uses the module-level ``headers`` and ``proxy``. A fresh session is opened
    for every page and closed when the page has been read.
    """
    url = "https://bygl.58.com/ershoufang/p" + str(page_number) + "/"
    with requests.session() as s:
        # Assigning requests.DEFAULT_RETRIES has no effect; retries must be
        # configured on the transport adapter.
        s.mount("https://", requests.adapters.HTTPAdapter(max_retries=5))
        # NOTE(review): verify=False disables TLS certificate verification.
        r = s.get(url, headers=headers, proxies=proxy, verify=False, timeout=20)
        r.encoding = 'utf-8'
        preview_html = html.fromstring(r.text)
    return [str(x) for x in preview_html.xpath(LISTING_XPATH)]


if __name__ == "__main__":
    # Pages are numbered from 1 in the URL; 50 pages are fetched.
    for page in range(1, 51):
        list_title = fetch_listing_fields(page)
        print("-------------------------第" + str(page) + "页-------------------------------")
        print(list_title)
        for record in split_listings(list_title):
            for col, value in enumerate(record):
                sheet.write(row_num, col, value)
            row_num += 1
        # Pause between pages.
        time.sleep(1)
    file_name = r"F:\巴州二手房爬取.xls"
    work_book.save(file_name)