"""Print title, region and price of rental listings scraped from bj.5i5j.com.

The script requests the Huilongguan rental listing pages, locates every
listing card with XPath and prints three fields per card, one per line.
"""
import requests
from lxml import etree

import mysqlhelper

# Listing page URL; ``%i`` is replaced by the 1-based page number.
base_url = 'https://bj.5i5j.com/zufang/huilongguan/n%i/'

# Request headers sent with every page fetch.
# NOTE(review): the cookie looks like it was copied from a browser session and
# has probably expired -- confirm whether the site still accepts it.
headers = {
    'Cookie': (
        'PHPSESSID=9tjfg532s3u4o5ahq0juh0khch; '
        'yfx_c_g_u_id_10000001=_ck18081922291814885155118735681; '
        'yfx_f_l_v_t_10000001=f_t_1534688958476__r_t_1534688958476__v_t_1534688958476__r_c_0; '
        '_Jo0OQK=5BC06AAD2FA05488D0D101FDB4BE44CFEF7F9AA1275AF4370BED5BFC491C78050FCCC3C2840F6FA147D05166072734382F85D003822B7A956B5919129AEBEB4366DC57212F12283777C840763663251ADEB840763663251ADEB8F2DF4BF0D800FE52350674422DE2517GJ1Z1bQ==; '
        'domain=bj; '
        '_ga=GA1.2.606362966.1534688957; '
        '_gid=GA1.2.341510299.1534688957; '
        'Hm_lvt_94ed3d23572054a86ed341d64b267ec6=1534688967; '
        'Hm_lpvt_94ed3d23572054a86ed341d64b267ec6=1534689234'
    ),
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36'
    ),
}

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# XPath expressions: the first selects every listing card on a page, the
# others are evaluated relative to one card.
LISTING_XPATH = '//div[@class="list-con-box"]/ul[@class="pList"]/li/div[@class="listCon"]'
TITLE_XPATH = './h3[@class="listTit"]/a'
REGION_XPATH = './div[@class="listX"]/p[1]/text()'
PRICE_XPATH = './div[@class="listX"]/div[@class="jia"]/p[@class="redC"]/strong'

# NOTE: database persistence is disabled; the script only prints. The earlier
# draft stored rows through mysqlhelper roughly like this:
# myhelper = mysqlhelper.MysqlHelper()
# sql = 'INSERT INTO lianjiaxinxi (title, region, zone, meters, location, price) VALUES' \
#       ' (%s, %s, %s, %s, %s, %s)'
# zone = li_ele.xpath('./div[@class="listX"]/p[2]/text()')[0]
# meters = re.match(r'\d+', meters).group(0)
# data = (title, region, zone, meters, location, price)
# myhelper.fangfa(sql, data)


def _first(nodes):
    """Return the first XPath match, or None when nothing matched."""
    return nodes[0] if nodes else None


def _parse_listing(li_ele):
    """Extract ``(title, region, price)`` from one listing card element.

    Returns None when any of the three XPath queries matches nothing, so the
    caller can skip the card instead of failing with IndexError.
    """
    title_ele = _first(li_ele.xpath(TITLE_XPATH))
    region = _first(li_ele.xpath(REGION_XPATH))
    price_ele = _first(li_ele.xpath(PRICE_XPATH))
    if title_ele is None or region is None or price_ele is None:
        return None
    return title_ele.text, region, price_ele.text


def scrape_page(page):
    """Fetch listing page number *page* and print each listing's fields.

    Prints title, region and price on separate lines for every card that has
    all three. Raises ``requests.RequestException`` (including
    ``requests.HTTPError`` for 4xx/5xx responses) when the fetch fails.
    """
    url = base_url % page
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an error status rather than parsing an error page and
    # silently reporting zero listings.
    response.raise_for_status()

    html_ele = etree.HTML(response.text)
    if html_ele is None:
        # Nothing parseable came back; there are no cards to report.
        return

    for li_ele in html_ele.xpath(LISTING_XPATH):
        listing = _parse_listing(li_ele)
        if listing is None:
            # Card lacks a title, region or price node; skip it.
            continue
        title, region, price = listing
        print(title)
        print(region)
        print(price)


# range(1, 2) covers page 1 only; widen the range to crawl more pages.
for i in range(1, 2):
    scrape_page(i)
5i5j获取房租信息
猜你喜欢
转载自blog.csdn.net/weixin_42958164/article/details/81843817
今日推荐
周排行