selenium+PhantomJS爬取瓜子二手车


# -*- coding: utf-8 -*-
"""Scrape used-car listings from guazi.com with selenium + PhantomJS.

Starting at ``url``, the script reads the title, info line and price of each
listing on the page, appends them as one comma-separated, GBK-encoded row to
``OUTPUT_PATH``, then clicks the "next" link and repeats until no further
page can be reached.
"""
import io
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import WebDriverException
from selenium.webdriver import DesiredCapabilities

# Request header: present a desktop Chrome user agent to the site.
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap['phantomjs.page.settings.userAgent'] = (
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'
)
url = 'https://www.guazi.com/www/buy/i7/'

# Raw string so the backslashes are never read as escape sequences.
OUTPUT_PATH = r"C:\Users\Administrator\Desktop\guazi.csv"
# The script probes list positions 1..40 on every page.
ITEMS_PER_PAGE = 40
# Absolute XPath of the i-th listing's anchor element.
ITEM_XPATH = "/html/body/div[4]/ul/li[{}]/a"

# The capabilities must be handed to the driver explicitly; otherwise the
# user agent configured above is never applied.
driver = webdriver.PhantomJS(desired_capabilities=dcap)
try:
    driver.get(url)
    # Open the output once and let the file object do the GBK encoding.
    # Characters GBK cannot represent are replaced instead of aborting the run.
    with io.open(OUTPUT_PATH, 'a', encoding='gbk', errors='replace') as f:
        while True:
            for i in range(1, ITEMS_PER_PAGE + 1):
                item = ITEM_XPATH.format(i)
                try:
                    title = driver.find_element_by_xpath(item + "/h2").text
                    info = driver.find_element_by_xpath(
                        item + "/div[@class='t-i']").text
                    price = driver.find_element_by_xpath(
                        item + "/div[2]/p").text
                except NoSuchElementException:
                    # Position not present (e.g. a page with fewer entries):
                    # skip it rather than crash the whole run.
                    continue
                print(u"正在保存数据 ------" + title)
                f.write(u'{},{},{}\n'.format(title, info, price))
            # Keep finished pages on disk even if a later page fails.
            f.flush()
            try:
                driver.find_element_by_class_name("next").click()
            except WebDriverException:
                # No usable "next" link: the last page has been processed.
                break
            # Give the next page time to load before reading it.
            time.sleep(1.5)
finally:
    # Always shut down the PhantomJS process, even after an error.
    driver.quit()

用 requests 直接请求该页面时返回 <Response [203]>,因此改用 selenium + PhantomJS 抓取。

猜你喜欢

转载自www.cnblogs.com/zhujunzoe/p/9103152.html
今日推荐