# Python + Selenium 爬取淘宝 (scraping Taobao search results)

from selenium import webdriver
from lxml import etree
import time

# Location of the chromedriver executable handed to Selenium.
CHROMEDRIVER_PATH = r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe"

# One browser session shared by every function in this script.
driver = webdriver.Chrome(CHROMEDRIVER_PATH)
driver.maximize_window()


def get_url(url):
    """Load *url* in the shared browser, then start scraping with get_info().

    The implicit wait (in seconds) is set right after the page load, before
    any element lookups happen.
    """
    wait_seconds = 10
    driver.get(url)
    driver.implicitly_wait(time_to_wait=wait_seconds)
    get_info()

def _first_text(nodes):
    """Return the first item of an XPath text() result, or '' when it is empty."""
    return nodes[0] if nodes else ''


def get_info():
    """Parse the current page, print every listing, then move to the next page.

    For each listing the price, the deal count and the shop name are printed
    on one line. A field that is missing from a listing is printed as an
    empty string so the remaining fields stay attached to the right listing.

    After printing, the function sleeps for 3 seconds and calls next_url(),
    which clicks the "next page" link and calls get_info() again.
    """
    selector = etree.HTML(driver.page_source)
    # The first listing carries an extra ``item-ad`` class, the rest share one
    # class string. The XPath union returns both kinds in document order.
    infos = selector.xpath(
        '//*[@class="item J_MouserOnverReq item-ad  "]'
        ' | //*[@class="item J_MouserOnverReq  "]'
    )

    for info in infos:
        # The leading "." keeps each query inside the current listing; a bare
        # "//" would search the whole document on every iteration.
        money = _first_text(
            info.xpath('.//*[@class="price g_price g_price-highlight"]/strong/text()'))
        number = _first_text(
            info.xpath('.//*[@class="deal-cnt"]/text()'))
        dian_name = _first_text(
            info.xpath('.//*[@class="shopname J_MouseEneterLeave J_ShopInfo"]/span[2]/text()'))
        print(money, number, dian_name)

    # Pause between pages before requesting the next one.
    time.sleep(3)
    next_url()


def next_url():
    """Click the "next page" link and scrape the page it leads to.

    When the page has no link with the text '下一页' the function returns
    without scraping further, instead of propagating NoSuchElementException.
    NOTE(review): presumably the last results page has no such link; confirm
    against the live site.
    """
    # Imported locally so this function does not rely on the file's import
    # block; selenium is already a dependency of this script.
    from selenium.common.exceptions import NoSuchElementException

    try:
        next_link = driver.find_element_by_link_text('下一页')
    except NoSuchElementException:
        # No "next page" link on this page: stop paginating.
        return
    next_link.click()
    get_info()


if __name__ == '__main__':
    # Open the Taobao home page, search for "python", then scrape the results.
    url = 'https://www.taobao.com/'
    try:
        driver.get(url)
        driver.implicitly_wait(10)
        driver.find_element_by_name('q').send_keys('python')
        driver.find_element_by_class_name('search-button').click()  # submit the search
        get_url(driver.current_url)  # pass the URL of the results page
    finally:
        # Close the browser even when one of the steps above raises, so no
        # browser or chromedriver process is left running.
        driver.quit()

猜你喜欢

# 转载自 https://blog.csdn.net/qq_18525247/article/details/80384824