Requests-html 设置 headers

需要先安装 requests-html(`pip install requests-html`),并且 Python 版本不低于 3.6。

 1 # -*- coding: utf-8 -*-
 2 
 3 from requests_html import HTMLSession
 4 
 5 
 6 def get_web_page_elements(url, headers={}, xpath_expression=''):
 7     '''通过 xpath expression 获取 网页元素'''
 8     session = HTMLSession()
 9     response = session.get(url, headers=headers)
10     elements_list = response.html.xpath(xpath_expression)
11     return elements_list
12 
13 
if __name__ == '__main__':
    url = 'https://www.liaoxuefeng.com/wiki/0014316089557264a6b348958f449949df42a6d3a2e542c000'
    # Request header values.
    referer = url
    # Only the header *value* belongs here: the 'Cookie' name is supplied by
    # the dict key below, so a leading 'Cookie: ' would be sent as
    # "Cookie: Cookie: atsp=..." and corrupt the first cookie's name.
    cookie = 'atsp=1548864427226_1548863599220; Hm_lvt_2efddd14a5f2b304677462d06fb4f964=1548863599; Hm_lpvt_2efddd14a5f2b304677462d06fb4f964=1548863599'
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36'
    headers = {
        'Referer': referer,
        'Cookie': cookie,
        'User-Agent': user_agent,
    }
    # Fetch the table-of-contents entries of the wiki page.
    index_xpath_expression = "//a[@class='x-wiki-index-item']"
    index_data = get_web_page_elements(url, headers=headers, xpath_expression=index_xpath_expression)
    for each_index in index_data:
        # NOTE(review): `.url` appears to be the URL of the page the element
        # was parsed from, not the anchor's href; if the link target is
        # wanted, `each_index.absolute_links` is probably the right
        # attribute -- confirm against the requests-html docs.
        print(each_index.text + '\t\t' + each_index.url)

猜你喜欢

转载自www.cnblogs.com/mcgill0217/p/10340310.html