# Author : GQ
# Datetime : 2020/2/20 20:23
# Product : PyCharm
# Project : python
# File : selenium爬取拉钩网.py
import requests
from fake_useragent import UserAgent
from lxml import etree
# Generator for browser User-Agent strings; one random value is drawn below
# when the headers dict is built and is reused for every request.
ua = UserAgent()

# HTTP headers sent with the position-search POST request.
headers = {
    'User-Agent': ua.random,
    'Referer': 'https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput=',
    'Origin': 'https://www.lagou.com',
    'Connection': 'keep-alive',
    # The request body is sent form-encoded, not as JSON.
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
}
def request_url_list(url, page=1, keyword='python', timeout=10):
    """POST one page of a position search to ``url`` and print the JSON reply.

    Args:
        url: Address of the position-search AJAX endpoint.
        page: 1-based result page, sent as the ``pn`` form field.
        keyword: Search keyword, sent as the ``kd`` form field.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On a connection problem, a timeout, or an
            HTTP error status.
        ValueError: If the response body is not valid JSON.
    """
    data = {
        # ``first`` is only true when the first page is requested.
        'first': page == 1,
        'pn': page,
        'kd': keyword,
    }
    # Without an explicit timeout, requests waits indefinitely on a server
    # that never answers.
    response = requests.post(url=url, headers=headers, data=data, timeout=timeout)
    # Report HTTP-level failures directly rather than as a JSON decode error.
    response.raise_for_status()
    # NOTE(review): the author reports that the endpoint keeps answering with
    # an "operation too frequent" message even with a Cookie header added.
    # Presumably it expects cookies issued by the search page (the Referer
    # URL), e.g. via a requests.Session that GETs that page first -- confirm.
    print(response.json())
def main():
    """Run a single position search against the AJAX endpoint."""
    # "needAddtionalResult" is the query-parameter spelling used in the URL;
    # it is kept exactly as is.
    search_api = 'https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false'
    request_url_list(search_api)
# Only run the search when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
# 上面这段代码并没有让我获取到 JSON 数据:Cookie 信息我也加了,但接口返回给我的信息一直是“操作频繁”。
# 如果有知道原因的小伙伴,欢迎私信我。
# 另外,爬取多页数据时,只需要更换 data 字典中 pn 的值(1 即代表爬取第一页),并把 first 的值修改为 False 即可。