# Crawler (爬虫) ------- lagou1.py

import re
import requests
# Cookies gathered from every response; each later request sends the
# accumulated set.
all_cookie_dict = {}

# Step 1: visit the login page ####################################
# The page source contains two anti-forgery values (token and code) that the
# login request in the next step sends back as headers.

r1 = requests.get(
    url='https://passport.lagou.com/login/login.html',
    headers={
        'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
    }
)

# Non-greedy captures: stop at the first closing "';" so that several
# assignments sharing one line are not swallowed into a single match.
# As before, an IndexError is raised when a value is not present in the page.
token = re.findall(r"X_Anti_Forge_Token = '(.*?)';", r1.text)[0]
code = re.findall(r"X_Anti_Forge_Code = '(.*?)';", r1.text)[0]
r1_cookie_dict = r1.cookies.get_dict()
all_cookie_dict.update(r1_cookie_dict)

# Step 2: log in #####################################
# Post the login form together with the anti-forgery token/code taken from
# the login page and the cookies collected so far.
# NOTE(review): the account credentials are hard-coded in this form.

_login_form = {
    'isValidate': 'true',
    'username': '15131255089',
    'password': '647541545465',
    'request_form_verifyCode': '',
    'submit': '',
}
_login_headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/67.0.3396.99 Safari/537.36'
    ),
    'X-Requested-With': 'XMLHttpRequest',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Host': 'passport.lagou.com',
    'Origin': 'https://passport.lagou.com',
    'Referer': 'https://passport.lagou.com/login/login.html',
    # Header names are sent exactly as spelled here ("Anit").
    'X-Anit-Forge-Code': code,
    'X-Anit-Forge-Token': token,
}

r2 = requests.post(
    'https://passport.lagou.com/login/login.json',
    data=_login_form,
    headers=_login_headers,
    cookies=all_cookie_dict,
)
r2_response_json = r2.json()
r2_cookie_dict = r2.cookies.get_dict()
all_cookie_dict.update(r2_cookie_dict)

# Step 3: grant #####################################
# Automatic redirect following is disabled so this response's own cookies
# and Location header can be read before moving on.

_grant_headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/67.0.3396.99 Safari/537.36'
    ),
    'Referer': 'https://passport.lagou.com/login/login.html',
    'Host': 'passport.lagou.com',
}

r3 = requests.get(
    'https://passport.lagou.com/grantServiceTicket/grant.html',
    headers=_grant_headers,
    cookies=all_cookie_dict,
    allow_redirects=False,
)
r3_cookie_dict = r3.cookies.get_dict()
all_cookie_dict.update(r3_cookie_dict)

# Step 4: action #####################################
# Request the URL named by the Location header of the step-3 response,
# again without following redirects automatically.

_action_url = r3.headers['Location']
_action_headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/67.0.3396.99 Safari/537.36'
    ),
    'Referer': 'https://passport.lagou.com/login/login.html',
    'Host': 'www.lagou.com',
    'Upgrade-Insecure-Requests': '1',
}

r4 = requests.get(
    _action_url,
    headers=_action_headers,
    cookies=all_cookie_dict,
    allow_redirects=False,
)
r4_cookie_dict = r4.cookies.get_dict()
all_cookie_dict.update(r4_cookie_dict)

# Step 5: obtain the authentication info #####################################
# Request the URL named by the Location header of the step-4 response, merge
# the cookies it sets, then print the Location header of this response.

_auth_url = r4.headers['Location']
_auth_headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/67.0.3396.99 Safari/537.36'
    ),
    'Referer': 'https://passport.lagou.com/login/login.html',
    'Host': 'www.lagou.com',
    'Upgrade-Insecure-Requests': '1',
}

r5 = requests.get(
    _auth_url,
    headers=_auth_headers,
    cookies=all_cookie_dict,
    allow_redirects=False,
)
r5_cookie_dict = r5.cookies.get_dict()
all_cookie_dict.update(r5_cookie_dict)

_next_location = r5.headers['Location']
print(_next_location)

# Step 6: my invitations #####################################
# Fetch the invitations page with the accumulated cookies and print whether
# the text 'wupeiqi' occurs in the returned page.

_invitation_headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/67.0.3396.99 Safari/537.36'
    ),
    'Host': 'www.lagou.com',
    'Upgrade-Insecure-Requests': '1',
    'Pragma': 'no-cache',
}

r = requests.get(
    'https://www.lagou.com/mycenter/invitation.html',
    headers=_invitation_headers,
    cookies=all_cookie_dict,
)
_marker_found = 'wupeiqi' in r.text
print(_marker_found)

# lagou1.py
#
# 猜你喜欢 (You may also like)
#
# 转载自 (Reposted from) blog.csdn.net/qq_43475097/article/details/83929781
# py1