python爬虫 - cookie

什么是cookie
  • Cookie 是存储在你电脑上的文本文件中的一小段数据。HTTP 连接是无状态的:web 服务器向浏览器发送完 web 页面、连接关闭后,服务端不会记住该用户的任何信息。

  • Cookie 的作用就是用于解决 “如何记录客户端的用户信息”:

    • 当用户访问 web 页面时,他的名字可以记录在 cookie 中。
    • 在用户下一次访问该页面时,可以在 cookie 中读取用户访问记录。

直接使用已知的cookie

import urllib.request
import urllib.parse


# Fetch a renren.com profile page while presenting a session cookie that was
# copied by hand from a logged-in browser, then save the raw body to disk.
url = 'http://www.renren.com/969564068/profile'

# Request headers replayed from a browser session.
# NOTE(review): the 'Cookie' value is a hard-coded session secret; it
# presumably stops working once that session expires -- consider loading it
# from configuration instead of keeping it in source.
header = {
    'Host': 'www.renren.com',
    'Connection': 'keep-alive',
    'Accept': 'text/plain, */*; q=0.01',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Referer': 'http://www.renren.com/969564068/profile',
    # urllib does not decompress response bodies, so ask for an unencoded
    # body. The earlier value 'utf-8' is a charset, not a content coding.
    'Accept-Encoding': 'identity',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cookie': 'anonymid=jrebo4pi-z4loz2; depovince=GW; _r01_=1; JSESSIONID=abcz3XfZsh5OH9fcyrnIw; ick_login=4e011e8f-ceb9-4958-885d-306801794a96; ick=e2033348-600d-4841-8068-ef6a6b41bd6b; t=72f64946ea2089f3e0d4bf16e12439d78; societyguester=72f64946ea2089f3e0d4bf16e12439d78; id=969564068; xnsid=72b9146c; ver=7.0; loginfrom=null; jebe_key=23f6f8a7-f2d7-4ff8-8abf-7c81444668dd%7C1d2883452e01fabd750041b5e1b0b8e6%7C1548558485107%7C1%7C1548558485227; wp_fold=0; jebecookies=28d18871-f364-48cd-8cee-db57cf9d09fc|||||',
}

request = urllib.request.Request(url=url, headers=header)

# Use the response as a context manager so the connection is closed even if
# reading fails, and bound the wait so a stalled server cannot hang the script.
with urllib.request.urlopen(request, timeout=10) as response:
    page = response.read()

# Binary mode: the body is written exactly as received, without decoding.
with open('renren1.html', 'wb') as fp:
    fp.write(page)
    

模拟登入后再携带得到的cookie访问

import urllib.request
import urllib.parse
import http.cookiejar

# Log in to renren.com through a cookie-aware opener, then reuse the cookies
# collected during login to fetch a profile page and save it to disk.

# The CookieJar object holds the cookies received in responses.
ck = http.cookiejar.CookieJar()
# Build a handler around the jar.
handler = urllib.request.HTTPCookieProcessor(ck)
# Build an opener around the handler; requests sent through this opener
# share the cookies stored in `ck`.
opener = urllib.request.build_opener(handler)


url = 'http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=2019001141162'
# Form data for the login POST.
# NOTE(review): credentials are hard-coded; 'password' and 'rkey' look like
# values captured from one browser login and may be session-specific --
# confirm before reuse, and keep real credentials out of source.
data = {
    'email': '17320015926',
    'icode': '',
    'origURL': 'http://www.renren.com/home',
    'domain': 'renren.com',
    'key_id': '1',
    'captcha_type': 'web_login',
    'password': '4b7aaaf85d048df2c63d22642e934cfad7cfce4ac1924070d2181bc11d2ef1b0',
    'rkey': 'd34b787c1d15cd449f9e1f996ec13682',
    # Plain URL on purpose: urlencode() below performs the percent-encoding,
    # so a value that is already encoded would be encoded a second time.
    'f': 'http://www.renren.com/969564068',
}

data = urllib.parse.urlencode(data).encode('utf8')
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64;'
                  ' x64) AppleWebKit/537.36 (KHTML, like'
                  ' Gecko) Chrome/71.0.3578.98 Safari/537.36',
}
request = urllib.request.Request(url=url, headers=header)
# Supplying `data` makes this a POST. The context manager closes the
# connection afterwards; the timeout bounds the wait on a stalled server.
with opener.open(request, data=data, timeout=10) as response:
    print(response.read().decode('utf8'))

url = 'http://www.renren.com/969564068/profile'

# Same opener (and therefore the same cookie jar) and the same headers as the
# login request.
request = urllib.request.Request(url=url, headers=header)
with opener.open(request, timeout=10) as response:
    page = response.read()

# Binary mode: the body is written exactly as received, without decoding.
with open('renren2.html', 'wb') as fp:
    fp.write(page)
    
发布了51 篇原创文章 · 获赞 29 · 访问量 2383

猜你喜欢

转载自blog.csdn.net/fangweijiex/article/details/103748113