Python学习笔记--Python 爬虫入门 -17-3 cookie+session (人人网的爱恨情仇)

- cookie & session
    - 由于http协议是无状态的(不会记住之前的请求),人们为了弥补这个缺憾,采用了cookie和session作为补充机制
    - cookie是发放给用户(即http浏览器)的一段信息,session是保存在服务器上的对应的另一半信息,用来记录用户信息
    
- cookie和session的区别
    - 存放位置不同
    - cookie不安全
    - session会保存在服务器上一定时间,会过期
    - 单个cookie保存数据不超过4k, 很多浏览器限制一个站点最多保存20个cookie
- session的存放位置
    - 存在服务器端
    - 一般情况,session是放在内存中或者数据库中
    - 没有cookie登录 案例v011,可以看到,没使用cookie则反馈网页为未登录状态
    (注意:如果html 文件乱码,open 方法加上encoding="utf-8")

from  urllib import  request,parse
import  chardet

if __name__ == '__main__':
    # Fetch the profile page WITHOUT sending any cookie; the saved page is
    # expected to show the "not logged in" state.
    url = "http://www.renren.com/966226407/profile"

    # Use the response as a context manager so the connection is released
    # as soon as the body has been read.
    with request.urlopen(url) as rsp:
        raw = rsp.read()

    # chardet.detect() always returns an "encoding" key, but its value is
    # None when detection fails, so dict.get()'s default would never kick
    # in and decode(None) would raise TypeError. Fall back explicitly.
    detected = chardet.detect(raw)
    encoding = detected.get("encoding") or "utf-8"
    res = raw.decode(encoding)
    print(res)

    # Always write UTF-8 so the saved HTML is readable regardless of the
    # platform's default encoding.
    with open("webrenren.html", "w", encoding="utf-8") as f:
        f.write(res)


- 使用cookie登录
    - 1.直接把cookie复制下来,然后手动放入请求头, 案例 v12

from  urllib import  request

if __name__ == '__main__':
    url = 'http://www.renren.com/966226407/profile'

    # Cookie header value copied by hand from a logged-in browser session
    # (values redacted here); it is sent verbatim with the request.
    headers = {
        'Cookie':'JSESSIONID=xxx; wp_fold=0; anonymid=xxx; depovince=GW; jebecookies=xxx|||||; _r01_=1; ick_login=xxx; t=xxx; societyguester=xxx; id=xxx; xnsid=xxx'
    }

    req = request.Request(url=url, headers=headers)

    # Context manager closes the HTTP response once the body is read.
    with request.urlopen(req) as res:
        html = res.read().decode()

    with open("welcome.html", "w", encoding="UTF-8") as f:
        f.write(html)


    - 2.http模块包含一些关于cookie的模块,通过它们我们可以自动使用cookie
        - CookieJar
            - 管理存储cookie,向传出的http请求添加cookie,
            - cookie存储在内存中,CookieJar实例回收后cookie将消失
        - FileCookieJar(filename, delayload=None, policy=None):
            - 使用文件管理cookie
            - filename是保存cookie的文件
        - MozillaCookieJar(filename, delayload=None, policy=None):
            - 创建与Mozilla浏览器cookies.txt兼容的FileCookieJar实例
        - LWPCookieJar(filename, delayload=None, policy=None):
            - 创建与libwww-perl标准兼容的Set-Cookie3格式的FileCookieJar实例
        - 它们的关系是: CookieJar-->FileCookieJar-->MozillaCookieJar & LWPCookieJar
    - 利用cookiejar访问人人,
        - 自动使用cookie登录,大致流程是
        - 打开登录页面后自动通过用户名密码登录
        - 自动提取反馈回来的cookie
        - 利用提取的cookie登录隐私页面

        - 案例v13

from urllib import  request,parse
from http import  cookiejar

# In-memory cookie store; it is shared by every request made via `opener`.
cookie = cookiejar.CookieJar()

# One handler per URL scheme.
http_handler = request.HTTPHandler()
https_handler = request.HTTPSHandler()

# Processor that attaches stored cookies to outgoing requests and records
# cookies set by responses into `cookie`.
cookie_handler = request.HTTPCookieProcessor(cookiejar=cookie)

# Combine the handlers into a single opener (an OpenerDirector).
opener = request.build_opener(
    http_handler,
    https_handler,
    cookie_handler,
)

# Debug output, e.g.:
#   opener..type...info <class 'urllib.request.OpenerDirector'>
#   opener...........info <urllib.request.OpenerDirector object at 0x...>
print("opener..type...info", type(opener))
print("opener...........info", opener)

def login():
    """Log in to renren.com by POSTing the username/password form.

    The request goes through the module-level ``opener``, whose cookie
    processor stores the cookies set by the response, so later requests
    made with the same opener carry the login credentials. The returned
    page is saved to ``login.html`` for inspection.

    :return: None
    """
    url = "http://www.renren.com/PLogin.do"

    form = {
        "email":"136****1021",
        "password":"********"
    }
    # Supplying ``data`` turns the request into a POST; the body must be
    # URL-encoded bytes.
    body = parse.urlencode(form).encode()
    req = request.Request(url=url, data=body)

    # Context manager guarantees the HTTP response is closed even if
    # reading or decoding fails.
    with opener.open(req) as rsp:
        print("login...type->rsp",type(rsp))  # <class 'http.client.HTTPResponse'>
        print("login.........->rsp",rsp)
        html = rsp.read().decode()

    with open("login.html", "w", encoding="utf-8") as f:
        f.write(html)
def getHomePage():
    """Fetch the profile page through the shared ``opener`` and save it.

    The opener's cookie processor adds whatever cookies the jar currently
    holds, so this is expected to return the logged-in view only after
    ``login()`` has run. The page is written to ``logintohomepage.html``.

    :return: None
    """
    url ="http://www.renren.com/966226407/profile"

    # Close the response as soon as the body has been read.
    with opener.open(url) as rsp:
        html = rsp.read().decode()

    with open("logintohomepage.html", "w", encoding="utf-8") as f:
        f.write(html)

if __name__ == '__main__':
    # Order matters: log in first so the shared opener's cookie jar is
    # populated, then request the page that requires those cookies.
    login()
    getHomePage()


    - handler是Handler的实例,常用参看案例代码
        - 用来处理复杂请求
        
                # 生成 cookie的管理器
                cookie_handler = request.HTTPCookieProcessor(cookie)
                # 创建http请求管理器
                http_handler = request.HTTPHandler()
                # 生成https管理器
                https_handler = request.HTTPSHandler()
             
    - 创立handler后,使用opener打开,打开后相应的业务由相应的handler处理
    - cookie作为一个变量,打印出来, 案例 v14

#打印cookie
from urllib import  request,parse
from http import  cookiejar

# Cookie container kept in memory; inspected after login to list cookies.
cookie = cookiejar.CookieJar()

# Scheme handlers plus the processor that wires `cookie` into requests
# and responses.
cookie_handler = request.HTTPCookieProcessor(cookiejar=cookie)
http_handler, https_handler = request.HTTPHandler(), request.HTTPSHandler()

# The opener dispatches each request to the handlers registered here.
_handlers = (http_handler, https_handler, cookie_handler)
opener = request.build_opener(*_handlers)

# Debug output, e.g.:
#   opener..type...info <class 'urllib.request.OpenerDirector'>
#   opener...........info <urllib.request.OpenerDirector object at 0x...>
print("opener..type...info", type(opener))
print("opener...........info", opener)

def login():
    """Log in to renren.com so the module-level cookie jar gets populated.

    The form is POSTed through the module-level ``opener``; its cookie
    processor records the cookies set by the response into ``cookie``,
    which the caller can then inspect. The page body itself is not kept.

    :return: None
    """
    url = "http://www.renren.com/PLogin.do"

    form = {
        "email":"136****1021",
        "password":"********"
    }
    # Supplying ``data`` turns the request into a POST; the body must be
    # URL-encoded bytes.
    body = parse.urlencode(form).encode()
    req = request.Request(url=url, data=body)

    # Context manager closes the response; the body is read only to drain
    # it and is discarded (the original decoded it into an unused local).
    with opener.open(req) as rsp:
        print("login...type->rsp",type(rsp))  # <class 'http.client.HTTPResponse'>
        print("login.........->rsp",rsp)
        rsp.read()

if __name__ == '__main__':
    # Populate the jar, then show what it holds.
    login()

    # Prints <class 'http.cookiejar.CookieJar'>
    print(type(cookie))

    # Prints the jar's repr, of the form
    #   cookie---><CookieJar[<Cookie NAME=VALUE for DOMAIN/PATH>, ...]>
    print("cookie--->{}".format(cookie))

    # Dump every attribute name of every stored Cookie object, one name
    # per line.
    for stored in cookie:
        print(*dir(stored), sep="\n")


        - cookie的属性
            - name: 名称
            -  value: 值
            - domain:可以访问此cookie的域名
            - path: 可以访问此cookie的页面路径
            - expires:过期时间
            - size: 大小
            - Http字段

  # Print the main fields of each cookie held in the jar, one cookie per line.
  for item in cookie:
      fields = (
          "name :", item.name,
          " ,value :", item.value,
          ", path :", item.path,
          " ,domain..", item.domain,
          "...", item.expires,
      )
      # print() joins the pieces with single spaces, same as passing them
      # as separate positional arguments.
      print(*fields)



>>>name : _de  ,value : xxxxx42Fxxxxxxxxxx45765xxxxx , path : /  ,domain.. .renren.com ... 1567526036
>>>name : anonymid  ,value : xxtmxxxxxx9mi4m , path : /  ,domain.. .renren.com ... 1694102036
>>>name : first_login_flag  ,value : 1 , path : /  ,domain.. .renren.com ... None
>>>name : id  ,value : 849xxxx63 , path : /  ,domain.. .renren.com ... None
>>>name : ln_hurl  ,value : http://head.xiaonei.com/photos/0/0/men_main.gif , path : /  ,domain.. .renren.com ... 1539014036
>>>name : ln_uact  ,value : 136****1021 , path : /  ,domain.. .renren.com ... 1539014036
>>>name : loginfrom  ,value : syshome , path : /  ,domain.. .renren.com ... None
>>>name : p  ,value : xxxx5121587xxxxxxxxxxxc5410633 , path : /  ,domain.. .renren.com ... None
>>>name : societyguester  ,value : xxxx536xxxe4fbxxxeb0xxe6x , path : /  ,domain.. .renren.com ... None
>>>name : t  ,value : df704536b0c5ba5e4fb05a2eeb02ce6c3 , path : /  ,domain.. .renren.com ... None
>>>name : xnsid  ,value : 461c3954 , path : /  ,domain.. .renren.com ... None
>>>name : t  ,value : xxxxdf01xxedbxx934a04xx3fx , path : /xtalk/  ,domain.. .renren.com ... None
>>>name : JSESSIONID  ,value : axxiDbxxJexx45xx , path : /  ,domain.. zhibo.renren.com ... None


    - cookie的保存-FileCookieJar, 案例v15

"""
cookie 保存
"""
from urllib import  request,parse
from http import  cookiejar

# Path of the file the cookies are saved to.
filename = "cookie.txt"

# File-backed jar (Mozilla cookies.txt format) bound to `filename`;
# nothing is written until save() is called.
cookie = cookiejar.MozillaCookieJar(filename)

# Scheme handlers plus the processor that records response cookies into
# `cookie`.
http_handler = request.HTTPHandler()
https_handler = request.HTTPSHandler()
cookie_handler = request.HTTPCookieProcessor(cookiejar=cookie)

opener = request.build_opener(
    http_handler,
    https_handler,
    cookie_handler,
)

def login():
    """Log in to renren.com and persist the received cookies to disk.

    The form is POSTed through the module-level ``opener``; its cookie
    processor records the response cookies into ``cookie``, which is then
    saved to the file the jar was created with.

    :return: None
    """
    url="http://www.renren.com/PLogin.do"

    form = {
        "email":"136****1021",
        "password":"********"
    }

    # Supplying ``data`` turns the request into a POST; the body must be
    # URL-encoded bytes.
    req = request.Request(url=url, data=parse.urlencode(form).encode())

    # The response body is not needed here, so close the response straight
    # away instead of leaving it open in an unused variable.
    opener.open(req).close()

    # Save cookies to the jar's file. ignore_discard also saves cookies
    # marked to be discarded (session cookies); ignore_expires also saves
    # cookies that have already expired.
    cookie.save(ignore_discard=True, ignore_expires=True)

if __name__ == '__main__':
    # Log in once; login() also writes the received cookies to the file.
    login()


    - cookie的读取, 案例v16

"""
cookie 读取
"""
from urllib import request, parse
from http import cookiejar

# Path of the cookie file to read; it must already exist (load() raises
# if the file cannot be opened).
filename = "cookie.txt"

# File-backed jar bound to `filename`.
cookie = cookiejar.MozillaCookieJar(filename)

# load() without a path reads the file given to the constructor, so the
# path is defined in exactly one place instead of being repeated as a
# second literal. ignore_discard / ignore_expires also load session
# cookies and cookies that have already expired.
cookie.load(ignore_discard=True, ignore_expires=True)

# Processor that attaches the loaded cookies to outgoing requests.
cookie_handler = request.HTTPCookieProcessor(cookie)

http_handler = request.HTTPHandler()
https_handler = request.HTTPSHandler()

opener = request.build_opener(http_handler, https_handler, cookie_handler)


def getHomePage():
    """Fetch the profile page using the cookies loaded from file.

    The request goes through the module-level ``opener``, whose cookie
    processor attaches the cookies held in the jar. The page is written
    to ``readcookie.html``.

    :return: None
    """
    url = "http://www.renren.com/966226407/profile"

    # Close the response as soon as the body has been read.
    with opener.open(url) as rsp:
        html = rsp.read().decode()

    with open("readcookie.html", "w", encoding="utf-8") as r:
        r.write(html)
if __name__ == '__main__':
    # No login call here: the request relies on the cookies loaded from
    # the cookie file at module level.
    getHomePage()

猜你喜欢

转载自blog.csdn.net/u013985879/article/details/82532185
今日推荐