Python爬虫urllib之cookie文件的保存和访问

    data = {
        "log": "*******",
        "pwd": "******",
        "rememberme": "forever",
        "wp-submit": "登录",
        # 登陆后重定向地址
        "redirect_to": "http://date.jobbole.com/4965/",
        "testcookie": "1"
    }
    data = parse.urlencode(data).encode()
  • cookie文件保存
  # 保存cookie到文件
  cookie.save(f,ignore_discard=True,ignore_expires=True)
  • cookie文件读取
    f = r'jobbole_cookie.txt'
    # 创建MozillaCookieJar实例对象
    cookie = cookiejar.MozillaCookieJar()
    # 从文件中读取cookie内容到变量
    cookie.load(f,ignore_discard=True,ignore_expires=True)
    handler = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(handler)

代码

from urllib import request,parse,error
from http import cookiejar
import json

def login():
    """Log in to date.jobbole.com and save the session cookies to a file.

    Posts the login form to the site's ``wp-login.php`` endpoint through an
    opener that has an ``HTTPCookieProcessor`` attached, writes the cookies
    to ``jobbole_cookie.txt`` in Mozilla format, and prints the decoded
    response body.

    A ``urllib.error.URLError`` (which includes ``HTTPError``) is reported by
    printing it rather than being propagated to the caller.
    """
    # Login entry point, form fields and request headers.
    url = 'http://date.jobbole.com/wp-login.php'
    data = {
        "log": "*******",
        "pwd": "******",
        "rememberme": "forever",
        "wp-submit": "登录",
        # Address to redirect to after a successful login.
        "redirect_to": "http://date.jobbole.com/4965/",
        "testcookie": "1"
    }
    # POST bodies must be bytes, so url-encode the form and then encode it.
    data = parse.urlencode(data).encode()
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
        "Connection": "keep-alive"
    }

    # Cookie file; the relative path resolves against the working directory.
    f = r'jobbole_cookie.txt'
    # A MozillaCookieJar collects the cookies and can write them to a file.
    cookie = cookiejar.MozillaCookieJar(f)
    # Build an opener whose handler stores response cookies into the jar.
    handler = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(handler)
    req = request.Request(url, data=data, headers=headers)

    try:
        # The context manager closes the response even if reading fails.
        with opener.open(req) as rsp:
            # Keep session (discardable) and already-expired cookies as well,
            # so the file holds everything the server sent.
            cookie.save(f, ignore_discard=True, ignore_expires=True)
            html = rsp.read().decode()
        print(html)
    except error.URLError as e:
        # Bind the exception to its own name: the original bound it to ``f``
        # and printed the undefined ``e``, raising NameError on any failure.
        print(e)

def getInfo():
    """Query the site's admin-ajax endpoint using the saved login cookies.

    Loads the cookies from ``jobbole_cookie.txt``, POSTs the
    ``get_date_contact`` action for post ``4965``, prints the raw response
    bytes and the JSON-decoded result, and writes the decoded result's
    string form to ``jobbo.html``.

    Any exception raised while requesting, decoding or writing is printed
    instead of propagated.  Errors while loading the cookie file are not
    caught.
    """
    cookie_path = r'jobbole_cookie.txt'
    # Fill an empty jar from the cookie file, including session cookies
    # and ones that have already expired.
    jar = cookiejar.MozillaCookieJar()
    jar.load(cookie_path, ignore_discard=True, ignore_expires=True)
    opener = request.build_opener(request.HTTPCookieProcessor(jar))

    # Form body for the AJAX call, url-encoded and converted to bytes.
    payload = parse.urlencode({
        "action": "get_date_contact",
        "postId": "4965"
    }).encode()
    req = request.Request(
        'http://date.jobbole.com/wp-admin/admin-ajax.php',
        data=payload,
        headers={
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
            "Connection": "keep-alive"
        },
    )

    try:
        raw = opener.open(req).read()
        print(raw)

        parsed = json.loads(raw)
        print(parsed)

        with open('jobbo.html', 'w', encoding='utf-8') as out:
            out.write(str(parsed))
    except Exception as e:
        print(e)

if __name__ == "__main__":
    # Log in first so the cookie file exists before getInfo() loads it.
    login()
    getInfo()

猜你喜欢

转载自blog.csdn.net/qq_31235811/article/details/88652244
今日推荐