第三章、3.2requests使用

#比urllib更方便
#实现文件上传,使用Cookie登录,会话维持,SSL证书验证,代理设置,超时设置,身份认证

# """
# import requests
# r =requests.get("https://www.baidu.com")
# print(type(r))
# print(r.status_code)
# print(type(r.text))
# print(r.cookies)
"""
<class 'requests.models.Response'>
200
<class 'str'>
<RequestsCookieJar[<Cookie BDORZ=27315 for .baidu.com/>]>
"""
#如果使用get请求,附加额外信息呢?利用params这个参数
# import requests
# data = {'name':'lin',"age":23}
# response = requests.get("http://httpbin.org/get",params=data)
# print(response.text)
"""
{
  "args": {
    "age": "23", 
    "name": "lin"
  }, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Connection": "close", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.19.1"
  }, 
  "origin": "114.242.26.54", 
  "url": "http://httpbin.org/get?name=lin&age=23"
}
"""
# print(type(response.text)) #返回结果是str但是json格式的,需要用json()方法解码得到字典格式
"""
<class 'str'>
"""
# print(response.json())
"""
{'args': {'age': '23', 'name': 'lin'}, 'headers': {'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate', 'Connection': 'close', 'Host': 'httpbin.org', 'User-Agent': 'python-requests/2.19.1'}, 'origin': '114.242.26.65', 'url': 'http://httpbin.org/get?name=lin&age=23'}
"""
# print(type(response.json()))  #返回字典格式
"""
<class 'dict'>
"""
# import requests
# import re
# headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1"}
# response = requests.get("https://www.zhihu.com/explore",headers=headers)
# pattern = re.compile(r'explore-feed.*?question_link.*?>(.*?)</a>',re.S)
# titles = re.findall(pattern,response.text)
# print(titles)
# """
# ['\nNASA 发射史上离太阳最近航天器「帕克号」,挑战百万度高温,不会被热化吗?\n', '\n古人为什么要想像出山海经里那些荒诞不经的异兽?\n', '\n怎样评价电视剧《武动乾坤》?\n', '\n刀塔的哪个英雄最好吃?\n', '\n如何评价Bernevig等人最新拓扑材料分类表?\n', '\n从轮胎表面能看出来车的多少信息?\n', '\n为什么有的网络小说以神魔和整个宇宙为背景,但还是会被批评格局太小或是缺乏想象力?\n', '\n有哪些体现华晨宇音乐素养的片段?\n', '\n如何评价NOI2018强制要求参加社会实践活动?\n', '\n哪些瞬间让你发现贫穷限制了你的想象力?\n']
# """
#上面抓取的是一个HTML文档,如果想读取图片、音频、视频等该怎么办?
# import requests
# response = requests.get("https://github.com/favicon.ico")
# # print(response.text)
# # print(response.content)
# with open("fav.ico",'wb')as f:
#     f.write(response.content)
#添加headers  例1就是添加了请求头的get
#POST请求
import requests
# data = {"name":"lin","age":23}
# r = requests.post("http://httpbin.org/post",data = data)
# print(r.json())
# """
# {'args': {}, 'data': '', 'files': {}, 'form': {'age': '23', 'name': 'lin'}, 'headers': {'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate', 'Connection': 'close', 'Content-Length': '15', 'Content-Type': 'application/x-www-form-urlencoded', 'Host': 'httpbin.org', 'User-Agent': 'python-requests/2.19.1'}, 'json': None, 'origin': '111.198.24.66', 'url': 'http://httpbin.org/post'}
# """
#返回结果中的form部分就是提交的数据,说明POST请求发送成功
#5、响应
#发送请求后得到响应
# import requests
# r = requests.get("https://www.baidu.com")
# print(r.status_code,type(r.status_code))
# print(r.cookies,type(r.cookies))
# print(r.headers,type(r.headers))
# print(r.url,type(r.url))
# print(r.history,type(r.history))
# """
# 200 <class 'int'>
# <RequestsCookieJar[<Cookie BDORZ=27315 for .baidu.com/>]> <class 'requests.cookies.RequestsCookieJar'>
# {'Cache-Control': 'private, no-cache, no-store, proxy-revalidate, no-transform', 'Connection': 'Keep-Alive', 'Content-Encoding': 'gzip', 'Content-Type': 'text/html', 'Date': 'Tue, 14 Aug 2018 07:20:55 GMT', 'Last-Modified': 'Mon, 23 Jan 2017 13:23:46 GMT', 'Pragma': 'no-cache', 'Server': 'bfe/1.0.8.18', 'Set-Cookie': 'BDORZ=27315; max-age=86400; domain=.baidu.com; path=/', 'Transfer-Encoding': 'chunked'} <class 'requests.structures.CaseInsensitiveDict'>
# https://www.baidu.com/ <class 'str'>
# [] <class 'list'>
# """

#3.2.2高级用法
#了解requests的一些高级用法,如文件上传、Cookies设置、代理设置
#1、文件上传
# import requests
# files = {"file":open('fav.ico','rb')}
# r = requests.post("http://httpbin.org/post",files = files)
# print(r.text)
"""
{
  "args": {}, 
  "data": "", 
  "files": {
    "file": "data:application/octet-stream;base64,..............................................
  }, 
  "form": {}, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Connection": "close", 
    "Content-Length": "6661", 
    "Content-Type": "multipart/form-data; boundary=8bd96dd05337aab96a94c77a86c6e75a", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.19.1"
  }, 
  "json": null, 
  "origin": "123.123.106.104", 
  "url": "http://httpbin.org/post"
}
"""
#2、使用Cookies维持登录状态
# import requests
# headers = {
#     "Cookie": "uuid_tt_dd=10_19284690460-1532583823644-491393; ADHOC_MEMBERSHIP_CLIENT_ID1.0=75d2e9cd-0d91-8d57-a954-e78a728b90de; smidV2=20180726210051bc461f82ce049b4ac03242bef8613ca1009673459ddb489f0; UM_distinctid=1651c350b4a1c5-08e1a96c5691ab-324d687a-38400-1651c350b4c497; Hm_ct_6bcd52f51e9b3dce32bec4a3997715ac=1788*1*PC_VC; UN=luslin; UserName=luslin; UserInfo=c54edpYHfCrj5X%2FKNCKAmSFqdvLrxPh%2BWyW7Aj3R8Vhul%2FFuIo7hwETgp4PpEaY3WNuCsFbIV2RQ0BDf2YlYtYqG5jmCx9WSMR7bZ4zsA%2FSRsbElY%2FA32SegBRXkoWfh; UserNick=luslin; AU=689; BT=1534052530638; UserToken=c54edpYHfCrj5X%2FKNCKAmSFqdvLrxPh%2BWyW7Aj3R8Vhul%2FFuIo7hwETgp4PpEaY3WNuCsFbIV2RQ0BDf2YlYtYqG5jmCx9WSMR7bZ4zsA%2FQMoUZUcYFr9QoDCtT1CdspnZV0ux8qBns1o7fWFwCdSGPlrxc8dW%2BPo%2BG%2B9dJMx5M%3D; dc_session_id=10_1534206612837.240559; TY_SESSION_ID=2dea91c7-9b19-4c0d-96cb-107c442f52cc; Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1534131188,1534137108,1534155286,1534206613; dc_tos=pdfskd; Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1534226269",
#     "Host": "www.csdn.net",
#     "User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36"
# }
# r = requests.get('https://www.csdn.net',headers = headers)
# print(r.text)
#3、会话维持
#使用Session维持会话
# import requests
# s = requests.Session()
# s.get("http://httpbin.org/cookies/set/number/123456789")
# r = s.get("http://httpbin.org/cookies")
# print(r.text)
# """
# {
#   "cookies": {
#     "number": "123456789"
#   }
# }
# """
#SSL证书验证
#发送请求时,通过改变verify参数来控制是否检查SSL证书
# import requests
# r = requests.get("https://www.12306.cn")
# print(r.text)
# """
# requests.exceptions.SSLError: HTTPSConnectionPool
# """
# import requests
# r= requests.get("https://www.12306.cn",verify = False)
# print(r.text)
# """
# request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
#   InsecureRequestWarning)
#   后面是网站源码
# """

#代理设置
# import requests
# proxies ={
#     "http":"http://10.0.126.188:3218",
#     "https":"http://10.0.126.188:3218"
# }
# r = requests.get("https://www.baidu.com",proxies = proxies)
# print(r.text)
#超时设置:使用timeout参数,当设置为None时会一直等待,不会返回超时错误

#身份认证
# import requests
# from requests.auth import HTTPBasicAuth
# r = requests.get("http://localhost",auth = ("user","passwd"))
# print(r.status_code)

猜你喜欢

转载自blog.csdn.net/luslin/article/details/81669938