Python爬虫学习笔记(Requests库补充)

import requests  # third-party HTTP client library

# Fetch the Baidu home page; requests.get returns a Response object.
r = requests.get('https://www.baidu.com')
body = r.text  # decoded response body
# Print, in order: the Response type, the HTTP status code, the type of the
# body, the page content itself, and the cookies carried by the response.
for value in (type(r), r.status_code, type(body), body, r.cookies):
    print(value)

import requests  # third-party HTTP client library

# Query-string parameters, passed to the request through ``params``.
data = dict(name='germey', age=22)
r = requests.get('https://httpbin.org/get', params=data)

# First show the body as text together with its type ...
print(r.text)
print(type(r.text))

# ... then parse the same body as JSON and show the resulting object and
# its type.
parsed = r.json()
print(parsed)
print(type(parsed))

 

# Download the GitHub tab icon (favicon) with a GET request.

import requests

r = requests.get('https://github.com/favicon.ico')
# Fail loudly on an HTTP error status instead of silently saving an error
# page under the icon's file name.
r.raise_for_status()
# r.content holds the raw response bytes, hence the binary write mode.
# NOTE(review): 'D:favicon.ico' has no separator after the drive letter, so on
# Windows it is relative to the current directory of drive D: -- confirm that
# this (rather than 'D:\\favicon.ico') is the intended location.
with open('D:favicon.ico', 'wb') as f:
    f.write(r.content)

 

# Use a Session to keep state (cookies) across several requests to the same
# site, instead of starting from scratch on every call.
import requests

# The context manager closes the session, and the connections it pooled,
# as soon as the block is left.
with requests.Session() as s:
    # Ask httpbin to set a cookie named "number" with the value 123456789.
    s.get('http://httpbin.org/cookies/set/number/123456789')
    # Read back the cookies the session currently holds.
    r = s.get('http://httpbin.org/cookies')
print(r.text)

 

 

# Skip SSL certificate verification by passing verify=False.
import logging
import requests

# Redirect warnings issued through the ``warnings`` module to the logging
# system.
logging.captureWarnings(True)

url = 'https://www.12306.cn'
r = requests.get(url, verify=False)
# Alternatively, a local client certificate can be supplied as a
# (certificate, key) pair; the key must be in decrypted form:
#   r = requests.get('https://www.12306.cn', cert=('/path/server.crt', '/path/key'))

# Decode the body with the encoding detected from its content.
r.encoding = r.apparent_encoding
print(r.text)

 

 

# Route a request through a proxy with the ``proxies`` argument, e.g. to avoid
# IP bans. Free proxy addresses can be found with a web search.
# Proxies that need HTTP Basic Auth use the form http://user:password@host:port
import random
import requests

proxy_urls = [
    "http://223.199.24.230:9999",
    "http://223.199.28.237:9999",
    "http://175.44.109.194:9999",
    "http://117.69.152.77:9999",
    "http://114.99.10.137:9999",
]
# requests picks the proxy by the scheme of the *target* URL, so every entry
# maps both schemes; an entry with only an "http" key would be ignored for the
# https:// request below and the request would go out directly.
proxies_list = [{"http": proxy, "https": proxy} for proxy in proxy_urls]
proxies = random.choice(proxies_list)
requests.get("https://www.taobao.com", proxies=proxies)

 

# HTTP Basic authentication. (OAuth1 is another authentication option.)
import requests
from requests.auth import HTTPBasicAuth

# httpbin's /basic-auth/<user>/<password> endpoint expects Basic credentials
# matching the ones in the path, so it serves as a runnable target here.
# Replace the URL and credentials with real ones as needed.
r = requests.get(
    'https://httpbin.org/basic-auth/username/password',
    auth=HTTPBasicAuth('username', 'password'),
)
print(r.status_code)

 

# Represent a request as a data structure (a Request object), which makes it
# easy to build a queue of requests to send later.
from requests import Request, Session

# Target endpoint; the host name is httpbin.org.
url = 'http://httpbin.org/post'
data = {
    'name': 'germy'
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3423.2 Safari/537.36'
}
# The context manager closes the session once the request has been sent.
with Session() as s:
    req = Request('POST', url, data=data, headers=headers)  # build the Request object
    prepped = s.prepare_request(req)  # turn it into a PreparedRequest
    r = s.send(prepped)  # send the prepared request
print(r.text)

发布了33 篇原创文章 · 获赞 15 · 访问量 1万+

猜你喜欢

转载自blog.csdn.net/qq_33360009/article/details/104173131