import requests
# Demo: fetch an Amazon product page, first with the default requests
# User-Agent and then with a browser-like one, printing the status of each.
url = "https://www.amazon.cn/gp/product/B01M8L5Z3Y"
try:
    # First attempt uses the library's default headers. A timeout is set so
    # an unresponsive server cannot hang the script indefinitely.
    r = requests.get(url, timeout=10)
    print(r.status_code)
    # Show the headers that were actually sent. Amazon recognises the default
    # 'User-Agent': 'python-requests/2.23.0' and rejects the request.
    print(r.request.headers)

    # 'Mozilla/5.0' is a browser identification string; overriding the
    # User-Agent makes the request look like it comes from a browser.
    kv = {'User-Agent': 'Mozilla/5.0'}
    r = requests.get(url, headers=kv, timeout=10)
    print(r.status_code)
    # Raises requests.HTTPError when the response status is 4xx or 5xx.
    r.raise_for_status()
    # Decode the body with the encoding guessed from its content rather than
    # the one taken from the response headers.
    r.encoding = r.apparent_encoding
    print(r.text[0:1000])
except requests.RequestException:
    # Covers connection errors, timeouts and the HTTPError raised above,
    # without swallowing KeyboardInterrupt/SystemExit or programming errors.
    print("爬取失败")
结果:
503
{'User-Agent': 'python-requests/2.23.0', 'Accept-Encoding': 'gzip, deflate', 'Accept': '*/*', 'Connection': 'keep-alive'}
200
if (true === true) {
var ue_t0 = (+ new Date()),
ue_csm = window,
ue = { t0: ue_t0, d: function() { return (+new Date() - ue_t0); } },
ue_furl = "fls-cn.amazon.cn",
ue_mid = "AAHKV2X7AFYLW",