爬虫高度模拟浏览器

import urllib.request as request
import urllib
import http.cookiejar


# Fetch a page while imitating a real browser: send browser-like request
# headers, keep cookies in a CookieJar, and save the raw response body to disk.

# NOTE: when debugging with Fiddler, set the URL below to 'www.baidu.com/'.
url = 'http://www.baidu.com'

# Browser-like headers attached to every request made through the opener.
headers = {
    "Accept": " text/html,application/xhtml,application/xml;q=0.9,*/*;q=0.8",
    # NOTE(review): 'utf-8' and 'gb2312' are charset names rather than content
    # codings such as gzip. Presumably this is deliberate so that no compressed
    # body comes back (the script never decompresses the response) -- confirm.
    "Accept-Encoding": " utf-8,gb2312",
    # Each language range carries a single weight; the previous value ended in
    # "en-US;q=0.5;q=0.3", i.e. two weights on one range.
    "Accept-Language": " zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3",
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0',
    "Connection": "keep-alive",
    'referer': "baidu.com",
}

# Cookie-aware opener: cookies set by responses are stored in `cjar` and sent
# back on later requests made through this opener.
cjar = http.cookiejar.CookieJar()
opener = request.build_opener(request.HTTPCookieProcessor(cjar))

# `addheaders` expects a list of (name, value) tuples.
headall = list(headers.items())
opener.addheaders = headall

# Install globally so that the plain `request.urlopen` call below uses it.
request.install_opener(opener)

# Close the HTTP response deterministically once the body has been read.
with request.urlopen(url) as response:
    data = response.read()

# Write the raw bytes; the context manager closes the file even if the
# write fails part-way.
with open('D:\\python35\\crawler\\10.html', 'wb') as fhandle:
    fhandle.write(data)

猜你喜欢

转载自blog.csdn.net/qq_41359265/article/details/84677712