第六天(Python打卡)

爬虫:

   urllib模块:

import urllib.request

# Build an explicit Request object, then fetch it.
req = urllib.request.Request('http://placekitten.com/g/500/600')
# Using the response as a context manager closes the connection when done.
with urllib.request.urlopen(req) as response:
    cat_img = response.read()
    # In an interactive session these two expressions display their values.
    response.geturl()  # URL of the resource that was actually retrieved
    response.info()    # response headers (there is no getinfo() method)

# Write the raw bytes to disk; 'wb' because the payload is binary image data.
with open('cat_500_600.jpg', 'wb') as f:
    f.write(cat_img)

    使用urllib发送POST请求(传入data参数即为POST;不传data时默认使用GET)

import urllib.parse
# In Python 3, urlencode lives in urllib.parse (urllib.urlencode does not exist).
# The encoded form must be converted to bytes before it is sent as a POST body.
data = urllib.parse.urlencode(data).encode("utf-8")
# NOTE(review): presumably `response` comes from urlopen(url, data) - confirm.
html = response.read().decode('utf-8')

解析json 数据:

import json
# Parse the JSON text held in `html` into Python objects.
# The result is not bound to a name here; in an interactive session it is displayed.
json.loads(html)

使用headers:

# Headers belong on the Request object: urlopen's third positional argument
# is `timeout`, so passing the header dict to urlopen directly is wrong.
req = urllib.request.Request(url, data, header)
# Alternatively, add headers one at a time; add_header takes (key, value).
req.add_header('User-Agent', '...')
response = urllib.request.urlopen(req)
设置访问阈值:

        延迟使用时间:  

import time
# sleep() requires the delay in seconds; calling it with no argument raises TypeError.
# The value 1 is an example - choose a delay appropriate for the target site.
time.sleep(1)

       代理:1.参数是一个字典 {'类型': '代理IP:端口号'}

# Create the proxy handler. The empty dict is a placeholder here;
# fill it with {'scheme': 'proxy_ip:port'} entries for real use.
proxy_support = urllib.request.ProxyHandler({})

                 2.定制,创建一个opener

# Build a custom opener that includes the proxy handler.
opener = urllib.request.build_opener(proxy_support)

                 3a.安装opener

# Install the opener as the global default used by urllib.request.urlopen.
urllib.request.install_opener(opener)

                 3b.调用opener

# Alternative to installing: call the opener directly for a single request.
opener.open(url)
import urllib.request
import random

# Page that reports the IP address the request appears to come from.
url = 'https://www.whatismyip.com/'
# NOTE(review): these entries carry no port, while proxies are normally
# given as 'ip:port' - confirm the correct ports before relying on them.
iplist = ['115.223.241.23','180.118.73.175','117.90.1.162']
# Pick one proxy at random and register it for both schemes: the target URL
# is https, so an 'http'-only mapping would never be applied to this request.
proxy = random.choice(iplist)
proxy_support = urllib.request.ProxyHandler({'http': proxy, 'https': proxy})

# Build an opener that routes through the proxy and sends a browser-like User-Agent.
opener = urllib.request.build_opener(proxy_support)
opener.addheaders = [('User-Agent','Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.104 Safari/537.36 Core/1.53.3427.400 QQBrowser/9.6.12513.400')]

# Make the opener the global default so plain urlopen() uses it.
urllib.request.install_opener(opener)

# The context manager closes the connection once the body has been read.
with urllib.request.urlopen(url) as response:
    html = response.read().decode('utf-8')

print(html)


扫描二维码关注公众号,回复: 424315 查看本文章


猜你喜欢

转载自blog.csdn.net/qq_41191024/article/details/80023553
今日推荐