正则表达式的简单运用——爬取斗鱼小姐姐图片

构思:

可以考虑使用多任务协程(gevent)版本来下载,这样下载速度更快,也更方便

1.  导入 gevent 模块:import gevent

2.  使用 monkey(猴子补丁)给标准库打补丁,必须放在其他导入之前:

from gevent import monkey

monkey.patch_all()

3.  调用spawn方法:

gevent.spawn(函数名,传参)




from gevent import monkey

monkey.patch_all()  # Apply the gevent monkey patch before other modules are imported.

import os
import re
import urllib.request

import gevent



def myclient():
    """Fetch the Douyu listing page and return the ``.jpg`` URLs found in it.

    The page is requested with a browser-like ``User-Agent`` header, decoded
    as UTF-8 and scanned with a regular expression.

    Returns:
        list[str]: every ``https://...jpg`` URL matched in the page HTML, in
        the order it appears (duplicates are kept). The list is also printed.

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
        UnicodeDecodeError: if the response body is not valid UTF-8.
    """
    url = "https://www.douyu.com/directory/game/yz"
    ua_header = {
        "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;"}
    request = urllib.request.Request(url, headers=ua_header)

    # Use the response as a context manager so the connection is always closed.
    with urllib.request.urlopen(request) as response:
        html = response.read().decode()

    # A greedy ``.*`` would run from the first "https://" on a line to the
    # last ".jpg" on that line, gluing several URLs (and the markup between
    # them) into one match.  Match lazily and stop at characters that cannot
    # be part of a URL inside HTML: whitespace, quotes and angle brackets.
    pics = re.findall(r'https://[^\s"\'<>]+?\.jpg', html)
    print(pics)
    return pics




def download(pics):
    """Download every image URL in *pics* into the local ``./pics`` directory.

    Args:
        pics: iterable of image URLs.  Each image is saved under the last
            path segment of its URL (the text after the final ``/``).

    Raises:
        urllib.error.URLError: if an image cannot be fetched.
        OSError: if the target directory or a file cannot be written.
    """
    # The target directory may not exist yet; without this the first
    # ``open`` fails with FileNotFoundError.
    os.makedirs("./pics", exist_ok=True)

    for pic_url in pics:
        # Close each response once its body has been read.
        with urllib.request.urlopen(pic_url) as response:
            content = response.read()
        # ``+ 1`` drops the leading "/" from the name; it also makes a URL
        # without any "/" yield the whole string instead of its last character.
        file_name = pic_url[pic_url.rfind("/") + 1:]
        with open(os.path.join("./pics", file_name), "wb") as file:
            file.write(content)




def main():
    """Crawl Douyu: collect the image URLs once, then download them concurrently.

    The listing page is fetched a single time with ``myclient``.  Each image
    is then handed to its own greenlet, so the downloads overlap instead of
    running one after another.
    """
    pics = myclient()

    # ``download`` expects a collection of URLs, so each greenlet gets a
    # one-element list.  Spawning ``download`` without arguments would fail
    # with TypeError inside the greenlet.
    jobs = [gevent.spawn(download, [pic_url]) for pic_url in pics]

    # Wait for every download to finish.  With the default ``raise_error``
    # setting a failed greenlet does not abort the remaining ones.
    gevent.joinall(jobs)


if __name__ == "__main__":
    main()
    

猜你喜欢

转载自blog.csdn.net/wpb74521wrf/article/details/80431690