python 实现多任务协程下载斗鱼平台图片

                          python 实现多任务协程下载斗鱼平台图片


# gevent's monkey patching replaces blocking standard-library primitives
# (socket, ssl, time.sleep, ...) with cooperative versions. The gevent docs
# recommend applying it as early as possible, before other modules are
# imported; urllib.request pulls in the networking modules, so the patch is
# applied first and the remaining imports follow it.
from gevent import monkey

monkey.patch_all()

import random  # noqa: E402
import re  # noqa: E402
import time  # noqa: E402
import urllib.request  # noqa: E402

import gevent  # noqa: E402
from gevent import pool  # noqa: E402

def down_load_img(img_url, img_name, save_dir=None):
    """Download one image and save it to disk.

    :param img_url: URL of the image to fetch.
    :param img_name: file name (not a full path) to store the image under.
    :param save_dir: directory to write the file into. When ``None`` the
        script's original hard-coded "beauty" folder is used, so existing
        two-argument callers behave as before.
    :return: None
    :raises urllib.error.URLError: if the URL cannot be opened.
    :raises OSError: if the destination file cannot be written.
    """
    import os  # local import: the module header does not import os

    if save_dir is None:
        target = r"G:\myproject_jiuye\hm_424_430\正则\beauty\%s" % img_name
    else:
        target = os.path.join(save_dir, img_name)

    # Use the response as a context manager so the connection is closed
    # deterministically instead of whenever the object is garbage-collected.
    with urllib.request.urlopen(img_url) as response:
        img_content = response.read()

    with open(target, "wb") as f1:
        f1.write(img_content)

    # Random pause of under one second after each download. It runs after the
    # file has been closed so no file handle is held open while waiting.
    time.sleep(random.random())

def main():
    """Extract .jpg URLs from a saved text file and download them concurrently.

    Reads the hard-coded ``douyu_url.txt`` file, collects every
    ``https://....jpg`` URL found in it, and downloads at most the first
    ``max_images`` of them through a gevent pool, naming the files
    ``0.jpg``, ``1.jpg``, ... in match order. Prints the elapsed time once
    all downloads have finished.

    :return: None
    :raises OSError: if the URL list file cannot be opened.
    """
    max_images = 100  # upper bound on how many pictures are downloaded
    p = pool.Pool(5)  # limit the number of concurrently running greenlets
    local_addr = r"G:\myproject_jiuye\hm_424_430\正则\douyu_url.txt"

    # Read the whole file and close it right away.
    with open(local_addr, "r", encoding="utf-8") as url_txt:
        url_content = url_txt.read()

    # The dot before "jpg" is escaped so only a literal ".jpg" suffix matches.
    ret_list = re.findall(r"https://.*?\.jpg", url_content)

    t_start = time.time()  # mark the start time
    # Slicing caps the work at exactly max_images downloads.
    my_list = [
        p.spawn(down_load_img, img_url, "%d.jpg" % num)
        for num, img_url in enumerate(ret_list[:max_images])
    ]

    gevent.joinall(my_list)  # wait until every spawned download has finished
    t_stop = time.time()  # mark the stop time
    print("Download completed!Enjoy now!Time of use:%.2fS" % (t_stop - t_start))


# Start the download only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

猜你喜欢

转载自blog.csdn.net/weixin_40612082/article/details/80101478