# 免费 IP 代理池示例 (free IP proxy pool example)


import requests
import re
import random
from concurrent.futures import ThreadPoolExecutor

import time
# Wall-clock reference taken when the module is loaded; get_video() prints
# the time elapsed since this moment after each download.
start = time.time()

# Shared executor with 12 worker threads used for the download tasks.
pool = ThreadPoolExecutor(max_workers=12)

def get_proxy():
    """Ask the local proxy-pool service for a proxy.

    Issues ``GET http://127.0.0.1:5010/get/`` and returns the decoded JSON
    body of the response. The caller in this file reads the ``'proxy'`` key
    from the returned object.
    """
    response = requests.get('http://127.0.0.1:5010/get/')
    return response.json()

def delete_proxy(proxy):
    """Ask the local proxy-pool service to drop ``proxy``.

    Issues ``GET http://127.0.0.1:5010/delete/?proxy=<proxy>``; the response
    is ignored and nothing is returned.
    """
    endpoint = "http://127.0.0.1:5010/delete/?proxy={}"
    requests.get(endpoint.format(proxy))


# Listing endpoint that returns the HTML fragment with the video index
# (reqType=5, categoryId=9, start=0).
url = 'https://www.pearvideo.com/category_loading.jsp?reqType=5&categoryId=9&start=0'

# Upper bound, in seconds, for each HTTP exchange below. Without a timeout
# requests waits indefinitely, so one dead proxy would stall the whole run.
REQUEST_TIMEOUT = 10

# Download descriptors collected by the loop below; each entry is
# {'v_name': <file name>, 'v_url': <direct mp4 URL>}.
video_list = []

ret = requests.get(url, timeout=REQUEST_TIMEOUT)
# Every listing entry links to its detail page through this anchor tag.
reg = r'<a href="(.*?)" class="vervideo-lilink actplay">'
video_urls = re.findall(reg, ret.text)
print(video_urls)

# The detail page embeds the direct mp4 address in an inline script.
# Defined once here instead of on every loop iteration.
detail_reg = r'srcUrl="(.*?)",vdoUrl=srcUrl'

# The loop variable is deliberately not called `url`, so the listing URL
# above is not overwritten.
for video_path in video_urls:
    proxy = get_proxy().get('proxy')
    print(proxy)
    proxy_url = "http://{}".format(proxy)
    try:
        ret_detail = requests.get(
            'https://www.pearvideo.com/' + video_path,
            # requests picks the proxy by the scheme of the target URL. The
            # target is https, so an "http"-only mapping would bypass the
            # proxy entirely; both schemes are mapped to the same proxy.
            proxies={"http": proxy_url, "https": proxy_url},
            timeout=REQUEST_TIMEOUT,
        )
        print(proxy)

        # [0] raises IndexError when the page carries no video address;
        # that is handled below like any other failure.
        mp4_url = re.findall(detail_reg, ret_detail.text)[0]

        # The file name is the last path segment of the mp4 URL.
        video_name = mp4_url.rsplit('/', 1)[-1]

        video_list.append({
            'v_name': video_name,
            'v_url': mp4_url,
        })

    except Exception:
        # Best effort: any failure while using this proxy (network error,
        # timeout, unexpected page content) drops the proxy from the pool
        # and skips the video.
        delete_proxy(proxy)

def get_video(dic):
    """Download one video into the current working directory.

    Args:
        dic: Mapping with the keys ``'v_url'`` (direct URL of the video)
            and ``'v_name'`` (name of the file to write).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status;
            no file is created in that case.

    Prints progress messages and, once the file is written, the number of
    seconds elapsed since the module-level ``start`` timestamp.
    """
    url = dic['v_url']
    name = dic['v_name']
    print(f'开始下载{name}')
    # stream=True avoids holding the whole video in memory; the context
    # manager releases the connection when the download ends or fails.
    with requests.get(url=url, stream=True) as video_data:
        print(url)
        # Fail before opening the file so an error page is never saved
        # under the video's name.
        video_data.raise_for_status()
        with open(name, 'wb') as f:
            # iter_content() defaults to 1-byte chunks, i.e. one write per
            # byte; 64 KiB chunks keep memory low without that overhead.
            for chunk in video_data.iter_content(chunk_size=64 * 1024):
                f.write(chunk)
    print(f'{name}下载完成')
    end = time.time()
    ctime = end - start
    print(ctime)

# Show the download descriptors held in video_list before main() schedules them.
print(video_list)
def main():
    """Download every entry of ``video_list`` concurrently on ``pool``.

    Submits one ``get_video`` task per entry, waits for all of them to
    finish and prints a message for each task that raised. Without this
    check, an exception inside a worker stays stored on its future and is
    never seen. Returns nothing.
    """
    from concurrent.futures import as_completed

    # Map each future back to its descriptor so failures can be named.
    tasks = {pool.submit(get_video, item): item for item in video_list}
    for task in as_completed(tasks):
        error = task.exception()
        if error is not None:
            print(f"{tasks[task]['v_name']}下载失败: {error!r}")


# Script entry point: runs only when the file is executed directly.
if __name__ == "__main__":
    # Queue all downloads on the shared pool.
    main()
    # Block until every queued download has finished, then release the
    # worker threads.
    pool.shutdown(wait=True)

# 猜你喜欢

# 转载自 www.cnblogs.com/kai-/p/12659803.html