Python Concurrent Programming: A Simple Crawler


# HTTP status codes:
# 200  OK
# 404  Not Found
# 502 / 504  gateway errors (bad gateway / gateway timeout)
import requests
from multiprocessing import Pool

def get(url):
    # Runs in a worker process: fetch the page and return (url, body) on success
    response = requests.get(url)
    if response.status_code == 200:
        return url, response.content.decode('utf-8')

def call_back(args):
    # Runs in the main process with the return value of get()
    if args is None:  # get() returned nothing (non-200 response)
        return
    url, content = args  # unpack the (url, body) tuple
    print(url, len(content))


if __name__ == '__main__':
    url_lst = [
        'https://www.cnblogs.com',
        'https://www.sogou.com',
        'http://www.sohu.com',
        'http://www.baidu.com'
    ]
    p = Pool(5)
    for url in url_lst:
        p.apply_async(get, args=(url,), callback=call_back)  # the callback runs in the main process
    p.close()  # stop accepting new tasks
    p.join()   # wait for all workers to finish
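
The comment on apply_async above says the callback executes in the main process, not in the worker. A minimal sketch to confirm this by printing the pid on both sides; the work and on_done names are illustrative, not from the original:

import os
from multiprocessing import Pool

def work(n):
    # Runs in a worker process
    return n, os.getpid()

def on_done(result):
    # Runs in the main process, not in the worker that produced the result
    n, worker_pid = result
    print('task %s ran in pid %s, callback in pid %s' % (n, worker_pid, os.getpid()))

if __name__ == '__main__':
    p = Pool(2)
    for i in range(4):
        p.apply_async(work, args=(i,), callback=on_done)
    p.close()
    p.join()

Every callback line reports the same pid (the main process), while the task pids vary across the two workers.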
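
Since downloading pages is I/O-bound rather than CPU-bound, threads are often a lighter-weight choice than processes. A sketch of the same crawler using the standard library's concurrent.futures instead of multiprocessing.Pool; the timeout=10 argument is an assumption added here, not part of the original code:

import requests
from concurrent.futures import ThreadPoolExecutor, as_completed

def get(url):
    # timeout=10 is an added safeguard, not in the original code
    response = requests.get(url, timeout=10)
    if response.status_code == 200:
        return url, response.text

if __name__ == '__main__':
    url_lst = [
        'https://www.cnblogs.com',
        'https://www.sogou.com',
        'http://www.sohu.com',
        'http://www.baidu.com',
    ]
    with ThreadPoolExecutor(max_workers=5) as pool:
        futures = [pool.submit(get, url) for url in url_lst]
        for fut in as_completed(futures):
            result = fut.result()  # re-raises any exception from get()
            if result is not None:
                url, content = result
                print(url, len(content))

as_completed yields each future as it finishes, so results print in completion order, much like the Pool callback above.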


Reposted from www.cnblogs.com/konglinqingfeng/p/9703414.html