"""Download thumbnail images from the qiushibaike.com picture listing.

Each listing page is fetched, the ``src`` of every ``<img>`` found inside a
``<div class="thumb">`` element is extracted with a regular expression, and
the referenced image is saved into the ``qiushiLibs`` directory under the
file name taken from its URL.
"""
import os
import posixpath
import re
import sys
from urllib.parse import urljoin, urlsplit

import requests

# Directory (relative to the current working directory) receiving the images.
OUTPUT_DIR = 'qiushiLibs'

# Browser-like User-Agent sent with every request.
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4098.3 Safari/537.36'}

# Listing page URL; ``%d`` is replaced by the page number.
PAGE_URL_TEMPLATE = 'https://www.qiushibaike.com/pic/page/%d/?s=5191431'

# Inclusive range of listing pages to crawl (the original used range(1, 3)).
FIRST_PAGE = 1
LAST_PAGE = 2

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10

# Captures the ``src`` attribute of the image inside each thumbnail block.
# re.S lets ``.`` span newlines, because the markup is spread over lines.
THUMB_IMG_PATTERN = re.compile(
    r'<div class="thumb">.*?<img src="(.*?)" alt.*?</div>', re.S
)


def extract_image_urls(page_url, page_html):
    """Return absolute URLs of all thumbnail images found in *page_html*.

    ``urljoin`` resolves each ``src`` against *page_url*, so protocol-relative
    (``//host/a.jpg``), absolute and relative sources are all handled.
    """
    return [urljoin(page_url, src) for src in THUMB_IMG_PATTERN.findall(page_html)]


def download_image(session, image_url, output_dir):
    """Fetch *image_url* and store it in *output_dir*.

    Returns True when the file was written. A failed download or a URL
    without a usable file name is reported on stderr and yields False, so
    one bad image does not abort the whole crawl.
    """
    # Use only the path component so a query string never leaks into the name.
    image_name = posixpath.basename(urlsplit(image_url).path)
    if not image_name:
        print('skipping %s: no file name in URL' % image_url, file=sys.stderr)
        return False

    try:
        response = session.get(image_url, timeout=REQUEST_TIMEOUT)
        # Do not save an HTML error page under an image file name.
        response.raise_for_status()
    except requests.RequestException as exc:
        print('skipping %s: %s' % (image_url, exc), file=sys.stderr)
        return False

    with open(os.path.join(output_dir, image_name), 'wb') as fp:
        fp.write(response.content)
    return True


def main():
    """Crawl the configured listing pages and download every thumbnail."""
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # A session reuses the underlying connection across requests.
    with requests.Session() as session:
        session.headers.update(HEADERS)
        for page_number in range(FIRST_PAGE, LAST_PAGE + 1):
            page_url = PAGE_URL_TEMPLATE % page_number
            try:
                page = session.get(page_url, timeout=REQUEST_TIMEOUT)
                page.raise_for_status()
            except requests.RequestException as exc:
                print('skipping page %s: %s' % (page_url, exc), file=sys.stderr)
                continue

            for image_url in extract_image_urls(page_url, page.text):
                if download_image(session, image_url, OUTPUT_DIR):
                    print('辛辛苦苦')


if __name__ == '__main__':
    main()
第三天爬虫实战，加了正则表达式，学点东西辛辛苦苦不容易。