3.10 爬取网页数据示例(二)

import requests
import os
import bs4
# Crawl xkcd: starting from the front page, download each comic image and
# follow the "Prev" link backwards until the first comic (whose Prev href
# is '#') is reached.

url = 'http://xkcd.com'            # start at the newest comic
ml = r'F:\ABD'                     # raw string: '\A' is an invalid escape otherwise
os.makedirs(ml, exist_ok=True)     # download folder; no error if it already exists

while not url.endswith('#'):
    # Fetch and parse the comic page.
    print('Download page %s...' % url)
    res = requests.get(url)
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, 'html.parser')  # explicit parser silences bs4 warning
    print('Done.')

    comicElem = soup.select('#comic img')
    if comicElem == []:
        print('Could not find comic image.')
    else:
        comicUrl = comicElem[0].get('src')
        # xkcd serves protocol-relative URLs ('//imgs.xkcd.com/...');
        # requests needs an absolute URL with a scheme or it raises MissingSchema.
        if comicUrl.startswith('//'):
            comicUrl = 'http:' + comicUrl
        print('Downloading image %s...' % (comicUrl))
        res = requests.get(comicUrl)
        res.raise_for_status()
        print('Done.')

        # Stream the image to disk in 100 KB chunks; 'with' guarantees the
        # file is closed even if a write fails.
        with open(os.path.join(ml, os.path.basename(comicUrl)), 'wb') as imageFile:
            for chunk in res.iter_content(100000):
                imageFile.write(chunk)

    # Follow the "Prev" link; on the first comic its href is '#', ending the loop.
    prevLink = soup.select('a[rel="prev"]')[0]
    url = 'http://xkcd.com' + prevLink.get('href')

print('Done.')

猜你喜欢

转载自 https://www.cnblogs.com/cqkangle/p/10505248.html