爬取千图网上的高清图片

贴下源码

运行环境:Python 3.5,Windows 10

import re
import urllib.error
import urllib.request
# Scrape the first nine listing pages of 58pic.com and save every image whose
# URL matches the expected CDN layout into SAVE_DIR.

# Listing-page URL template; "{}" is replaced by the page number.
PAGE_URL_TEMPLATE = "http://www.58pic.com/tupian/duanwujie-800-0-{}.html"
# Literal text surrounding the variable part of each image URL. The same two
# constants build both the search pattern and the download URL so the two
# cannot drift apart.
IMG_URL_PREFIX = "http://pic.qiantucdn.com/58pic/28/"
IMG_URL_SUFFIX = ".jpg!"
# Target directory. urlretrieve does not create it, so it must already exist.
SAVE_DIR = "D:/爬虫/"

# The pattern is not fixed: any regex that captures the part that varies
# between image URLs will do. The literal parts go through re.escape so that
# "." only matches a real dot, and the pattern is compiled once instead of on
# every page.
IMG_PATTERN = re.compile(
    re.escape(IMG_URL_PREFIX) + "(.*?)" + re.escape(IMG_URL_SUFFIX)
)


def _report_error(err):
    """Print what went wrong without stopping the crawl.

    For a URLError (including HTTPError) print its ``code`` and ``reason``
    attributes when present; for any other exception print the exception.
    """
    if isinstance(err, urllib.error.URLError):
        if hasattr(err, "code"):
            print(err.code)
        if hasattr(err, "reason"):
            print(err.reason)
    else:
        print(err)


for i in range(1, 10):
    pageurl = PAGE_URL_TEMPLATE.format(i)
    try:
        # The context manager closes the HTTP response once the body is read.
        with urllib.request.urlopen(pageurl) as response:
            data = response.read().decode("utf-8", "ignore")
    except Exception as e:
        # A listing page that fails to load is skipped, in the same
        # best-effort way a single failed image is, rather than aborting
        # the whole run.
        _report_error(e)
        continue

    for j, thisimg in enumerate(IMG_PATTERN.findall(data)):
        try:
            thisimgurl = IMG_URL_PREFIX + thisimg + IMG_URL_SUFFIX
            save_path = SAVE_DIR + str(i) + str(j) + ".jpg"
            urllib.request.urlretrieve(thisimgurl, filename=save_path)
            print("第" + str(i) + "页第" + str(j) + "个图片爬取成功")
        except Exception as e:
            _report_error(e)

  

猜你喜欢

转载自www.cnblogs.com/chao-sir/p/9350886.html