为爱而码

下载微信文章中图片
downloadImage.py

#!/usr/bin/python
# -*- coding: UTF-8 -*-
import re,os
import urllib,urllib2;
import time
import sys
# Python 2 idiom: re-import the sys module, then switch the interpreter-wide
# default string encoding to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

def getHtml(url):
    """Fetch *url* and return the raw response body.

    A browser-like ``User-Agent`` header is sent so that the server does not
    treat the request as coming from a bot.

    :param url: address of the page to download.
    :return: the response body exactly as read from the connection
        (no decoding is applied).
    :raises: whatever ``urllib2.urlopen`` or ``read`` raise; nothing is
        caught here.
    """
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    headers = {'User-Agent': user_agent}

    request = urllib2.Request(url, headers=headers)
    page = urllib2.urlopen(request)
    try:
        return page.read()
    finally:
        # Always release the connection, even when read() fails.
        page.close()


def getImg(html, savePath):
    """Download every image referenced by a ``data-src`` attribute in *html*.

    Files are written to ``savePath`` as ``0.jpg``, ``1.jpg``, ... where the
    number is the position of the URL among the non-empty ``data-src`` values
    found in the page. A failed download is reported and skipped, so its
    number is simply left unused.

    :param html: page markup to scan.
    :param savePath: directory that receives the files. It is created when it
        does not exist yet and there is at least one image to download.
    :return: None. Progress is printed: ``[+]`` for a saved image, ``[-]``
        for a failure.
    """
    # Example of a matching address:
    # http://mmbiz.qpic.cn/mmbiz_jpg/wlJkphkR2NMibwTo1cqHwdhLTMYmbV0IOw5vCaJuTsbvTdukCQwUicPClXRibcnY8RCsszAfBYlrJnfz8icUIBWWGw/640?wx_fmt=jpeg
    # Empty attributes (data-src="") are dropped up front: there is nothing
    # to fetch for them, so they should cost neither a request nor a pause.
    imgurls = [u for u in re.findall(r'data-src="(.*?)"', html) if u]
    if not imgurls:
        return

    # Without the target directory every single download would fail.
    if not os.path.isdir(savePath):
        try:
            os.makedirs(savePath)
        except OSError:
            print("[-] savePath =%s" % savePath)
            return

    for x, imgurl in enumerate(imgurls):
        # os.path.join instead of a hard-coded '\\' keeps this portable.
        target = os.path.join(savePath, '%s.jpg' % x)
        try:
            urllib.urlretrieve(imgurl, target)
        except Exception:
            # Best effort per image: report and move on. Unlike a bare
            # ``except:`` this still lets Ctrl-C / SystemExit stop the run.
            print("[-] imgurl =%s" % imgurl)
        else:
            print("[+] imgurl =%s" % imgurl)
        # Pause between requests to avoid hammering the server.
        time.sleep(1)



def bookUrl(html, baseDir=r'C:\Users\pradmin\Desktop\downloadImage\images', limit=6):
    """Follow the article links found in *html* and download their images.

    Every ``<a href="..." target="_blank">text</a>`` link is a candidate; the
    link text names the sub-directory of *baseDir* that receives the images
    of the linked page.

    :param html: markup of the page that lists the articles.
    :param baseDir: directory under which one folder per link is used.
    :param limit: maximum number of links to process (``None`` for all).
    :return: None.
    :raises: errors from fetching a linked page are not caught here.
    """
    links = re.findall(r'<a href=\"(.*?)\" target=\"_blank\">(.*?)<\/a>', html)
    for url, bookName in links[:limit]:
        if isinstance(bookName, bytes):
            # Tolerate malformed bytes instead of aborting the whole run.
            bookName = bookName.decode("utf-8", "replace")
        # The link text comes from the remote page, so it must not be able to
        # inject path separators or characters that are illegal in Windows
        # file names; leading/trailing dots and spaces are stripped as well
        # (this also rules out a bare ".." component).
        dirName = re.sub(u'[\\\\/:*?"<>|]', u'_', bookName).strip(u' .') or u'untitled'
        savePath = os.path.join(baseDir, dirName)
        print("[+] url =%s" % url)
        # A separate name keeps the *html* parameter from being rebound.
        pageHtml = getHtml(url)
        getImg(pageHtml, savePath)


# Script driver: download the starting page, then hand its markup to bookUrl,
# which follows the links it finds and saves the images of each linked page.
originUrl="http://mp.weixin.qq.com/s?__biz=MzA4NjQzNzY4Mw==&mid=2454531002&idx=4&sn=67826657f4486bfa0cb8f195262a86f9&chksm=887131e6bf06b8f09b2ec821f49c71c64536cf585d9f17664709fcfc533d39c976c30da91a8d&mpshare=1&scene=1&srcid=1215qBhkFwNhLrfnZlSMmZSj#rd"
html = getHtml(originUrl)
bookUrl(html)

猜你喜欢

转载自blog.csdn.net/DAo_1990/article/details/78852245