python3网络爬虫:爬虫正则表达式下载图片(六)

python3网络爬虫:爬虫正则表达式下载图片(六)

import urllib.request
import re

def open_url(url, encoding='utf-8'):
    """Fetch *url* and return the response body decoded as text.

    The request is sent with a Chrome-on-Windows ``User-Agent`` header
    (presumably so the server does not reject urllib's default agent).

    Args:
        url: Address of the page to download.
        encoding: Codec used to decode the response bytes. Defaults to
            UTF-8, which is what this function always used before.

    Returns:
        The decoded response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid in ``encoding``.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36')
    # Use the response as a context manager so the underlying connection is
    # closed as soon as the body has been read, even if decoding fails.
    with urllib.request.urlopen(req) as page:
        return page.read().decode(encoding)

def get_img(html):
    """Download every ``BDE_Image`` JPEG referenced in *html*.

    Each image is written to the current working directory under the last
    path segment of its URL. A URL that occurs several times in the page is
    fetched only once.

    Args:
        html: Page markup to scan for
            ``<img class="BDE_Image" src="....jpg"`` tags.

    Returns:
        The file names written, one per distinct image URL, in the order
        the URLs first appear in ``html``. Empty if nothing matched.
    """
    # Captures the image URL, e.g.
    # https://imgsa.baidu.com/forum/w%3D580/sign=aa042893d558ccbf1bbcb53229d9bcd4/82e96d600c338744f222ae5d550fd9f9d62aa07d.jpg
    p = r'<img class="BDE_Image" src="([^"]+\.jpg)"'
    imglist = re.findall(p, html)
    print(imglist)

    saved = []
    # dict.fromkeys drops repeated URLs while keeping first-seen order, so
    # the same image is not downloaded (and overwritten) more than once.
    for img_url in dict.fromkeys(imglist):
        filename = img_url.split("/")[-1]
        urllib.request.urlretrieve(img_url, filename)  # download the image to `filename`
        saved.append(filename)
    return saved
if __name__ == '__main__':
    # Script entry point: fetch one Baidu Tieba thread page and download
    # the post images found in its markup.
    thread_url = "https://tieba.baidu.com/p/3563409202?red_tag=3526577411"
    page_html = open_url(thread_url)
    get_img(page_html)

猜你喜欢

转载自blog.csdn.net/m0_38039437/article/details/80505041
今日推荐