Python爬虫之站长之家图片抓取

import requests
import re
import os
# Index page that is scanned for image tags.
url = 'http://sc.chinaz.com/tupian/'
# Directory the downloaded images are written to.
download = "picture/"
# Image tags on the page look like:
#   <img src2="http://pic.sc.chinaz.com/Files/pic/pic9/202006/bpic20497_s.jpg" alt="...">
# The image URL is taken from the ``src2`` attribute. Both wildcards are
# non-greedy so that several <img> tags on one line are matched one by one;
# a greedy ``alt=".*"`` would swallow everything up to the last ``">`` on the
# line and drop all but the first image.
imgPattern = r'<img src2="(.*?)" alt=".*?">'
# Seconds to wait on a request before giving up; without a timeout
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 10


def extract_image_urls(html):
    """Return the ``src2`` URLs of all image tags in *html*, in page order.

    Args:
        html: Page source as a string.

    Returns:
        A list of URL strings; empty when no tag matches ``imgPattern``.
    """
    return re.findall(imgPattern, html)


def download_images(image_urls, target_dir):
    """Save every URL in *image_urls* as ``<index>.jpg`` inside *target_dir*.

    The index is the position of the URL in *image_urls*, starting at 0.
    A URL that cannot be fetched (connection error, timeout, HTTP error
    status) is reported and skipped, so no error page is stored as an image
    and one bad link does not abort the remaining downloads.

    Args:
        image_urls: Iterable of image URLs.
        target_dir: Existing directory to write the files into.
    """
    for index, image_url in enumerate(image_urls):
        # Fetch before opening the file so a failed request leaves no
        # empty or bogus .jpg behind.
        try:
            image = requests.get(image_url, timeout=REQUEST_TIMEOUT)
            image.raise_for_status()
        except requests.RequestException as exc:
            print(f"第{index}张图片下载失败: {exc}")
            continue
        with open(os.path.join(target_dir, f"{index}.jpg"), "wb") as file:
            file.write(image.content)
        print(f"第{index}张图片")


def main():
    """Fetch the index page and download every image it references.

    Raises:
        requests.RequestException: If the index page itself cannot be fetched
            or answers with an HTTP error status.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(download, exist_ok=True)
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    download_images(extract_image_urls(response.text), download)


if __name__ == "__main__":
    main()

猜你喜欢

转载自blog.csdn.net/qq_26018075/article/details/106979114