Python实操网站图片爬虫自动下载

import os
import re
from urllib.parse import urljoin

import requests

# Listing page to scrape; the regex below pulls (detail link, title) pairs
# out of its thumbnail anchors.
url = 'http://www.netbian.com/index_2.htm'
headers = {
    # Browser-style User-Agent sent with every request made by this script.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
}

# Seconds to wait on any single HTTP request; without a timeout a stalled
# connection would block the script indefinitely.
REQUEST_TIMEOUT = 10

# Directory the downloaded images are written to (created on demand).
OUTPUT_DIR = 'img'

# Captures (detail-page href, alt text) for each thumbnail on the listing page.
LISTING_PATTERN = re.compile(
    r'<a href="(.*?)" title=".*?" target="_blank"><img src=".*?" alt="(.*?)" />.*?</a>')

# Captures the image URL from a detail page.
DETAIL_PATTERN = re.compile(
    r'target="_blank"><img src="(.*?)" alt=".*?" title=".*?">')

# Path separators, Windows-reserved punctuation and control whitespace: any of
# these in a title would break open() or point outside OUTPUT_DIR.
UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')

response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail loudly if the listing page itself cannot be fetched: there is nothing
# useful to do without it.
response.raise_for_status()
response.encoding = response.apparent_encoding
img_info = LISTING_PATTERN.findall(response.text)

os.makedirs(OUTPUT_DIR, exist_ok=True)

for link, title in img_info:
    # urljoin resolves site-relative links against the listing URL and leaves
    # already-absolute links untouched (plain concatenation would mangle them).
    link_url = urljoin(url, link)
    try:
        html_data = requests.get(
            url=link_url, headers=headers, timeout=REQUEST_TIMEOUT)
        html_data.raise_for_status()
        html_data.encoding = html_data.apparent_encoding

        matches = DETAIL_PATTERN.findall(html_data.text)
        if not matches:
            # Page layout differs from what the pattern expects; skip this
            # entry instead of aborting the whole run with an IndexError.
            print(f'skip {link_url}: no image found on detail page')
            continue

        img_url = urljoin(link_url, matches[0])
        img_response = requests.get(
            url=img_url, headers=headers, timeout=REQUEST_TIMEOUT)
        img_response.raise_for_status()
    except requests.RequestException as exc:
        # One failed download should not stop the remaining ones.
        print(f'skip {link_url}: {exc}')
        continue

    # Fall back to a fixed name if the title is empty after sanitising.
    filename = UNSAFE_FILENAME_CHARS.sub('_', title).strip() or 'untitled'
    with open(os.path.join(OUTPUT_DIR, filename + '.jpg'), mode='wb') as f:
        f.write(img_response.content)

猜你喜欢

转载自blog.csdn.net/weixin_64974855/article/details/132644599