实现结果

准备
为了运行代码,你需要先从浏览器开发者工具的 Network 面板中获取自己的 cookie 和 User-Agent,并填入代码末尾的 cookie 和 user_agent 变量。
参考:《爬虫系列之怎么找 cookie 以及 User-Agent》
代码
# -*- coding: utf-8 -*-
import requests
import os
from pyquery import PyQuery as pq
import re
def save_image(img_src, name, main_path='save_img', timeout=30):
    """Download a single image and store it as ``<main_path>/<name>.png``.

    Args:
        img_src: URL of the image to fetch.
        name: Base file name without extension. Characters that are not
            allowed in file names (``\\ / : * ? " < > |``) are replaced
            with ``_`` so that a title such as ``a/b`` cannot escape
            ``main_path`` or make ``open`` fail.
        main_path: Directory to save into; created if it does not exist.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error instead of blocking forever.

    Returns:
        True if the image was written, False if the server answered with
        a status other than 200 (nothing is written in that case).
    """
    img_response = requests.get(img_src, timeout=timeout)
    if img_response.status_code != 200:
        return False

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(main_path, exist_ok=True)

    safe_name = re.sub(r'[\\/:*?"<>|]', '_', str(name))
    target = os.path.join(main_path, safe_name + '.png')

    # 'wb' rather than 'ab': appending a second download to an existing
    # file would leave two concatenated images in one corrupt file.
    with open(target, 'wb') as f:
        f.write(img_response.content)
    return True
class Image_From_Pixiv_Painter_ID():
    """Look up a painter ID on huashi6.com and download that painter's works.

    Constructing an instance immediately performs the search request; the
    outcome is stored in ``self.flag``. Call :meth:`download` afterwards to
    fetch the images found on the painter's page.

    NOTE: despite the class name, the site queried is ``www.huashi6.com``
    (see ``basic_url``); the ID is passed to that site's search page.
    """

    # Seconds to wait on each HTTP request so a stalled connection raises
    # instead of hanging the script indefinitely.
    request_timeout = 30

    def __init__(self, cookie, user_agent, painter_id='490219'):
        """Store request headers, build the search URL and run the search.

        Args:
            cookie: Value sent as the ``cookie`` request header.
            user_agent: Value sent as the ``user-agent`` request header.
            painter_id: Painter ID to search for; converted to ``str`` so
                an integer ID can be passed as well.
        """
        print('正在初始化...')
        self.basic_url = 'https://www.huashi6.com'
        self.head1 = {'cookie': cookie, 'user-agent': user_agent}
        self.painter_id = str(painter_id)
        self.target_url = self.basic_url + '/search?searchText=' + self.painter_id
        # True only if the search page responded with 200 and did not
        # contain the "nothing found" marker text.
        self.flag = self.find_painter()

    def find_painter(self):
        """Request the search page and report whether the painter exists.

        The response is kept in ``self.target_url_response`` because
        :meth:`download` parses it later.

        Returns:
            True if the site answered 200 and the page does not contain the
            "no painter or work found" message, otherwise False.
        """
        self.target_url_response = requests.get(
            self.target_url, headers=self.head1, timeout=self.request_timeout)
        status = self.target_url_response.status_code
        if status != 200:
            print('网站未响应:' + str(status))
            return False

        print('网站成功响应...')
        print('正在搜索画师ID...')
        # Plain substring test: the marker contains no regex syntax.
        if '没有找到相关画师或者作品' in self.target_url_response.text:
            print('未找到该画师.')
            return False

        print('已找到该画师,使用.download()方法保存图片到''/save_img''文件夹')
        return True

    def download(self):
        """Download every titled image found on the painter's page.

        Follows the first ``div.painter > a`` link from the search result
        to the painter's page, dumps that page to ``newresult2.html`` in
        the current directory, and passes each ``<img>`` that has both a
        ``title`` and a ``src`` to ``save_image``.

        Returns:
            None. Progress and failures are reported via ``print``.
        """
        if not self.flag:
            print('未找到该画师.下载失败')
            # Stop here: without a successful search there is nothing to
            # parse, and continuing would fail on a missing link.
            return

        search_doc = pq(self.target_url_response.text)
        painter_href = search_doc('div.painter > a').attr('href')
        if not painter_href:
            # The search page did not contain the expected painter link
            # (e.g. the layout differs); concatenating None would raise.
            print('未找到画师主页链接.下载失败')
            return

        # Go to the painter's own page.
        new_url = self.basic_url + painter_href
        new_url_response = requests.get(
            new_url, headers=self.head1, timeout=self.request_timeout)
        if new_url_response.status_code != 200:
            print('网站未响应:' + str(new_url_response.status_code))
            return

        print('成功响应...')
        text = new_url_response.text
        # Keep a copy of the fetched HTML for inspection.
        with open("newresult2.html", 'w', encoding="utf-8") as f:
            f.write(text)

        painter_doc = pq(text)
        images = painter_doc(
            '.p-painter-detail .painter-page-body .painter-works .waterfall '
            '.px-waterfall .px-waterfall-item .px-img .c-work-img > img'
        ).items()

        print('下载中...')
        for img in images:
            img_title = img.attr('title')
            img_src = img.attr('src')
            # Untitled images are skipped as before; images without a src
            # are skipped too, since there is no URL to request.
            if img_title is None or not img_src:
                continue
            print('图片名称', img_title)
            save_image(img_src, img_title)
        print('下载完毕')
# Fill in both values from your own browser session: open the site, then
# copy them from the request headers shown in the developer tools'
# Network panel.
cookie = ''
user_agent = ''

# Creating the object runs the painter search; download() then saves the
# images that were found.
model = Image_From_Pixiv_Painter_ID(cookie, user_agent, painter_id='11086')
model.download()