import random
import re
import requests
import os
import time
# Pool of desktop-browser User-Agent strings. One entry is picked at random
# for each outgoing request so that consecutive requests do not all present
# the same client signature.
User_Agent_list = [
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE',
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0',
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134',
]
def get_url_text(url):
    """Fetch ``url`` and return the response body decoded as text.

    The request carries a User-Agent picked at random from
    ``User_Agent_list`` and is sent with ``timeout=5``. The body is decoded
    with the encoding detected from its content (``apparent_encoding``)
    instead of the one declared in the response headers.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or ``None`` when the request fails
        (connection problem, timeout, invalid URL, or a 4xx/5xx status).
        A failure is also reported on stdout.
    """
    headers = {'User-Agent': random.choice(User_Agent_list)}
    try:
        r = requests.get(url, headers=headers, timeout=5)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request/HTTP failures are handled here, so Ctrl-C and
        # programming errors are no longer silently swallowed.
        print('访问异常')
        return None
def get_url_content(url):
    """Fetch ``url`` and return the raw response body as bytes.

    The request carries a User-Agent picked at random from
    ``User_Agent_list`` and is sent with ``timeout=5``.

    Args:
        url: Address of the resource to fetch (e.g. an image).

    Returns:
        The response body as ``bytes``, or ``None`` when the request fails
        (connection problem, timeout, invalid URL, or a 4xx/5xx status).
        A failure is also reported on stdout.
    """
    headers = {'User-Agent': random.choice(User_Agent_list)}
    try:
        r = requests.get(url, headers=headers, timeout=5)
        r.raise_for_status()
        # No charset detection here: the encoding only affects ``r.text``,
        # and running detection over binary data is wasted work.
        return r.content
    except requests.RequestException:
        # Only request/HTTP failures are handled here, so Ctrl-C and
        # programming errors are no longer silently swallowed.
        print('访问异常')
        return None
def create_filter(dirPath):
    """Create the directory ``dirPath`` unless it already exists.

    Missing parent directories are created as well. A message saying
    whether the directory was created or was already present is printed.

    Args:
        dirPath: Path of the directory to create.

    Returns:
        Always ``None``.

    Raises:
        OSError: If the directory cannot be created for a reason other
            than already existing (e.g. permission denied, invalid path).
    """
    try:
        # makedirs instead of mkdir so a missing parent directory does not
        # abort the run; attempting the creation directly also avoids the
        # gap between an existence check and the actual mkdir.
        os.makedirs(dirPath)
    except FileExistsError:
        print("目录"+dirPath+"已经存在")
        return None
    print("创建目录"+dirPath)
    return None
def download_pic(url, dirpath, name):
    """Download the image at ``url`` and save it as ``<name>.jpg`` in ``dirpath``.

    The bytes are obtained through ``get_url_content``. Success or failure
    is reported on stdout; no exception is raised for a failed download or
    a failed write.

    Args:
        url: Address of the image.
        dirpath: Existing directory in which the file is written.
        name: File name without extension; ``.jpg`` is appended.

    Returns:
        Always ``None``.
    """
    content = get_url_content(url)
    if content is None:
        # The fetch failed: report it without leaving an empty .jpg behind.
        print('出现错误!')
        return None
    filepath = os.path.join(dirpath, name + '.jpg')
    try:
        # The context manager closes the file even if the write fails.
        with open(filepath, 'wb') as pic:
            pic.write(content)
    except OSError:
        print('出现错误!')
        return None
    print('下载成功')
    return None
# Script body: fetch the category index page, follow every link that opens in
# a new tab, and download the images found on each linked page into a
# sub-directory of ``dirpath`` named after the link text.
url = 'https://amlyu.com/category/miaoxiezhen/'
html = get_url_text(url)
dirpath = 'F:\\用于调试的文件夹'
# get_url_text returns None when the request fails; fall back to an empty
# page so findall yields no links instead of raising TypeError.
son_html_list = re.findall(r'<a target="_blank" href="(.*?)">(.*?)</a>', html or '')
for link in son_html_list:
    time.sleep(0.3)  # short pause between page requests
    son_url, title = link
    son_html = get_url_text(son_url)
    if son_html is None:
        # The child page could not be fetched (already reported); skip it.
        continue
    son_infor_list = re.findall(r'<img src="(.*?)" alt="(.*?)" title="(.*?)>', son_html)
    print(link)
    # The link text is arbitrary HTML content; replace characters that are
    # not allowed in directory names so the directory can be created.
    safe_title = re.sub(r'[\\/:*?"<>|]', '_', title)
    album_dir = os.path.join(dirpath, safe_title)
    create_filter(album_dir)
    # Images are saved as 0.jpg, 1.jpg, ... in page order.
    for index, pic_info in enumerate(son_infor_list):
        print(pic_info)
        download_pic(pic_info[0], album_dir, str(index))