#1.爬虫库补充
# 安装靓汤 pip install beautifulsoup4
# 安装html5lib:html网页解析库 pip install html5lib
# 拓展:安装lxml pip install lxml
import os
import requests
from bs4 import BeautifulSoup
# 1. URL of the listing page that is requested and scraped.
url = "https://www.i4.cn/ring_21_0_1.html"
# Characters that cannot appear in a file name on common filesystems.
_UNSAFE_FILENAME_CHARS = '\\/:*?"<>|'


def _safe_filename(name):
    """Return *name* stripped, with unsafe or control characters replaced by ``_``."""
    return "".join(
        "_" if ch in _UNSAFE_FILENAME_CHARS or ch < " " else ch
        for ch in name.strip()
    )


def download(page_url=None, target_dir="music/", timeout=10):
    """Download every audio clip listed on a page into a local directory.

    The page is parsed with BeautifulSoup/html5lib. Each ``div`` with class
    ``ring_list`` is treated as one entry: the clip URL is read from the
    ``data-mp3`` attribute of its ``div.audio_play`` child and the file name
    from the text of its ``div.title`` child.

    Args:
        page_url: Listing page to scrape. Defaults to the module-level ``url``.
        target_dir: Directory the ``.mp3`` files are written to; it is created
            when it does not exist yet.
        timeout: Seconds to wait for each HTTP request before ``requests``
            raises instead of blocking indefinitely.

    Returns:
        None. If the listing page does not answer with HTTP 200 nothing is
        downloaded. Entries that lack a tag, a clip URL or a usable title,
        and clips whose download does not answer with HTTP 200, are skipped.
    """
    if page_url is None:
        page_url = url

    # makedirs(exist_ok=True) avoids the check-then-create race of
    # os.path.exists() followed by os.mkdir().
    os.makedirs(target_dir, exist_ok=True)

    response = requests.get(page_url, timeout=timeout)
    if response.status_code != 200:
        return

    soup = BeautifulSoup(response.content, "html5lib")

    # find_all returns every matching tag; find returns only the first one.
    for ring in soup.find_all("div", attrs={"class": "ring_list"}):
        audio_play = ring.find("div", attrs={"class": "audio_play"})
        title_tag = ring.find("div", attrs={"class": "title"})
        if audio_play is None or title_tag is None:
            # Entry without the expected markup: nothing to download.
            continue

        mp3_url = audio_play.get("data-mp3")
        title = title_tag.text.strip()
        file_name = _safe_filename(title)
        if not mp3_url or not file_name:
            continue

        audio = requests.get(mp3_url, timeout=timeout)
        if audio.status_code != 200:
            # Do not store an error page under an .mp3 name.
            continue

        with open(os.path.join(target_dir, file_name + ".mp3"), "wb") as file:
            file.write(audio.content)
        print(title + "下载完成")
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    download()
# python爬虫之爱思助手音乐爬取
# 猜你喜欢
# 转载自 blog.csdn.net/qq_26018075/article/details/106979127
# 今日推荐
# 周排行