Python 爬虫之爱思助手音乐爬取

#1. 爬虫库补充
#   安装靓汤（BeautifulSoup）：pip install beautifulsoup4
#   安装 html5lib（HTML 网页解析库）：pip install html5lib
#   拓展：安装 lxml：pip install lxml
import os
import requests
from bs4 import BeautifulSoup
# 1. URL of the page that download() requests and parses.
url="https://www.i4.cn/ring_21_0_1.html"
# 2. Local storage path (the target directory is chosen in download() below).
def download(page_url=None, target_dir="music/", timeout=30):
    """Download every audio clip listed on a page into a local directory.

    Fetches the listing page, parses it with BeautifulSoup/html5lib, and for
    each ``div`` with class ``ring_list`` saves the audio referenced by the
    ``data-mp3`` attribute of its ``audio_play`` child as ``<title>.mp3``,
    where the title comes from the entry's ``div`` with class ``title``.

    Args:
        page_url: Page to scrape. ``None`` (the default) uses the
            module-level ``url``.
        target_dir: Directory the files are written to; created if missing.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        None. Progress and failures are reported with ``print``.

    Raises:
        requests.RequestException: If the listing page cannot be fetched.
            Failures on individual audio files are reported and skipped.
    """
    if page_url is None:
        page_url = url

    # makedirs(exist_ok=True) avoids the check-then-create race and also
    # handles nested target directories.
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    response = requests.get(page_url, timeout=timeout)
    if response.status_code != 200:
        print("页面请求失败，状态码：" + str(response.status_code))
        return

    soup = BeautifulSoup(response.content, "html5lib")
    for ring in soup.find_all("div", attrs={"class": "ring_list"}):
        audio_tag = ring.find("div", attrs={"class": "audio_play"})
        title_tag = ring.find("div", attrs={"class": "title"})
        # Entries missing either tag cannot be downloaded or named; skip them
        # instead of crashing on None.
        if audio_tag is None or title_tag is None:
            continue

        mp3_url = audio_tag.get("data-mp3")
        title = _safe_filename(title_tag.text)
        if not mp3_url or not title:
            continue

        # Fetch the audio before opening the output file so that a failed
        # request does not leave an empty .mp3 behind.
        try:
            audio = requests.get(mp3_url, timeout=timeout)
        except requests.RequestException as exc:
            print(title + "下载失败：" + str(exc))
            continue
        if audio.status_code != 200:
            print(title + "下载失败，状态码：" + str(audio.status_code))
            continue

        with open(os.path.join(target_dir, title + ".mp3"), "wb") as file:
            file.write(audio.content)
        print(title + "下载完成")


def _safe_filename(raw_title):
    """Return raw_title stripped, with path-unsafe characters replaced by '_'."""
    forbidden = '\\/:*?"<>|'
    cleaned = "".join(
        "_" if (ch in forbidden or ord(ch) < 32) else ch
        for ch in raw_title.strip()
    )
    return cleaned.strip()
# Run the download only when this file is executed as a script, not on import.
if __name__ == '__main__':
    download()
Python 爬虫之爱思助手音乐爬取

猜你喜欢