# Python web scraping: downloading music from i4.cn (爱思助手)

# 1. Additional libraries required by the crawler
#   Install Beautiful Soup: pip install beautifulsoup4
#   Install html5lib (HTML parsing library): pip install html5lib
#   Optional: install lxml: pip install lxml
import os
import requests
from bs4 import BeautifulSoup
# 1. URL of the listing page that download() requests and parses
url="https://www.i4.cn/ring_21_0_1.html"
#2.本地存储路径
def _safe_filename(name):
    """Return *name* made safe for use as a file name.

    Runs of whitespace (including newlines picked up from the HTML) are
    collapsed to single spaces, and characters that are path separators or
    are rejected by common file systems are replaced with "_".
    """
    unsafe = '\\/:*?"<>|'
    collapsed = " ".join(name.split())
    cleaned = "".join("_" if ch in unsafe else ch for ch in collapsed)
    return cleaned or "untitled"


def download(page_url=None, target_dir="music/", timeout=10):
    """Download every audio file linked from the listing page.

    The page is parsed with BeautifulSoup/html5lib. For each
    ``div.ring_list`` entry, the ``data-mp3`` attribute of its
    ``div.audio_play`` child gives the audio URL and the text of its
    ``div.title`` child gives the file name.

    Args:
        page_url: Listing page to scrape. Defaults to the module-level ``url``.
        target_dir: Directory the ``.mp3`` files are written to; created if
            it does not exist.
        timeout: Timeout in seconds applied to every HTTP request.

    Returns:
        None. If the listing page does not answer with HTTP 200, nothing is
        downloaded. Entries with missing markup or a failed audio request
        are skipped.
    """
    if page_url is None:
        page_url = url  # fall back to the module-level default page

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(target_dir, exist_ok=True)

    response = requests.get(page_url, timeout=timeout)
    if response.status_code != 200:
        return

    soup = BeautifulSoup(response.content, "html5lib")
    # find_all returns every matching tag; find returns only the first one.
    for ring in soup.find_all("div", attrs={"class": "ring_list"}):
        audio_play = ring.find("div", attrs={"class": "audio_play"})
        title_tag = ring.find("div", attrs={"class": "title"})
        mp3_url = audio_play.get("data-mp3") if audio_play is not None else None
        if not mp3_url or title_tag is None:
            # Entry does not have the expected structure; nothing to download.
            continue

        title = _safe_filename(title_tag.text)

        # Fetch before opening the file so a failed request does not leave
        # an empty or error-page .mp3 behind.
        audio = requests.get(mp3_url, timeout=timeout)
        if audio.status_code != 200:
            continue

        with open(os.path.join(target_dir, title + ".mp3"), "wb") as file:
            file.write(audio.content)
        print(title + "下载完成")
# Start the download only when this file is run as a script, not when imported.
if __name__ == '__main__':
    download()

# You may also like

# Reposted from blog.csdn.net/qq_26018075/article/details/106979127