爬虫-网易云歌单下载

爬虫-网易云歌单下载

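'''NetEase Cloud Music featured playlists
1. Open the playlist index page https://music.163.com/discover/playlist and collect every playlist as datas = [('playlist name 1', 'playlist id'), ...]
2. Open each playlist page: 'https://music.163.com/playlist?id=%s' % data[1]
3. Use a regular expression to extract the id and title of each song in the playlist and build the song's download URL
4. Download each song with requests.get().content
5. Use Pool from the multiprocessing module to process several playlists in parallel
'''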

import json
import os
import re
from multiprocessing import Pool

import requests
from fake_useragent import UserAgent

class WangyiSpider(object):
    """Download songs from the NetEase Cloud Music featured playlists.

    The spider fetches the playlist index page, takes the first
    ``playlist_limit`` playlists, and for each of them downloads the first
    ``song_limit`` songs into ``output_dir`` as ``<title>.mp3``.  Playlists
    are processed in parallel by a ``multiprocessing.Pool``.
    """

    PLAYLIST_INDEX_URL = 'https://music.163.com/discover/playlist'
    PLAYLIST_URL = 'https://music.163.com/playlist?id=%s'
    SONG_MEDIA_URL = 'https://music.163.com/song/media/outer/url?id=%s'

    # NOTE(review): TLS certificate verification is disabled on every request,
    # as in the original script.  This exposes the downloads to tampering;
    # switch to True once the environment's CA bundle is confirmed to work.
    VERIFY_TLS = False

    # Raw strings: the backslash escapes below are meant for the regex engine,
    # not for the Python string-literal parser.
    _PLAYLIST_RE = re.compile(
        r'<a title="(.*?)" href="/playlist\?id=(\d+)" class="msk"></a>')
    _SONG_RE = re.compile(r'<li><a href="/song\?id=(\d+)">(.*?)</a></li>')
    # Characters that are path separators or are rejected in file names on
    # common file systems, plus ASCII control characters.
    _UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

    def __init__(self, playlist_limit=2, song_limit=3, output_dir='music',
                 timeout=10):
        """Configure the spider.

        Args:
            playlist_limit: Number of playlists to process (``None`` = all).
            song_limit: Number of songs to download per playlist
                (``None`` = all).
            output_dir: Directory the ``.mp3`` files are written to; created
                on demand.
            timeout: Per-request timeout in seconds passed to ``requests``.
        """
        self.playlist_limit = playlist_limit
        self.song_limit = song_limit
        self.output_dir = output_dir
        self.timeout = timeout
        self.headers = {
            'Referer': 'https://music.163.com/discover/playlist',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/72.0.3626.119 Safari/537.36'
        }

    @classmethod
    def _parse_playlists(cls, html):
        """Return ``[(playlist_title, playlist_id), ...]`` found in *html*."""
        return cls._PLAYLIST_RE.findall(html)

    @classmethod
    def _parse_songs(cls, html):
        """Return ``[(song_id, song_title), ...]`` found in *html*."""
        return cls._SONG_RE.findall(html)

    @classmethod
    def _safe_filename(cls, title, fallback='unknown'):
        """Turn a scraped title into a name that is safe to use as a file name.

        Unsafe characters are replaced with ``_`` and surrounding spaces/dots
        are stripped, so a title can neither escape ``output_dir`` nor produce
        an invalid path.  *fallback* is returned when nothing usable is left.
        """
        cleaned = cls._UNSAFE_FILENAME_CHARS.sub('_', title).strip(' .')
        return cleaned or fallback

    def _fetch(self, url):
        """GET *url*; return the response, or ``None`` after printing the error."""
        try:
            response = requests.get(url, headers=self.headers,
                                    verify=self.VERIFY_TLS,
                                    timeout=self.timeout)
            response.raise_for_status()
        except requests.RequestException as e:
            print(e)
            return None
        return response

    def get_song(self):
        """Fetch the playlist index and download the selected playlists."""
        response = self._fetch(self.PLAYLIST_INDEX_URL)
        if response is None:
            return

        playlists = self._parse_playlists(response.text)[:self.playlist_limit]
        if not playlists:
            print('未找到任何歌单')
            return

        # The context manager tears the worker processes down once the
        # blocking map() call has returned, so no workers are leaked.
        with Pool(processes=4) as pool:
            pool.map(self.get_song_content, playlists)

    def get_song_content(self, data):
        """Download the first ``song_limit`` songs of one playlist.

        Args:
            data: ``(playlist_title, playlist_id)`` tuple, e.g.
                ``('世界它太小,小到容不下爱人的心', '2885665791')``.
        """
        response = self._fetch(self.PLAYLIST_URL % data[1])
        if response is None:
            return

        songs = self._parse_songs(response.text)[:self.song_limit]
        if not songs:
            return

        try:
            os.makedirs(self.output_dir, exist_ok=True)
        except OSError as e:
            print(e)
            return

        for song_id, title in songs:
            self._download_song(song_id, title)

    def _download_song(self, song_id, title):
        """Download one song and store it as ``<output_dir>/<title>.mp3``."""
        response = self._fetch(self.SONG_MEDIA_URL % song_id)
        if response is None:
            return

        # A text response can never be audio data; saving it would leave a
        # broken .mp3 behind.
        # NOTE(review): the media URL presumably answers with an HTML page for
        # songs that cannot be downloaded - confirm against the live site.
        if response.headers.get('Content-Type', '').startswith('text/'):
            print(title + '  下载失败: 返回的不是音频文件')
            return

        file_name = '%s.mp3' % self._safe_filename(title, fallback=song_id)
        path = os.path.join(self.output_dir, file_name)
        try:
            with open(path, 'wb') as f:
                f.write(response.content)
        except OSError as e:
            print(e)
        else:
            print(title + '  下载完成!')

if __name__ == '__main__':
    # Script entry point: build the spider and start the download run.
    # get_song() uses a multiprocessing Pool, so the call stays behind this
    # guard to keep it from running when the module is imported.
    WangyiSpider().get_song()

猜你喜欢

转载自www.cnblogs.com/Afrafre/p/11693849.html