#-*-coding:utf-8-*- __author__ = 'qinlan' import requests from urllib.parse import urlencode import random,os,re,time from bs4 import BeautifulSoup from multiprocessing import Pool singer = '周杰伦' proxies = [ '115.224.163.58:61202','179.184.9.172:20183', '177.37.166.164:20183','103.55.69.242:53281', '217.61.106.183:80','45.125.220.242:8080', '103.88.140.85:8080','218.26.227.108:80', '110.171.230.47:8080','118.81.108.77:9797', '31.145.83.198:8080','195.88.208.115:3128' ] headers = { 'host':'music.taihe.com', 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36' } session = requests.Session() def getMainHtml(page):#获取歌曲主页 base_url = 'http://music.taihe.com/search/song?' params = { 'key':singer, 'start':str(page*20), 'size':20 } try: response = session.get(url=base_url,params=params,proxies={'https':random.choice(proxies)},headers=headers) response.encoding = response.apparent_encoding if response.status_code == 200: return response.text else: print('---访问失败---') return None except: print('网页加载出错') return None def downloadMusic(name,url): dir = '歌曲\{}'.format(singer) if not os.path.exists(dir): os.makedirs(dir) try: response = requests.get(url=url,proxies={'https':random.choice(proxies)}) if response.status_code == 200: file = os.path.join(dir,'{}.mp3'.format(name)) with open(file,'wb') as f: f.write(response.content) print('下载完成') else: print('状态码不为200(失败)') except: print('---%%%%---') def getMusicLink(songid): base_url = 'http://play.taihe.com/data/music/songlink' data = { 'songIds':songid, 'hq':'0','type':'m4a,mp3', 'rate':'','pt':'0', 'flag':'-1','s2p':'-1','prerate':'-1', 'bwt':'-1','dur':'-1','bat':'-1','bp':'-1', 'pos':'-1','auto':'-1' } header = { 'Host':'play.taihe.com', 'Origin':'http://play.taihe.com', 'referer':'http://play.taihe.com/?__m=mboxCtrl.playSong&__o=/search||songListIcon&fr=-1||www.baidu.com{}'.format(urlencode({'__a':songid,'__s':singer})), 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36' } response = session.post(url=base_url,data=data,proxies={'https':random.choice(proxies)},headers=header) if response.status_code == 200: content = response.json() songUrl = content['data']['songList'][0]['songLink'] songName = content['data']['songList'][0]['songName'] print(songName,songUrl) downloadMusic(songName,songUrl) else: print('---加载失败---') def parseMusicId(html): if html: content = re.findall('sid".*?(\d+),',html,re.S) for songid in content: getMusicLink(songid) else: print('------------') def main(page): html = getMainHtml(page) parseMusicId(html) time.sleep(1) if __name__ == '__main__': pool = Pool() start = time.time() pool.map(main,[page for page in range(1,10)]) print('下载完毕:',time.time()-start)
python爬虫(百度音乐)
猜你喜欢
转载自blog.csdn.net/qq_42394743/article/details/80859708
今日推荐
周排行