import os
import re

import requests
class SpiderHimalaya(object):
    """Download every audio track of a paginated album listing.

    Workflow: build the track-list URL of each page, read the JSON of every
    page to collect ``{track_info_url: title}`` items, then resolve each item
    to its audio source URL and store the bytes as ``<title>.mp3``.
    """

    # Seconds to wait on the server before a request is abandoned; without a
    # timeout ``requests`` may block forever on a stalled connection.
    REQUEST_TIMEOUT = 10
    # Directory the .mp3 files are written to (created on demand).
    OUTPUT_DIR = '三国志/'
    # Characters removed from a track title before it is used as a file name
    # (note that the character class also contains the space character).
    _UNSAFE_NAME_CHARS = re.compile(r"[/ \\ : * \" < > | ?]+")

    def __init__(self):
        """Set the request headers and the per-track info URL template."""
        self.headers = {"User-Agent": "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50"}
        # Template formatted with a track id in get_item_id().
        # NOTE(review): the template is empty in this source, so every track
        # resolves to ''; it must be filled in with a '{}' placeholder.
        self.audio_url = ''

    def get_page_url(self):
        """Return the track-list URL of each album page (pages 1 to 12)."""
        # NOTE(review): the template is empty in this source; it is expected
        # to contain a '{}' placeholder for the page number.
        pageUrl = ""
        return [pageUrl.format(i) for i in range(1, 13)]

    def get_response(self, url):
        """Fetch ``url`` and return the response, or ``None`` on failure.

        Failure means a transport/URL error raised by ``requests`` or any
        status code other than 200. The reason is printed in both cases so a
        single bad request does not abort the whole crawl.
        """
        try:
            resp = requests.get(url, headers=self.headers,
                                timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as err:
            print(err)
            return None
        if resp.status_code == 200:
            return resp
        print(resp)
        return None

    def get_item_id(self):
        """Collect one ``{track_info_url: title}`` dict per track.

        Every page returned by get_page_url() is read (previously only the
        first page was used); pages that cannot be fetched are skipped.

        Returns:
            list of single-entry dicts, in page order.
        """
        item_list = []
        for page_url in self.get_page_url():
            resp = self.get_response(url=page_url)
            if resp is None:
                continue
            for con in resp.json()['data']['tracks']:
                key = self.audio_url.format(con['trackId'])
                item_list.append({key: con["title"]})
        return item_list

    @staticmethod
    def _safe_file_name(name):
        """Return ``name`` with the characters in _UNSAFE_NAME_CHARS removed."""
        return SpiderHimalaya._UNSAFE_NAME_CHARS.sub('', name)

    def down_mp3(self, item):
        """Download one track and write it to ``OUTPUT_DIR/<title>.mp3``.

        Args:
            item: single-entry dict ``{track_info_url: title}`` as produced
                by get_item_id().

        The track is skipped (nothing is written) when the info request or
        the audio request fails, or when the info JSON carries no source URL.
        """
        # The dict holds exactly one pair; unpack it into url and title.
        (url, name), = item.items()
        resp = self.get_response(url)
        if resp is None:
            return
        file_name = self._safe_file_name(name)
        print(file_name)
        mp3_url = resp.json()['data']['src']
        if not mp3_url:
            # An empty/None source cannot be requested; skip this track.
            print('no audio source: ' + file_name)
            return
        audio = self.get_response(mp3_url)
        if audio is None:
            return
        # open() does not create missing directories.
        os.makedirs(self.OUTPUT_DIR, exist_ok=True)
        with open(os.path.join(self.OUTPUT_DIR, file_name + '.mp3'), 'wb') as f:
            f.write(audio.content)

    def run(self):
        """Entry point: collect all track items, then download each one."""
        for item in self.get_item_id():
            self.down_mp3(item)
# Run the crawler only when this file is executed as a script.
if __name__ == '__main__':
    spider = SpiderHimalaya()
    spider.run()
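# Python: scraping Ximalaya (Himalaya) audio
# Reposted from blog.csdn.net/weixin_44224529/article/details/104836401
# (Page residue from the source blog: "You may also like", "Today's picks", "Weekly ranking".)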