python爬虫实现音乐下载
音乐下载功能模块
from pymongo import MongoClient
import pandas as pd
import requests
import random
import json
import os
def kuWoDownLoad():
    """Search Kuwo for a keyword and download the first matching song.

    Prompts for an artist or song name, requests the first result page of the
    Kuwo search API, prints the total number of result pages and then, for the
    first song of that page only:

    * downloads the MP3 into the ``music`` folder on the user's Desktop,
    * saves the cover image through ``write_into_image``,
    * stores the metadata plus the raw audio bytes through
      ``write_into_mongo``.

    Any ``requests`` failure (connection error, timeout, non-2xx status) is
    reported on the console instead of propagating.

    Raises:
        KeyError: If the API payload lacks one of the expected fields.
    """
    import re  # only needed here, to sanitise the file name

    search_music = input("请输入作者或者歌曲名:")
    user_agents = [
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
    ]
    header = {
        # Send the UA string itself; str() of a one-element list would put the
        # literal "['Mozilla/...']" on the wire.
        'User-Agent': random.choice(user_agents),
        'Referer': 'http://www.kuwo.cn/',
        'csrf': 'D32I86ILA88',
        'Cookie': '_ga=GA1.2.1490969665.1620831748; _gid=GA1.2.1678093837.1620831748; _gat=1; Hm_lvt_cdb524f42f0ce19b169a8071123a4797=1620831749; Hm_lpvt_cdb524f42f0ce19b169a8071123a4797=1620831749; kw_token=D32I86ILA88',
    }
    try:
        # Passing the keyword via ``params`` lets requests URL-encode it, so
        # input containing '&', '#' or spaces cannot corrupt the query string.
        search = requests.get(
            "http://www.kuwo.cn/api/www/search/searchMusicBykeyWord",
            params={
                'key': search_music,
                'pn': 0,
                'rn': 30,
                'httpsStatus': 1,
                'reqId': '30709280-b333-11eb-9d41-ad2a15a69fbf',
            },
            headers=header,
            timeout=30,
        )
        search.raise_for_status()
        result = search.json()['data']
        songs = result['list']

        # Number of 30-item pages needed for all hits (rounded up).
        page_count, remainder = divmod(int(result["total"]), 30)
        if remainder:
            page_count += 1
        print("一共{}页,正在爬取数据。。。。。。".format(page_count))

        if not songs:
            return
        info = songs[0]
        music_name = info['name']
        music_rid = info['rid']
        music_Image = info['pic']

        music_api = 'http://www.kuwo.cn/url?format=mp3&rid={}&response=url&type=convert_url3&br=128kmp3' \
                    '&from=web&t=1620883237037&httpsStatus=1&reqId=f32a4ce1-b3aa-11eb-8486-6142833c02da'.format(music_rid)
        play_info = requests.get(music_api, headers=header, timeout=30)
        play_info.raise_for_status()
        audio = requests.get(play_info.json()['url'], timeout=30)
        audio.raise_for_status()
        music_data = audio.content

        music_info_data = {
            "music_name": music_name,
            "music_artist": info['artist'],
            'music_Image': music_Image,
            "music_album": info['album'],
            "music_releaseDate": info['releaseDate'],
            "songTimeMinutes": info['songTimeMinutes'],
            "music_data": music_data,
            "music_rid": music_rid
        }

        # Open the target only after the download succeeded so that a failed
        # request never leaves an empty .mp3 behind. Characters that Windows
        # forbids in file names are replaced to keep open() from failing.
        safe_name = re.sub(r'[\\/:*?"<>|]', '_', music_name)
        with open(os.path.expanduser("~") + '\\Desktop\\music\\' + "{}.mp3".format(safe_name), 'wb') as f:
            f.write(music_data)
        print("下载成功:" + music_name)

        write_into_image(music_Image, header)
        write_into_mongo(Connecting_mongo(), music_info_data)
        print("写入数据库成功~")
    except requests.RequestException:
        # RequestException is the base class of HTTPError, ConnectionError and
        # Timeout, so every network-level failure ends up here.
        print("连接异常!")
'''
def _connect_mongo(host, port, username, password, db):
""" 指定帐户和密码建立连接 """
if username and password:
mongo_uri = 'mongodb://%s:%s@%s:%s/%s' % (username, password, host, port, db)
conn = MongoClient(mongo_uri)
else:
"""无用户和密码连接"""
conn = MongoClient(host, port)
return conn[db]
def read_mongo(db, collection, query={}, host='localhost', port=27017, username=None, password=None, no_id=True):
""" 从Mongo读取并存储到DataFrame """
# 连接MongoDB
db = _connect_mongo(host=host, port=port, username=username, password=password, db=db)
# 对特定(query)的数据库和集合进行查询
new_data = db[collection].find(query)
# 读取数据并构造DataFrame
df = pd.DataFrame(list(new_data))
# 删除MongoDB中主键_id
# if no_id:
# del df['_id']
# return df
'''
def write_into_image(music_Image, header):
    """Download a cover image into the ``image`` folder on the user's Desktop.

    The file is named after the last path segment of the image URL, so covers
    of different songs do not overwrite each other. ``34.jpg`` is used only as
    a fallback when the URL path has no file name.

    Args:
        music_Image: URL of the cover image.
        header: HTTP headers sent with the request.
    """
    from urllib.parse import urlsplit  # only needed to derive the file name

    response_jpg = requests.get(url=music_Image, headers=header, timeout=30)

    file_name = os.path.basename(urlsplit(music_Image).path) or "{}.jpg".format(34)
    if '.' not in file_name:
        # Keep a recognisable extension when the URL segment has none.
        file_name += '.jpg'

    with open(os.path.expanduser("~") + '\\Desktop\\image\\' + file_name, 'wb') as f:
        f.write(response_jpg.content)
    print("保存图片成功!")
def write_into_mongo(db, data):
    """Insert one document into the ``kuwo_down`` collection of ``db``.

    Prints a progress message first and, after a successful insert, the
    estimated number of documents in the collection. Any exception raised
    while talking to the database is printed rather than propagated.

    Args:
        db: Database handle; indexed with the collection name.
        data: Dictionary to store as a single document.
    """
    print("正在插入数据")
    try:
        collection = db['kuwo_down']
        collection.insert_one(data)
        total = collection.estimated_document_count()
        print('总数', '=', total)
    except Exception as err:
        print(err)
def Connecting_mongo():
    """Create a client for ``localhost:27017`` and return its ``db`` database."""
    mongo_client = MongoClient('localhost', 27017)
    return mongo_client.db
def Delete(db):
    """Prompt for a song name and delete one matching document.

    Deletes at most one document of the ``kuwo_down`` collection whose
    ``music_name`` equals the entered text, and prints a notice when nothing
    matched.

    Args:
        db: Database handle; indexed with the collection name.
    """
    table = db['kuwo_down']
    name = input("请输入删除歌曲名:")
    # delete_one reports how many documents it removed, so there is no need to
    # load every distinct song name first just to test for existence.
    outcome = table.delete_one({"music_name": name})
    if outcome.deleted_count == 0:
        print("删除的歌曲不存在哦~")
def Update(db):
    """Prompt for a field/value filter and a new field/value, then update.

    The first name/value pair selects the document in ``kuwo_down``; the
    second pair is the field to set on the first matching document.

    Args:
        db: Database handle; indexed with the collection name.
    """
    table = db['kuwo_down']
    old_name = input("请输入更改项名称:")
    old_value = input("请输入更改项值:")
    selector = {old_name: old_value}
    new_name = input("请输入更改项名称:")
    new_value = input("请输入更改项值:")
    # update_one requires an update-operator document; a plain {field: value}
    # mapping is rejected with ValueError, so wrap the change in $set.
    table.update_one(selector, {"$set": {new_name: new_value}})
def Read(db):
    """Prompt for a song name and print whether ``kuwo_down`` contains it.

    Args:
        db: Database handle; indexed with the collection name.
    """
    collection = db['kuwo_down']
    wanted = input("请输入查找的歌曲名:")
    found = wanted in collection.distinct("music_name")
    print("歌曲已存在~~~" if found else "不存在此歌曲~~~")
def create_Read_Update_Delete(num):
    """Run the operation selected by the menu choice ``num``.

    ``1`` downloads a song, ``2`` deletes one, ``3`` updates one and ``4``
    looks one up. Any other input only prints a message.

    Args:
        num: Raw text typed at the menu prompt.
    """
    if num == '':
        print("什么都不想对我说吗~~~Giao\n直接用回车回避我吗~~~Giao")
    # isdecimal() only accepts characters int() can parse. isnumeric() is also
    # true for text such as '一', '²' or '½', which made int() raise ValueError.
    elif num.isdecimal():
        choice = int(num)
        if choice == 1:
            kuWoDownLoad()
        elif choice == 2:
            Delete(Connecting_mongo())
        elif choice == 3:
            Update(Connecting_mongo())
        elif choice == 4:
            Read(Connecting_mongo())
        else:
            print("你连1234都不记得,更何况是我\n走了,这次罚你,再也见不到我!")
    elif num.isalpha():
        print("请不要输入字母!")
    else:
        print("格式错误!")
def huoqu_shuju(db):
    """Print the name and artist of every document in ``kuwo_down``.

    An empty collection prints an empty table instead of raising ``KeyError``.

    Args:
        db: Database handle; indexed with the collection name.
    """
    columns = ['music_name', 'music_artist']
    table = db['kuwo_down']
    # Ask only for the two printed fields so the stored audio bytes are not
    # fetched along with them.
    cursor = table.find({}, {'_id': 0, 'music_name': 1, 'music_artist': 1})
    # Passing ``columns`` keeps both columns present even when there are no rows.
    new_data = pd.DataFrame(list(cursor), columns=columns)
    print(new_data)
if __name__ == "__main__":
    # Make sure both output folders exist. makedirs(exist_ok=True) also creates
    # a missing parent folder and does not fail when the folder is already
    # there, which the separate exists()/mkdir() pair could not guarantee.
    for folder in ('\\Desktop\\music', '\\Desktop\\image'):
        os.makedirs(os.path.expanduser("~") + folder, exist_ok=True)

    num = input("下载(99%可不一定没爱过呀!):1\n删除(即使在舍不得,也得走啦!):2\n更新(没有了你,我会开始新生活!):3\n查询(来过,你可不一定,找得到我!):4\n请输入:")
    create_Read_Update_Delete(num)
    # Always finish by listing what is stored in the database.
    huoqu_shuju(Connecting_mongo())