全民K歌获取作者歌单

注意:本实例仅供学习使用,请勿用于非法用途,并下载后在24小时内删除, 否则产生的一切问题与本人无关! 

import re
import requests
from bs4 import BeautifulSoup
import openpyxl  
import json
num = 0  
p = 0  
q = 0  

kv = {
    "User-Agent":
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
}


# 获取share_uid
def getShare_UID(url):
    uid = url.split('=')[-1]
    # print("uid={}".format(uid))
    return uid


# 获取网页
def getHTML(url):
    try:
        r = requests.get(url, headers=kv, timeout=30)
        r.raise_for_status()
        # print(r.url)
        r.encoding = r.apparent_encoding
        return r.text
    except:
        return ""


# 解析json数据获取歌曲名称信息
def getMusicInfo(json_data, namelis, idlis, count):
    s = json.loads(json_data)  # 将json数据转换为python对象(字典)
    for i in range(count):
        m_name = s["data"]["ugclist"][i]["title"]
        m_shareid = s["data"]["ugclist"][i]["shareid"]
        namelis.append(m_name)
        idlis.append(m_shareid)
        # print(m_name)
        # print(m_shareid)


# 打印歌曲信息
def printMusicInfo(namelis, idlis, music_url):
    form = "{:^3}\t{:^15}\t{:^20}"
    print("{}的演唱歌曲共有{}首,信息如下:".format(nickname, num))
    print(form.format("序号", "歌曲名称", "下载链接"))
    for i in range(len(namelis)):
        down_url = music_url + idlis[i]
        print(form.format(i + 1, namelis[i], down_url))


# 将歌曲信息保存到Excel文件
def saveXlsx(namelis, idlis, music_url):
    try:
        wb = openpyxl.Workbook()  # 创建一个工作簿
        ws = wb.active  # 在工作簿中获取活跃的工作表
        ws.title = "歌曲信息"  # 设置工作表的名称
        ws['A1'] = '序号'
        ws['B1'] = '歌曲名称'
        ws['C1'] = '下载链接'
        for i in range(len(namelis)):
            ws.cell(row=i + 2, column=1, value=i + 1)  # 行数从2开始
            ws.cell(row=i + 2, column=2, value=namelis[i])  #歌曲名称
            ws.cell(row=i + 2, column=3, value=music_url + idlis[i])  # 歌曲链接
        wb.save('D:/{}的歌曲信息.xlsx'.format(nickname_cn))
        print("歌曲信息在D盘根目录保存完毕!")
    except:
        print("出现异常,保存失败!")


if __name__ == "__main__":
    namelis = []
    idlis = []
    home_url=input("请输入作者的主页链接:")
    music_url = "https://node.kg.qq.com/cgi/fcgi-bin/fcg_get_play_url?shareid="
    kg_url = "https://node.kg.qq.com/cgi/fcgi-bin/kg_ugc_get_homepage?jsonpCallback=callback_0&inChFarset=GB2312&outCharset=utf-8&format=&g_tk=5381&g_tk_openkey=719182536&nocache=0.8706501019187272&share_uid="
    kg2_url = kg_url + getShare_UID(home_url) + "&type=get_uinfo&start=1&num=8"
    # print(kg2_url)
    home_json = getHTML(kg2_url)[11:-1]  # json数据在callback_0()里面,需要单独提取出来
    s = json.loads(home_json)  # 将json数据转换为python对象(字典)
    num = s["data"]["ugc_total_count"]  
    nickname = s["data"]["nickname"]  
    match = re.search(r'[\u4e00-\u9fa5\d]*', nickname)  
    nickname_cn = match.group(0)
    count = 8
    index = 1
    q = num // 8
    p = num % 8
    if q < 1:  # 说明p!=0
        x = 1
        # count = p
    elif p == 0:  
        x = q
    else:  # 多一页刚好爬完
        x = q + 1
    for i in range(x):
        if i == x - 1:
            if p != 0:
                count = p
        kg2_url = kg_url + getShare_UID(
            home_url) + "&type=get_uinfo&start=" + str(index) + "&num=8"
        home_json = getHTML(kg2_url)[11:-1]
        getMusicInfo(home_json, namelis, idlis, count)
        index += 1
    printMusicInfo(namelis, idlis, music_url)
    saveXlsx(namelis, idlis, music_url)
发布了143 篇原创文章 · 获赞 78 · 访问量 4万+

猜你喜欢

转载自blog.csdn.net/KK_2018/article/details/104146388