Python爬虫:只要80行代码,打包B站喜欢UP主所有小视频~

环境准备

requests 跟 you_get 库(注:下文源码实际使用 Python 标准库 urllib 发送请求,必须额外安装的只有 you_get,可通过 pip install you-get 安装)

操作方式:cmd

用户登录

B站在未登录状态下只能下载到清晰度很低的视频,因此需要先用火狐浏览器登录B站账号,以便下载时带上登录信息。

下载视频的包:you_get目前支持2种登录操作

1>手动在请求头指定登录cookie

2>使用火狐浏览器登录,然后解析火狐浏览器cookie缓存文件

火狐浏览器 cookie 缓存文件的默认保存路径(Windows)

C:/Users/用户名/AppData/Roaming/Mozilla/Firefox/Profiles/zqcn0olh.default-release/cookies.sqlite

使用前,请把 BilibiliDowload.py 中火狐浏览器 cookies.sqlite 的路径(原文件约第54行)手动改成自己电脑上火狐浏览器 cookie 文件的实际保存地址。

源码执行

执行资料文件中的BilibiliDowload.py

前提:必须安装了python环境

 

完整源码

import urllib
import json
import urllib.request
from you_get import common as you_get
import sys
import os

# b站up主视频下载
class Bilibili(object):
    """Download the videos published by one Bilibili uploader.

    The uploader's video list is paginated. One search-API URL is built per
    page at construction time; ``run`` fetches each page, extracts the video
    page addresses and hands every address to you-get for downloading.
    """

    # Firefox cookie database passed to you-get with ``-c`` when the caller
    # does not supply ``cookie_file``. This is the path the script originally
    # hard-coded; it only exists on the author's machine.
    DEFAULT_COOKIE_FILE = "C:/Users/Administrator/AppData/Roaming/Mozilla/Firefox/Profiles/zqcn0olh.default-release/cookies.sqlite"

    # Request headers sent to the search API.
    # NOTE(review): the "Cookie" value is a logged-in session copied from the
    # author's browser (SESSDATA / bili_jct). It is kept byte-for-byte so the
    # request is unchanged, but a credential should not live in source code —
    # replace it with your own or load it from configuration.
    HEADERS = {
        "Host": "api.bilibili.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0",
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
        "Referer": "https://space.bilibili.com/1406810/video",
        "Origin": "https://space.bilibili.com",
        "Connection": "keep-alive",
        "Cookie": "buvid3=75A07D25-D75A-4FCB-9AC7-02FEC267382023612infoc; b_nut=1676819123; CURRENT_FNVAL=4048; _uuid=58AE9EB7-35A3-FF3F-77C3-61D5C87B25C1024409infoc; sid=5tj46y1i; buvid_fp=ceee833bf1b352be7ac6198c01c93f38; buvid4=B5A3D1A7-B813-80C4-D340-9174D8A6F92924615-023021923-Pk1O31qDhl4K1jeBvjmSZhpyYjEmH1skxlFWJ3bCsZiJZ%2F4BFwLDCg%3D%3D; rpdid=0zbfAHVLv9|Gp0XQg47|3Bb|3w1PtLfM; SESSDATA=08d07c52%2C1695797256%2C4be7f%2A32; bili_jct=b44785e1f12607d9cfecc0e2531791e8; DedeUserID=494956170; DedeUserID__ckMd5=cd667fe4fdbb1757; CURRENT_QUALITY=80; innersign=1; i-wanna-go-back=-1; b_ut=5; b_lsid=358B10A101_18736BE7A48; header_theme_version=CLOSE; home_feed_column=5; bp_video_offset_494956170=779148836255825900; CURRENT_PID=80195020-cf96-11ed-89e2-752e1f70f3eb; nostalgia_conf=-1; hit-new-style-dyn=1; hit-dyn-v2=1; PVID=1",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-site",
        "TE": "trailers"
    }

    def __init__(self, uid, path, page_size=30, total_page=1, cookie_file=None):
        """Prepare the download directory and the list of page URLs.

        Args:
            uid: The uploader's uid, substituted into the ``mid`` query field.
            path: Directory the videos are saved to; created if missing.
            page_size: Number of videos requested per page.
            total_page: Number of pages to fetch, starting at page 1.
            cookie_file: Path of the Firefox ``cookies.sqlite`` passed to
                you-get. Defaults to ``DEFAULT_COOKIE_FILE``.
        """
        # exist_ok avoids the check-then-create race of the original code.
        os.makedirs(path, exist_ok=True)
        self.path = path

        self.base_url = "https://api.bilibili.com/x/space/arc/search?mid={0}&ps={1}&tid=0&pn={2}&keyword=&order=pubdate&jsonp=jsonp"
        self.uid = uid
        self.page_size = page_size
        self.total_page = total_page
        self.cookie_file = self.DEFAULT_COOKIE_FILE if cookie_file is None else cookie_file
        self.urls = [
            self.base_url.format(self.uid, self.page_size, page_no)
            for page_no in range(1, self.total_page + 1)
        ]

    @staticmethod
    def _parse_video_urls(content):
        """Return the video page addresses found in one API response body.

        Args:
            content: The JSON text returned by the search API.

        Returns:
            A list of ``https://www.bilibili.com/video/<bvid>`` addresses.
            The list is empty when the response has no ``data.list.vlist``
            entry, so an error payload does not crash the caller.
        """
        payload = json.loads(content)
        data = payload.get("data") or {}
        entries = (data.get("list") or {}).get("vlist") or []
        return ["https://www.bilibili.com/video/{0}".format(entry["bvid"]) for entry in entries]

    def get_video_urls(self, url):
        """Fetch one list page and return the addresses of its videos.

        Args:
            url: One of the page URLs built in ``__init__``.

        Returns:
            A list of video page addresses (possibly empty).
        """
        request = urllib.request.Request(url=url, headers=self.HEADERS)
        # The context manager closes the connection even if reading fails.
        with urllib.request.urlopen(request) as response:
            content = response.read().decode('utf-8')
        return self._parse_video_urls(content)

    def video_down(self, url):
        """Download one video with you-get into ``self.path``.

        you-get is driven through its command-line entry point, so its
        arguments are passed via ``sys.argv``; the previous ``sys.argv`` is
        restored afterwards so the calling program is not left modified.
        """
        previous_argv = sys.argv
        sys.argv = ['you-get', "-c", self.cookie_file,
                    '--format', 'dash-flv', '-o', self.path, url]
        try:
            you_get.main()
        finally:
            sys.argv = previous_argv

    def run(self):
        """Download every video of every configured page.

        A failing video is reported and skipped so that the remaining
        videos are still downloaded.
        """
        for page_no, page_url in enumerate(self.urls, start=1):
            video_urls = self.get_video_urls(page_url)
            print(f"......第{page_no}页视频开始下载......")
            if not video_urls:
                print(f"第{page_no}页没有获取到视频,请检查 uid 或登录 cookie 是否有效")
            for url in video_urls:
                try:
                    self.video_down(url)
                # SystemExit is caught as well because the original bare
                # ``except`` swallowed it and a command-line entry point may
                # raise it; KeyboardInterrupt is deliberately NOT caught so
                # Ctrl+C stops the whole batch.
                except (Exception, SystemExit) as err:
                    print(f"视频下载失败,已跳过: {url} ({err!r})")

if __name__ == '__main__':
    # Script entry point: ask for the uploader uid and the target folder,
    # then download the first page (30 videos per page) of that uploader.
    up_uid = input("请输入up主的 uid:")
    save_dir = input("请输入保存视频路径 path:")
    downloader = Bilibili(up_uid, save_dir, 30, 1)
    downloader.run()

原理分析

其实非常简单,就是爬虫获取视频的播放地址,调用you_get第三方包实现视频抓取逻辑。

猜你喜欢

转载自blog.csdn.net/langfeiyes/article/details/129890064