目录
1、开发工具
Python3.9
pycharm
扫描二维码关注公众号,回复: 17381432 查看本文章requests
和其他python内置库
2、第三方库
安装第三方库
pip install requests
3、实现思路
1.用requests发送get请求,获得下载链接
2.将下载到B站视频和音频保存到本地
3.使用ffmpeg来合并视频和音频。
4.并保存到本地。
4.单个爬取B站视频
import os
import requests
import json
import re
from bs4 import BeautifulSoup
import subprocess
from detail_video import video_bvid
# video_bvid 是一个从外部得到的单个视频ID
video_bvid = 'your-single-bvid'
class BilibiliVideoAudio:
def __init__(self, bvid):
self.bvid = bvid
self.headers = {
"referer": "https://search.bilibili.com/all?keyword=%E4%B8%BB%E6%92%AD%E8%AF%B4%E8%81%94%E6%92%AD&from_source=webtop_search&spm_id_from=333.1007&search_source=5&page=4&o=90",
"origin": "https://search.bilibili.com",
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
'Accept-Encoding': 'gzip, deflate, br'
}
def get_video_audio(self):
# 构造视频链接并发送请求获取页面内容
url = f'https://www.bilibili.com/video/{self.bvid}/?spm_id_from=333.337.search-card.all.click&vd_source=14378ecd144bed421affe1fe0ddd8981'
content = requests.get(url, headers=self.headers).content.decode('utf-8')
soup = BeautifulSoup(content, 'html.parser')
# 获取视频标题
meta_tag = soup.head.find('meta', attrs={'name': 'title'})
title = meta_tag['content']
# 获取视频和音频链接
pattern = r'window\.__playinfo__=({.*?})\s*</script>'
json_data = re.findall(pattern, content)[0]
data = json.loads(json_data)
video_url = data['data']['dash']['video'][0]['base_url']
audio_url = data['data']['dash']['audio'][0]['base_url']
return {
'title': title,
'video_url': video_url,
'audio_url': audio_url
}
def download_video_audio(self, url, filename):
# 对文件名进行清理,去除不合规字符
filename = self.sanitize_filename(filename)
try:
# 发送请求下载视频或音频文件
resp = requests.get(url, headers=self.headers).content
download_path = os.path.join('D:\\video', filename) # 构造下载路径
with open(download_path, mode='wb') as file:
file.write(resp)
print("{:*^30}".format(f"下载完成:{filename}"))
except Exception as e:
print(e)
def sanitize_filename(self, filename):
# 定义不合规字符的正则表达式
invalid_chars_regex = r'[\"*<>?\\|/:,]'
# 替换不合规字符为空格
sanitized_filename = re.sub(invalid_chars_regex, ' ', filename)
return sanitized_filename
def merge_video_audio(self, video_path, audio_path, output_path):
"""
使用ffmpeg来合并视频和音频。
"""
try:
command = [
'ffmpeg',
'-y', # 覆盖输出文件如果它已经存在
'-i', video_path, # 输入视频路径
'-i', audio_path, # 输入音频路径
'-c', 'copy', # 复制原始数据,不进行转码
output_path # 输出视频路径
]
subprocess.run(command, check=True)
print(f"视频和音频合并完成:{output_path}")
except subprocess.CalledProcessError as e:
print(f"合并失败: {e}")
def main():
try:
# 只处理一个 bvid
bilibili = BilibiliVideoAudio(video_bvid)
video_audio_info = bilibili.get_video_audio()
title = video_audio_info['title']
video_url = video_audio_info['video_url']
audio_url = video_audio_info['audio_url']
processed_videos_path = 'D:\\processed_videos'
if not os.path.exists(processed_videos_path):
os.makedirs(processed_videos_path)
video_filename = f"{title}.mp4"
audio_filename = f"{title}.mp3"
output_filename = f"{title} - combined.mp4"
video_file_path = os.path.join('D:\\video', video_filename)
audio_file_path = os.path.join('D:\\video', audio_filename)
output_file_path = os.path.join(processed_videos_path, output_filename)
bilibili.download_video_audio(video_url, video_filename) # 下载视频
bilibili.download_video_audio(audio_url, audio_filename) # 下载音频
bilibili.merge_video_audio(video_file_path, audio_file_path, output_file_path) # 合并视频和音频
# Optional: Delete the separate files after merge
# os.remove(video_file_path)
# os.remove(audio_file_path)
except Exception as ex:
print(f"Failed to process video/audio for {video_bvid}: {ex}")
main()
5.批量爬取B站视频
# 批量爬取b站上的视频
import os
import requests
import json
import re
from bs4 import BeautifulSoup
import subprocess
from detail_video import video_bvid
class BilibiliVideoAudio:
def __init__(self, bvid):
self.bvid = bvid
self.headers = {
"referer": "https://search.bilibili.com/all?keyword=%E4%B8%BB%E6%92%AD%E8%AF%B4%E8%81%94%E6%92%AD&from_source=webtop_search&spm_id_from=333.1007&search_source=5&page=4&o=90",
"origin": "https://search.bilibili.com",
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
'Accept-Encoding': 'gzip, deflate, br'
}
def get_video_audio(self):
# 构造视频链接并发送请求获取页面内容
url = f'https://www.bilibili.com/video/{self.bvid}/?spm_id_from=333.337.search-card.all.click&vd_source=14378ecd144bed421affe1fe0ddd8981'
content = requests.get(url, headers=self.headers).content.decode('utf-8')
soup = BeautifulSoup(content, 'html.parser')
# 获取视频标题
meta_tag = soup.head.find('meta', attrs={'name': 'title'})
title = meta_tag['content']
# 获取视频和音频链接
pattern = r'window\.__playinfo__=({.*?})\s*</script>'
json_data = re.findall(pattern, content)[0]
data = json.loads(json_data)
video_url = data['data']['dash']['video'][0]['base_url']
audio_url = data['data']['dash']['audio'][0]['base_url']
return {
'title': title,
'video_url': video_url,
'audio_url': audio_url
}
def download_video_audio(self, url, filename):
# 对文件名进行清理,去除不合规字符
filename = self.sanitize_filename(filename)
try:
# 发送请求下载视频或音频文件
resp = requests.get(url, headers=self.headers).content
download_path = os.path.join('D:\\video', filename) # 构造下载路径
with open(download_path, mode='wb') as file:
file.write(resp)
print("{:*^30}".format(f"下载完成:{filename}"))
except Exception as e:
print(e)
def sanitize_filename(self, filename):
# 定义不合规字符的正则表达式
invalid_chars_regex = r'[\"*<>?\\|/:,]'
# 替换不合规字符为空格
sanitized_filename = re.sub(invalid_chars_regex, ' ', filename)
return sanitized_filename
def merge_video_audio(self, video_path, audio_path, output_path):
"""
使用ffmpeg来合并视频和音频。
"""
try:
command = [
'ffmpeg',
'-y', # 覆盖输出文件如果它已经存在
'-i', video_path, # 输入视频路径
'-i', audio_path, # 输入音频路径
'-c', 'copy', # 复制原始数据,不进行转码
output_path # 输出视频路径
]
subprocess.run(command, check=True)
print(f"视频和音频合并完成:{output_path}")
except subprocess.CalledProcessError as e:
print(f"合并失败: {e}")
def main():
# 批量获取多个视频的bid
bvids = [
# 0 1 2 3
"BV187411i7zw","BV1wi4y1E7E6","BV1Gz4y1X7vh","BV1Lh411d7Lw", # 1
# 4 5 6 7
"BV1mJ411D7QB","BV1Z5411w7Xb","BV1op4y167kS","BV1Mp4y1p7Ck", # 2
# 8 9 10 11
"BV1nJ41187Zy","BV1qb4y1Z7JK","BV1f5411379u","BV1kt4y1Q792", # 3
# 12 13 14 15
"BV1Qy4y1e7kk","BV1T7411T7q6","BV1k64y1k7QL","BV1J5411c7Rw", # 4
# 16 17 18 19
"BV1Db4y1y7yL","BV1cC4y1878T","BV11y4y1z7bY","BV1LJ411S7ML", # 5
# 20 21 22 23
"BV1X54y1L7mt","BV1S64y1D7HM","BV1rK4y1d7mZ","BV1b64y1y7AE", # 6
# 24 25 26 27
"BV1TK411F7MU","BV1HN411f7Em","BV1QA411x7KB","BV1pM4y1K7Ao", # 7
# 28 29 30 31
"BV1os4y1s7Aw","BV1sv411e71L","BV1xZ4y1A7gn","BV1E3411B7Q3", # 8
# 32 33 34 35
"BV1664y1d78D","BV1xv41177MR","BV13q4y1S7y1","BV1kJ411H7y8", # 9
# 36 37 38 39
"BV1Cq4y1Z7pM","BV1Jf4y147U7","BV1az4y117h4","BV1gy4y1h7wS", # 10
]
for bvid in bvids:
try:
bilibili = BilibiliVideoAudio(bvid)
video_audio_info = bilibili.get_video_audio()
title = video_audio_info['title']
video_url = video_audio_info['video_url']
audio_url = video_audio_info['audio_url']
bilibili.download_video_audio(video_url, f"{title}.mp4") # 下载视频
bilibili.download_video_audio(audio_url, f"{title}.mp3") # 下载音频
except Exception as ex:
print(f"Failed to download video/audio for {bvid}: {ex}")
processed_videos_path = 'D:\\processed_videos'
if not os.path.exists(processed_videos_path):
os.makedirs(processed_videos_path)
# 注意:B站和其他短视频平台的视频不同,需要分别下载视频和音频,最后将视频和音频拼接到一块
for bvid in bvids:
try:
bilibili = BilibiliVideoAudio(bvid)
video_audio_info = bilibili.get_video_audio()
title = video_audio_info['title']
video_url = video_audio_info['video_url']
audio_url = video_audio_info['audio_url']
video_filename = f"{title}.mp4"
audio_filename = f"{title}.mp3"
output_filename = f"{title} - combined.mp4"
video_file_path = os.path.join('D:\\video', video_filename)
audio_file_path = os.path.join('D:\\video', audio_filename)
output_file_path = os.path.join(processed_videos_path, output_filename)
bilibili.download_video_audio(video_url, video_filename) # 下载视频
bilibili.download_video_audio(audio_url, audio_filename) # 下载音频
bilibili.merge_video_audio(video_file_path, audio_file_path, output_file_path) # 合并视频和音频
# Optional: Delete the separate files after merge
# os.remove(video_file_path)
# os.remove(audio_file_path)
except Exception as ex:
print(f"Failed to process video/audio for {bvid}: {ex}")
main()
6.查找所需数据
1)bvid
2)referer / origin / User-Agent / Accept-Encoding
结尾
希望大家喜欢我的分享!!!