通过访问 TikTok 的直播间网页,从网页中 id 为 SIGI_STATE 的 script 标签内提取出包含该直播间信息的 JSON 串,最终从 JSON 里提取出直播视频流的 HLS 地址和直播间的其他信息。
import sys
import requests
import json
import time
import subprocess
from urllib.parse import urlunparse
from bs4 import BeautifulSoup
from urllib.parse import urlparse
def get_tiktok_live_data_from_pc(user_agent, pc_live_url):
headers = {
'User-Agent': user_agent
}
response = requests.get(pc_live_url, headers=headers)
html_str = response.text
soup = BeautifulSoup(html_str, 'html.parser')
script = soup.find('script', id='SIGI_STATE', type='application/json')
json_data = {}
target_str = script.string.strip()
if target_str is not None and "live