urllib简单爬取m3u8地址的所有ts文件并下载

#!/usr/bin/env python
# coding=utf-8
# 爬取m3u8地址的所有ts文件并下载
# https://newplayers.pe62.com/mdparse/m3u8.php?id=https://cdn.youku-letv.com/20180709/Vq6F9hHv/index.m3u8
import os
import time
import urllib.parse
import urllib.request

import requests

# Base URL that every relative .ts segment URI in the playlist is resolved
# against.
begin_url = "https://cdn.youku-letv.com/20180709/Vq6F9hHv/"
# Address of the m3u8 playlist; its body lists the .ts segment names.
url = "https://cdn.youku-letv.com/20180709/Vq6F9hHv/index.m3u8"
# Headers sent with every HTTP request.
header = {
    'User-Agent': 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0);'
}
# Directory the downloaded segments are written to.
path = "D:/video2/"
# Number of leading characters dropped from each segment name when building
# the local file name (the original script hard-coded this slice as `[8:`).
name_prefix_len = 8
# Seconds to wait for a server response before giving up.
request_timeout = 30
# Seconds to pause between two segment downloads.
request_delay = 1


def parse_segment_urls(playlist_text, base_url):
    """Return the absolute URL of every media segment listed in a playlist.

    A segment URI is the first non-blank, non-tag line that follows an
    ``#EXTINF`` tag.  Lines are stripped first so CRLF line endings do not
    leak a trailing ``\\r`` into the URLs, and an ``#EXTINF`` tag with no URI
    after it is ignored instead of raising ``IndexError``.

    Args:
        playlist_text: Full text of the m3u8 playlist.
        base_url: URL that relative segment URIs are resolved against.

    Returns:
        A list of absolute segment URLs in playlist order.
    """
    segment_urls = []
    expecting_uri = False
    for raw_line in playlist_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith("#"):
            # Other tags may sit between #EXTINF and the URI, so only the
            # #EXTINF tag itself arms the flag; nothing here disarms it.
            if line.startswith("#EXTINF"):
                expecting_uri = True
            continue
        if expecting_uri:
            # urljoin leaves absolute URIs untouched and resolves relative
            # ones, which plain string concatenation cannot do.
            segment_urls.append(urllib.parse.urljoin(base_url, line))
            expecting_uri = False
    return segment_urls


def segment_file_name(segment_url, prefix_len=name_prefix_len):
    """Build the local file name for a segment URL.

    The name is the last path component of the URL without its extension and
    without its first ``prefix_len`` characters, followed by ``.ts``.  When
    trimming the prefix would leave nothing, the untrimmed stem is used so
    that short names do not all collapse onto the same file.

    Args:
        segment_url: Absolute URL of the segment.
        prefix_len: Number of leading characters to drop from the stem.

    Returns:
        The file name (no directory part), always ending in ``.ts``.
    """
    # urlsplit().path drops any query string or fragment from the URL.
    url_path = urllib.parse.urlsplit(segment_url).path
    base_name = url_path.rsplit("/", 1)[-1]
    stem = os.path.splitext(base_name)[0]
    trimmed = stem[prefix_len:]
    return (trimmed or stem) + ".ts"


def download_segment(segment_url, destination, headers=None, timeout=request_timeout):
    """Download one segment and write its bytes to ``destination``.

    Args:
        segment_url: Absolute URL of the segment.
        destination: Path of the file to create or overwrite.
        headers: Optional dict of HTTP headers to send.
        timeout: Seconds to wait for the server.

    Raises:
        urllib.error.URLError: If the request fails.
        OSError: If the destination file cannot be written.
    """
    request = urllib.request.Request(segment_url, headers=headers or {})
    # The context manager closes the HTTP response even if read() fails.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        data = response.read()
    with open(destination, "wb") as f:
        f.write(data)


def main():
    """Fetch the playlist and download every listed segment into ``path``."""
    response = requests.get(url, headers=header, timeout=request_timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a playlist.
    response.raise_for_status()
    segment_urls = parse_segment_urls(response.text, begin_url)

    # Create the output directory once, before the loop.
    os.makedirs(path, exist_ok=True)
    for segment_url in segment_urls:
        destination = os.path.join(path, segment_file_name(segment_url))
        download_segment(segment_url, destination, headers=header)
        # Pause between requests to avoid hammering the server.
        time.sleep(request_delay)


if __name__ == "__main__":
    main()

猜你喜欢

转载自blog.csdn.net/z564359805/article/details/81055321
今日推荐