This example is for learning and reference only; if it infringes any rights, please contact us and it will be removed!
Suppose we want to grab the "2024 must-see films" ("2024必看热片") section.
Right-click the page and choose "View page source".
Locate this block in the source.
Then write regular expressions to match it:
obj1 = re.compile(r"2024必看热片.*?<ul>(?P<ul>.*?)</ul>", re.S)
obj2 = re.compile(r"<a href='(?P<href>.*?)'")
obj3 = re.compile(r'◎片 名(?P<movie>.*?)<br />.*?<td '
                  r'style="WORD-WRAP: break-word" bgcolor="#fdfddf"><a href="(?P<download>.*?)">', re.S)
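To see how the named groups behave, here is a minimal sketch run against a made-up HTML fragment (the fragment is purely illustrative and not copied from the real page):

import re

obj1 = re.compile(r"2024必看热片.*?<ul>(?P<ul>.*?)</ul>", re.S)
obj2 = re.compile(r"<a href='(?P<href>.*?)'")

# hypothetical snippet shaped like the list on the index page
sample = "2024必看热片<ul><li><a href='/i/108888.html'>Movie A</a></li><li><a href='/i/108889.html'>Movie B</a></li></ul>"

for m in obj1.finditer(sample):
    ul = m.group('ul')              # everything between <ul> and </ul>
    for a in obj2.finditer(ul):
        print(a.group('href'))      # /i/108888.html, then /i/108889.html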
Full code:
# http://dytt89.com/
# 1. Locate the "2024 must-see films" section
# 2. Extract the links to the child pages from that section
# 3. Request each child page and grab the download link we want
import requests
import re
import csv
import pandas as pd
import time
url = 'http://dytt89.com/'
resp = requests.get(url, verify=False)  # verify=False skips the SSL certificate check
resp.encoding = 'gb2312'  # set the character set used by the site
# print(resp.text)
# grab the <li> entries inside the <ul>
obj1 = re.compile(r"2024必看热片.*?<ul>(?P<ul>.*?)</ul>", re.S)
obj2 = re.compile(r"<a href='(?P<href>.*?)'")
obj3 = re.compile(r'◎片 名(?P<movie>.*?)<br />.*?<td '
                  r'style="WORD-WRAP: break-word" bgcolor="#fdfddf"><a href="(?P<download>.*?)">', re.S)
result1 = obj1.finditer(resp.text)
child_href_list = []
for it in result1:
    ul = it.group('ul')
    # print(ul)
    # extract the href of each child page
    result2 = obj2.finditer(ul)
    for itt in result2:
        child_href = url + itt.group('href').strip('/')
        child_href_list.append(child_href)  # save for later
# write the results to a CSV file
with open('data_movie.csv', mode='w', encoding='utf-8', newline='') as f:
    csvwriter = csv.writer(f)
    csvwriter.writerow(["电影名称", "电影下载链接"])  # write the header row
    # pull the data out of each child page
    for href in child_href_list:
        try:
            child_resp = requests.get(href, verify=False)
            child_resp.encoding = 'gb2312'
            result3 = obj3.search(child_resp.text)
            if result3:
                movie = result3.group('movie').strip()
                download = result3.group('download')
                print(movie, download)
                # write this movie's info
                csvwriter.writerow([movie, download])
            time.sleep(1)  # pause between requests so we don't hit the site too often
        except Exception as e:
            print(f"处理链接 {href} 失败: {e}")
# read the CSV back and convert it to an Excel file
df = pd.read_csv('data_movie.csv')
df.to_excel('data_movie.xlsx', index=False)
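Two practical notes, based on a typical environment rather than anything stated in the original post: df.to_excel needs the openpyxl package installed to write .xlsx files, and verify=False makes requests print an InsecureRequestWarning for every call. An optional snippet to silence that warning, placed next to the imports:

# Optional: requests warns about the skipped certificate check when verify=False is used.
# urllib3 is installed alongside requests, so this call is normally available.
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)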
Results:
Copy any of the extracted links and it will open in Thunder (迅雷) to start the download.
Congratulations, you've got it, now go try it yourself!