账号名:luowenqiang
密码: lWq159357
可以免费下载文件
代码:
import requests
import os
import time
from bs4 import BeautifulSoup
SPICE_URL = 'https://podaac-tools.jpl.nasa.gov'  # base URL prepended to relative hrefs found in listings
URL = 'https://podaac-tools.jpl.nasa.gov/drive/files/allData/topex/L2'  # crawl root
# Session cookie copied from a logged-in browser; must be refreshed when it expires.
COOKIE = '_ga=GA1.2.2044435566.1575709094; _gid=GA1.2.984665774.1575709094; PODAAC_Drive=Xetpr3Zh1Qbc-8ALgXh2FQAAABI'
# Parse the raw Cookie header into a dict usable by requests.
# strip() removes the leading space each key kept after splitting on ';'
# (the old parse produced keys like ' _gid' that servers would not match);
# split('=', 1) tolerates '=' characters inside cookie values.
g_cookies = dict(pair.strip().split('=', 1) for pair in COOKIE.split(';'))
base_path = "D:\\xzydown\\"  # local root directory for downloaded files
def mkdir(path):
    """Create directory *path* (including parents) if it does not exist.

    Leading/trailing whitespace and trailing backslashes are stripped from
    *path* before the existence check.

    :param path: directory path to create
    :return: True if the directory was created, False if it already existed
    """
    # Normalize: drop surrounding whitespace and any trailing '\' separator.
    path = path.strip().rstrip("\\")
    if not os.path.exists(path):
        # makedirs creates intermediate directories as needed.
        os.makedirs(path)
        print('创建成功')
        return True
    print('目录已存在')
    return False
def DownloadFile(url,path,m_cookie=''):
    '''
    Stream-download *url* to local file *path*, printing progress roughly
    every 2 seconds.

    :param url: download link
    :param path: local save path (directories must already exist)
    :param m_cookie: optional cookie dict; when falsy, the module-level
        g_cookies login cookies are used (previously this parameter was
        silently ignored)
    :return: None
    '''
    headers = {'Proxy-Connection': 'keep-alive'}
    # Honor a caller-supplied cookie dict; fall back to the module default.
    cookies = m_cookie if m_cookie else g_cookies
    r = requests.get(url, stream=True, headers=headers, cookies=cookies)
    # Some servers omit Content-Length; default to 0 so we don't KeyError.
    length = float(r.headers.get('content-length', 0))
    count = 0
    count_tmp = 0
    time1 = time.time()
    # 'with' guarantees the file handle is closed even if the transfer fails.
    with open(path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=512):
            if chunk:
                f.write(chunk)
                count += len(chunk)
                if time.time() - time1 > 2:
                    # Guard against division by zero when length is unknown.
                    p = count / length * 100 if length else 0.0
                    # Bytes since last report, over the ~2s window, in MB/s.
                    speed = (count - count_tmp) / 1024 / 1024 / 2
                    count_tmp = count
                    print(path + ': ' + formatFloat(p) + '%' + ' Speed: ' + formatFloat(speed) + 'M/S')
                    time1 = time.time()
    print("文件保存在:"+path)
def formatFloat(num):
    """Return *num* rendered as a string with exactly two decimal places."""
    return '%.2f' % num
def getTestUrl(test_url):
    """Recursively walk a PODAAC drive listing page and download every file.

    Pages containing a 'table-responsive' listing are treated as directories
    and traversed; pages without one are assumed to be direct file links and
    downloaded into a folder named after their parent path segment.
    """
    page = requests.get(test_url, cookies=g_cookies)
    soup = BeautifulSoup(page.text, 'lxml')
    tables = soup.find_all(class_='table-responsive')
    if not tables:
        # No listing table: assume the URL points straight at a file.
        parts = test_url.split('/')
        file_name = parts[-1]
        save_dir = base_path + parts[-2] + "\\"
        mkdir(save_dir)  # ensure the target folder exists
        DownloadFile(test_url, save_dir + file_name)
        return
    for anchor in tables[0].find_all('a'):
        if anchor.text == 'Parent Directory':
            continue  # skip the up-link so we never walk back up the tree
        cur_url = SPICE_URL + anchor.get('href').strip()
        # Heuristic: a '.' past position 30 indicates a file extension (leaf).
        if cur_url.find('.', 30) < 0:
            # Directory-like URL: recurse into it.
            getTestUrl(cur_url)
        else:
            # File-like URL: download into a folder named after its parent.
            segments = cur_url.split('/')
            file_name = segments[-1]
            down_path = base_path + segments[-2] + "\\"
            mkdir(down_path)
            DownloadFile(cur_url, down_path + file_name)
            print("需要下载的链接:" + cur_url)
# Script entry point: crawl the root listing URL and download everything under it.
if __name__ == '__main__':
    getTestUrl(URL)
    print('============================运行结束标志===========================')