python 恋听网mp3多线程多任务下载

#coding=utf-8
import requests
import threading
import re

class downloader:
    """Download one remote file as several parallel HTTP byte-range segments.

    The constructor issues a HEAD request to learn the file size; ``run``
    then starts one thread per segment, and every thread writes its bytes
    into the shared output file at the segment's own offset.
    """

    def __init__(self, downloadurl, num=8,
                 output_dir=r'E:/PycharmProjects/download/output/'):
        """Split the link into URL and file name and query the file size.

        Args:
            downloadurl: link of the form ``<url>&n=<file name>``, e.g.
                'http://host/down/2016/10/8030/15.mp3?s=<sig>&e=<ts>&n=<name>.mp3'.
                Everything before ``&n=`` is requested; what follows is used
                as the local file name.
            num: number of download threads (segments).
            output_dir: directory prefix for the output file. It is
                concatenated with the file name as-is, so it must end with
                a path separator.

        Raises:
            IndexError: if ``downloadurl`` contains no ``&n=`` part.
            KeyError: if the HEAD response has no ``Content-Length`` header.
        """
        parts = downloadurl.split('&n=')
        self.url = parts[0]
        # Number of worker threads.
        self.num = num
        # The file name is taken from the tail of the link.
        self.name = parts[1]
        self.output_dir = output_dir
        # Serialises seek+write pairs on the shared file object.
        self._lock = threading.Lock()
        # A HEAD request is enough to read the resource size.
        r = requests.head(self.url)
        self.total = int(r.headers['Content-Length'])
        print('total is %s' % (self.total))

    def get_range(self):
        """Return the list of ``(start, end)`` segments, one per thread.

        For total=50 and num=4 the result is
        ``[(0, 12), (12, 24), (24, 36), (36, '')]``. The last segment has an
        empty end so that it extends to the end of the file and picks up the
        division remainder.
        """
        ranges = []
        # Floor division keeps the offset an exact integer on Python 2 and 3.
        offset = self.total // self.num
        for i in range(self.num):
            if i == self.num - 1:
                # Last thread: open-ended range, read up to the end.
                ranges.append((i * offset, ''))
            else:
                # Segment handled by thread i.
                ranges.append((i * offset, (i + 1) * offset))
        return ranges

    def _write_chunk(self, start, data):
        """Write ``data`` at byte offset ``start`` of the open output file."""
        # seek and write must not interleave with another thread's pair,
        # otherwise the data lands at the other thread's offset.
        with self._lock:
            self.fd.seek(start)
            self.fd.write(data)

    def download(self, start, end):
        """Fetch bytes ``start``-``end`` of the URL and store them at ``start``.

        Note: HTTP byte ranges include the end position, so a segment from
        ``get_range`` overlaps the next one by one byte; that byte is simply
        written twice with the same value.
        """
        headers = {'Range': 'Bytes=%s-%s' % (start, end), 'Accept-Encoding': '*'}
        # Fetch this segment only.
        res = requests.get(self.url, headers=headers)
        print('%s:%s download success' % (start, end))
        self._write_chunk(start, res.content)

    def run(self):
        """Download every segment in its own thread and wait for all of them."""
        # Binary mode: the payload is raw bytes and must not go through
        # newline translation. The with-block closes the file even if
        # starting or joining a thread fails.
        with open(self.output_dir + self.name, 'wb') as fd:
            # Worker threads reach the file through self.fd.
            self.fd = fd
            thread_list = []
            for n, (start, end) in enumerate(self.get_range()):
                print('thread %d start:%s,end:%s' % (n, start, end))
                thread = threading.Thread(target=self.download, args=(start, end))
                thread.start()
                thread_list.append(thread)
            for worker in thread_list:
                # Wait until every segment has been written.
                worker.join()
        print('download %s load success' % (self.name))

def getPage(url):
    """Request ``url`` with ``requests.get`` and return the response text."""
    return requests.get(url).text


def getDownloadUrls(page_content):
    """Return the href of the 11th anchor tag counted from the end of the page.

    Anchors are matched with a non-greedy pattern in DOTALL mode, so a tag
    may span several lines.
    NOTE(review): index -11 is presumably where the mp3 link sits in the
    page layout this was written for -- confirm against the live page.

    Raises:
        IndexError: if the page contains fewer than 11 matching anchors.
    """
    anchor_re = re.compile(r'<a href="(.*?)".*?>', re.S)
    hrefs = [match.group(1) for match in anchor_re.finditer(page_content)]
    return hrefs[-11]

if __name__ == '__main__':
    # Walk download pages 8030-1 .. 8030-60; for each one, extract the
    # download link from the page and fetch the file it points to.
    for episode in range(1, 61):
        link = getDownloadUrls(getPage("http://ting55.com/down/8030-%d" % episode))
        # Build a downloader for this link and run it to completion.
        downloader(link).run()

猜你喜欢

转载自blog.csdn.net/DAo_1990/article/details/65443652