import logging
import re

import requests

logger = logging.getLogger(__name__)

# Site root; list pages yield site-relative detail links that are joined to it.
BASE_URL = 'http://www.dytt8.net'
# Paginated movie list; %s is replaced by the 1-based page number.
LIST_URL_TEMPLATE = BASE_URL + '/html/gndy/dyzz/list_23_%s.html'
# Seconds to wait on a request before giving up (requests has no default
# timeout, so without this a stalled connection blocks the crawl forever).
REQUEST_TIMEOUT = 10

# The href captures use [^"]* instead of a greedy .* so that a capture cannot
# run past the closing quote into later attributes or links on the same line.
TITLE_RE = re.compile(r'<h1><font color=#07519a>(.*?)</f')
MAGNET_RE = re.compile(r'/><a href="([^"]*)"><str')
TORRENT_RE = re.compile(r'ddf"><a href="([^"]*)">ft')
DETAIL_LINK_RE = re.compile(r'<a href="([^"]*)" class="ulink"')


def _fetch_html(url):
    """Download *url* and return its body decoded as GBK text."""
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    # 'replace' keeps one undecodable byte from aborting the whole page.
    return response.content.decode('gbk', 'replace')


def _first_group(pattern, html):
    """Return group 1 of the first match of *pattern* in *html*, or None."""
    match = pattern.search(html)
    return match.group(1) if match else None


def getdetail(url):
    """Scrape one movie detail page and print its download links.

    The page is fetched, decoded as GBK, and searched for the title, the
    magnet link and the second download link (labelled "torrent" in the
    original script).  The result is printed as
    ``{title: [magnet, torrent]}``.

    Args:
        url: Absolute URL of the movie detail page.

    Returns:
        The printed dict, or None when no title could be found (nothing is
        printed in that case).  A link that is missing from the page is
        reported as None inside the list.

    Raises:
        requests.RequestException: If the page cannot be downloaded.
    """
    html = _fetch_html(url)

    # Title of the movie detail page; it keys the result, so without it
    # there is nothing meaningful to report.
    movie_title = _first_group(TITLE_RE, html)
    if movie_title is None:
        logger.warning('no movie title found on %s; skipping', url)
        return None

    # Magnet link of the movie.
    movie_magnet = _first_group(MAGNET_RE, html)
    # Torrent download link.
    movie_torrent = _first_group(TORRENT_RE, html)

    # One entry: the title mapped to its two download links.
    movie_dict = {movie_title: [movie_magnet, movie_torrent]}
    print(movie_dict)
    return movie_dict


def getpage(first_page=1, last_page=177):
    """Walk the paginated movie list and scrape every linked detail page.

    Args:
        first_page: Number of the first list page to visit (inclusive).
        last_page: Number of the last list page to visit (inclusive).  The
            defaults reproduce the original hard-coded range of pages 1-177.

    A list page or detail page that cannot be downloaded is logged and
    skipped so that a single failure does not end the crawl.
    """
    for page_number in range(first_page, last_page + 1):
        list_url = LIST_URL_TEMPLATE % page_number
        try:
            html = _fetch_html(list_url)
        except requests.RequestException as exc:
            logger.warning('could not fetch list page %s: %s', list_url, exc)
            continue

        for movie_item in DETAIL_LINK_RE.findall(html):
            # Links on the list page are site-relative.
            movie_url = BASE_URL + movie_item
            try:
                getdetail(movie_url)
            except requests.RequestException as exc:
                logger.warning(
                    'could not fetch detail page %s: %s', movie_url, exc)


if __name__ == '__main__':
    getpage()
电影
猜你喜欢
转载自www.cnblogs.com/lxh777/p/9503233.html
今日推荐
周排行