# -*- coding: utf-8 -*-
"""Scrape the YinYueTai music V-chart and print each entry per region."""
import random

import requests
from bs4 import BeautifulSoup


def get_html(url):
    """Fetch *url* and return the decoded page text, or None on failure."""
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        # The chart pages are Chinese; trust the content-sniffed encoding
        # over the (often missing) header-declared one.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Was a bare `except:` — narrow it to network/HTTP errors so
        # programming errors are no longer silently swallowed.
        print('wrong')
        return None


def get_content(url):
    """Parse one chart page and print song name, date, singer and score.

    The entries live in <ul class="area_three area_list"> as
    <li name="dmvLi"> items — NOTE(review): selectors depend on the
    site's current markup; verify if the page layout changes.
    """
    html = get_html(url)
    soup = BeautifulSoup(html, 'lxml')
    ul = soup.find('ul', attrs={'class': 'area_three area_list'})
    li_list = ul.find_all('li', attrs={'name': 'dmvLi'})
    for li in li_list:
        name = li.find('a', attrs={'class': 'mvname'}).text
        singer = li.find('a', attrs={'class': 'special'}).text
        time = li.find('p', attrs={'class': 'c9'}).text
        sco = li.find('div', attrs={'class': 'score_box'}).h3.text
        print('歌名:{}\t{}\n演唱者:{}\n评分:{}'.format(name, time, singer, sco))


def get_agent():
    """Return a fake-header dict with a randomly chosen User-agent.

    Bug fix: the original indexed with random.randint(0, len(agents)),
    which is inclusive at BOTH ends and could raise IndexError one time
    in six; random.choice cannot go out of range.
    """
    agents = ['Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;',
              'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv,2.0.1) Gecko/20100101 Firefox/4.0.1',
              'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11',
              'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
              'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)']
    return {'User-agent': random.choice(agents)}


def get_proxy():
    """Return a proxies dict with one randomly chosen HTTP proxy.

    Same off-by-one fix as get_agent(): random.choice replaces the
    out-of-range-capable random.randint index.
    """
    proxy = ["http://116.211.143.11:80",
             "http://183.1.86.235:8118",
             "http://183.32.88.244:808",
             "http://121.40.42.35:9999",
             "http://222.94.148.210:808"]
    return {'http': random.choice(proxy)}


def main(url):
    """Crawl the chart for each supported region code appended to *url*."""
    url_list = ['ML', 'HT', 'KR', 'JP']
    for index in url_list:
        get_content(url + index)


if __name__ == '__main__':
    url = 'http://vchart.yinyuetai.com/vchart/trends?area='
    main(url)
# Python crawler tutorial, part 7: scraping the YinYueTai music V-chart.
# Adapted from blog.csdn.net/qq_38788128/article/details/80486997