# 获取知乎热点前十名与其热度

import requests
import re
import pandas as pd
import os


# Page to scrape and the request settings used to fetch it.
HOT_LIST_URL = 'https://tophub.today/n/mproPpoq6O'
REQUEST_HEADERS = {'User-Agent': '5498'}
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests may block forever

# Where the resulting table is written.
OUTPUT_DIR = r'C:\Users\a5498\Desktop'
OUTPUT_NAME = 'file.csv'


def fetch_html(url=HOT_LIST_URL, headers=None, timeout=REQUEST_TIMEOUT) -> str:
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to download.
        headers: HTTP headers to send; defaults to ``REQUEST_HEADERS``.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On connection problems, a timeout, or an
            HTTP error status (via ``raise_for_status``).
    """
    if headers is None:
        headers = REQUEST_HEADERS
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as data.
    response.raise_for_status()
    return response.text


def parse_hot_topics(html: str) -> tuple:
    """Extract the topic titles and heat values from the page markup.

    Args:
        html: Raw HTML text of the hot-list page.

    Returns:
        A ``(titles, heats)`` pair of string lists, each holding at most ten
        entries.
    """
    # The first three anchor matches are skipped and the next ten are kept.
    # NOTE(review): presumably the skipped ones are non-topic links on the
    # page -- confirm against the live markup.
    titles = re.findall('<a href=.*? target="_blank" .*?>(.*?)</a>', html)[3:13]
    # Heat values are taken from the first ten bare <td> cells.
    heats = re.findall('<td>(.*?)</td>', html)[0:10]
    return titles, heats


def build_table(titles: list, heats: list) -> pd.DataFrame:
    """Combine titles and heat values into a two-column DataFrame.

    Raises:
        ValueError: If the two lists differ in length, which means the
            regexes no longer line up with the page layout.
    """
    if len(titles) != len(heats):
        raise ValueError(
            f'scraped {len(titles)} titles but {len(heats)} heat values; '
            'the page layout may have changed'
        )
    return pd.DataFrame({'标题': titles, '热度': heats})


def main() -> None:
    """Fetch the hot list, save it as CSV, and print the table."""
    titles, heats = parse_hot_topics(fetch_html())
    table = build_table(titles, heats)
    # Write by full path rather than os.chdir so the process working
    # directory is left untouched.
    table.to_csv(os.path.join(OUTPUT_DIR, OUTPUT_NAME))
    print(table)


if __name__ == '__main__':
    main()

 

# 猜你喜欢

# 转载自 www.cnblogs.com/lnico/p/12521951.html