知乎热榜前十

import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape the Zhihu hot-list board on tophub.today and save the first ten
# entries (title and heat value) to a CSV file.
url = 'https://tophub.today/n/mproPpoq6O'
# Identify as a regular browser; a URL is not a meaningful User-Agent value
# and some sites reject requests whose User-Agent does not look like a client.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/80.0.3987.132 Safari/537.36'
    )
}
# requests applies no timeout by default, so set one to avoid hanging forever.
r = requests.get(url, headers=headers, timeout=10)
# Fail loudly on HTTP errors instead of parsing an error page into the CSV.
r.raise_for_status()
soup = BeautifulSoup(r.content, 'html.parser')

# The parsing below assumes every ranking entry occupies four consecutive
# <td> cells, with the title in the 2nd cell and the heat value in the 3rd.
cells_per_row = 4
top_n = 10
cells = soup.find_all("td")[:cells_per_row * top_n]

# Collect [title, heat] pairs. Only complete groups of four cells are used,
# so a truncated trailing group (page with fewer cells) is ignored.
g_list_content = [
    [cells[start + 1].get_text(), cells[start + 2].get_text()]
    for start in range(0, len(cells) - cells_per_row + 1, cells_per_row)
]

# Column names are '标题' (title) and '热度' (heat); the DataFrame index is
# written as the first CSV column.
df = pd.DataFrame(g_list_content, columns=['标题', '热度'])
df.to_csv('C:\\wenjian\\aaa.csv', encoding="utf-8")

猜你喜欢

转载自www.cnblogs.com/123OC/p/12536827.html