# 版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/Yk_0311/article/details/82457509
import requests
from pyquery import PyQuery as pq
# Scrape the feed items shown on the Zhihu "explore" page and append each
# item's question, author and answer text to explore.txt.
url = "https://www.zhihu.com/explore"
# Custom User-Agent header sent with the request.
hd = {'User-Agent': 'Mozilla/5.0'}

# Fetch the page. A failed request leaves nothing to parse, so stop here
# instead of falling through to code that needs a valid response.
try:
    # Bound the wait so a stalled connection cannot hang the script forever.
    r = requests.get(url, headers=hd, timeout=10)
    r.raise_for_status()
    # Decode the body with the encoding detected from its content.
    r.encoding = r.apparent_encoding
except requests.RequestException as exc:
    print('=====')
    raise SystemExit('Failed to fetch {}: {}'.format(url, exc))

doc = pq(r.text)
# Select every '.feed-item' element nested under '.explore-tab'.
items = doc('.explore-tab .feed-item').items()

records = []
for item in items:
    # `or ''` keeps the join below safe if a lookup yields no text.
    question = item.find('h2').text() or ''  # question title
    author = item.find('.author-link-line').text() or ''  # answer author
    # The answer is the inner markup of '.content', re-parsed so that only
    # its text is kept. Skip the re-parse when there is no markup, because
    # pq() cannot be built from None or an empty document.
    content_html = item.find('.content').html()
    if content_html and content_html.strip():
        answer = pq(content_html).text() or ''
    else:
        answer = ''
    records.append(
        '\n'.join([question, author, answer]) + '\n' + '=' * 50 + '\n'
    )

# Open the output file once (append mode) and only when there is something
# to save, rather than reopening it for every item.
if records:
    with open("explore.txt", 'a', encoding='utf-8') as f:
        f.writelines(records)
# 主要:pyquery解析库的使用