import re
import requests
def parse_url(url, timeout=10):
    """Download one page and print the text of each content block on it.

    The page at *url* is requested with a desktop-browser User-Agent header.
    Every ``<div class="content">...</div>`` fragment in the response is
    located with a regular expression, its HTML tags are removed, and the
    remaining text is printed followed by a separator line of 30 ``#``
    characters.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without a timeout a stalled connection may hang
            indefinitely.

    Returns:
        None. All output is written to stdout.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36",
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    html = response.text

    # re.S makes "." match newlines too, so a block that spans several
    # lines is still captured; the lazy ".*?" stops at the first "</div>".
    blocks = re.findall(r'<div class="content">.*?</div>', html, re.S)

    # Remove every tag from each block and trim surrounding whitespace.
    texts = [re.sub(r'<.*?>', "", block).strip() for block in blocks]

    for text in texts:
        print(text)
        print("#" * 30)
def main():
    """Call ``parse_url`` on listing pages 1 through 9, in order."""
    page_url_template = "https://www.qiushibaike.com/text/page/%s/"
    # range(1, 10) stops before 10, so nine pages are visited.
    for page_number in range(1, 10):
        parse_url(page_url_template % page_number)
# Start the scrape. There is no ``__main__`` guard, so this call also runs
# whenever the module is imported.
main()
爬虫实战——正则表达式爬取糗事百科
猜你喜欢
转载自blog.csdn.net/devilangel2/article/details/105442149
今日推荐
周排行