爬虫实战——正则表达式爬取糗事百科

import re
import requests


def _extract_contents(text):
    """Return the plain text of every ``<div class="content">`` block in *text*.

    Each matched block has its tags removed, its HTML character references
    decoded, and surrounding whitespace stripped.
    """
    # Function-scope import keeps this block self-contained.
    from html import unescape

    # re.S lets ".*?" run across newlines, since a block may span several lines.
    blocks = re.findall(r'<div class="content">.*?</div>', text, re.S)
    # Decode entities only AFTER the tags are removed, so that an escaped
    # "&lt;...&gt;" in the text is not mistaken for markup and deleted.
    return [unescape(re.sub(r'<.*?>', "", block)).strip() for block in blocks]


def parse_url(url, timeout=10):
    """Fetch *url* and print the text of each content block found on the page.

    Every extracted entry is printed on its own, followed by a separator
    line of 30 ``#`` characters.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            it ``requests`` waits indefinitely, so one stalled connection
            would hang the whole crawl.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    # Browser User-Agent string; presumably the site rejects the default
    # client identifier - TODO confirm.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    for end in _extract_contents(response.text):
        print(end)
        print("#" * 30)



def main():
    """Crawl listing pages 1 through 9 and print the content of each one."""
    url_template = "https://www.qiushibaike.com/text/page/%s/"
    page_number = 1
    # The upper bound is exclusive: page 10 is not fetched.
    while page_number < 10:
        parse_url(url_template % page_number)
        page_number += 1
if __name__ == "__main__":
    # Start the crawl only when this file is executed as a script, so that
    # importing the module does not trigger network requests.
    main()
发布了35 篇原创文章 · 获赞 4 · 访问量 2338

猜你喜欢

转载自blog.csdn.net/devilangel2/article/details/105442149