python 黑板课爬虫闯关-第一关

#!/usr/bin/python
# -*- coding:utf-8 -*-
# Author: LiTianle
# Time:2019/9/24 15:36
'''
<h3>你需要在网址后输入数字53639</h3>
<h3>下一个你需要输入的数字是10963. </h3>
'''
import requests,re
    
def get_num(s: str) -> None:
    """Walk the chain of numbered pages of crawler level 1 until it ends.

    Starting at ``s``, each page is downloaded and searched for the next
    number (the digits following ``数字`` / ``数字是`` after an ``<h3>``).
    That number is appended to the level's base URL, the new URL is printed,
    and the walk continues from there. The first page that carries no such
    number is treated as the final page: the ``href`` of its
    ``btn btn-primary`` anchor is printed as the address of the next level.

    Args:
        s: URL of the page to start from.

    Raises:
        IndexError: the final page contains no ``btn btn-primary`` link.
        RuntimeError: a page points to a URL that was already visited.
        requests.exceptions.RequestException: a download failed or timed out.
    """
    base_url = 'http://www.heibanke.com/lesson/crawler_ex00/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
    }
    # Raw strings keep ``\d`` / ``\s`` as regex escapes instead of invalid
    # string escapes.
    number_pattern = r'<h3>.*数字是?(\d+)'
    # ``[^"]*`` stops at the closing quote of href, so the captured path
    # contains neither that quote nor any markup around the tag.
    next_level_pattern = r'<a\s+href="([^"]*)"[^>]*\bclass="btn btn-primary'

    url = s
    visited = set()
    # Iterate instead of recursing: the chain length is decided by the
    # server, so it must not be bounded by the interpreter's call stack.
    while True:
        if url in visited:
            # Without this guard a cyclic chain would loop forever.
            raise RuntimeError('page chain loops back to ' + url)
        visited.add(url)

        # The timeout keeps an unresponsive server from hanging the script.
        page_text = requests.get(url=url, headers=headers, timeout=10).text
        found = re.search(number_pattern, page_text, re.S)
        if found is None:
            break

        # Build and report the URL of the next page in the chain.
        url = base_url + found.group(1)
        print(url)

    link = re.search(next_level_pattern, page_text)
    if link is None:
        raise IndexError('no next-level link found on ' + url)
    print('闯关成功,下一关:http://www.heibanke.com' + link.group(1))

if __name__ == "__main__":
    # Script entry point: start the walk at the landing page of level 1.
    get_num('http://www.heibanke.com/lesson/crawler_ex00/')

猜你喜欢

转载自www.cnblogs.com/tianleblog/p/11672675.html