# Crawler 07 - Meituan

"""
__title__ = ''
__author__ = 'Thompson'
__mtime__ = '2018/8/28'
# code is far away from bugs with the god animal protecting
    I love animals. They taste delicious.
              ┏┓      ┏┓
            ┏┛┻━━━┛┻┓
            ┃      ☃      ┃
            ┃  ┳┛  ┗┳  ┃
            ┃      ┻      ┃
            ┗━┓      ┏━┛
                ┃      ┗━━━┓
                ┃  神兽保佑    ┣┓
                ┃　永无BUG！   ┏┛
                ┗┓┓┏━┳┓┏┛
                  ┃┫┫  ┃┫┫
                  ┗┻┛  ┗┻┛
"""

import requests
import re

# Listing page that is fetched, and the browser-like header sent with it.
HOTEL_LIST_URL = "http://hotel.meituan.com/beijing/"
REQUEST_HEADERS = {"User-Agent":"Mozilla/5.0 (compatible; WOW64; MSIE 10.0; Windows NT 6.2)"}
# requests applies no timeout by default; without one a stalled server
# would block the script indefinitely.
REQUEST_TIMEOUT_SECONDS = 10

# One match per hotel card; group 1 is the markup inside the <article>.
ITEM_PATTERN = re.compile(r'<article class="poi-item".*?>(.*?)</article>', re.S)

# (field name, pattern) pairs applied to each card, in output order.
# Every pattern uses re.S so that ".*?" can span the line breaks between
# tags; the price pattern needs it just like the others.
FIELD_PATTERNS = (
    # Title: text following the <em> tag inside the heading link.
    ("title", re.compile(r'<h3.*?>.*?<a.*?>.*?<em.*?>.*?</em>(.*?)</a>', re.S)),
    # URL: href of the heading link.
    ("url", re.compile(r'<h3.*?>.*?<a href="(.*?)".*?>', re.S)),
    # Score: first run of digits/dots after the grade container opens.
    ("score", re.compile(r'<div class="poi-grade".*?([0-9.]+)', re.S)),
    # Price: content of the <em data-v-5be45891> inside the price container.
    ("price", re.compile(r'<div class="poi-price".*?<em data-v-5be45891>(.*?)</em>', re.S)),
)


def parse_hotels(html):
    """Extract the hotel cards found in *html*.

    Args:
        html: Page markup as a string.

    Returns:
        A list with one dict per ``<article class="poi-item">`` element, in
        document order. Each dict maps the field names of FIELD_PATTERNS
        ("title", "url", "score", "price") to the captured text; a field
        whose pattern does not match the card is left out of that dict.
        Returns an empty list when no card is found.
    """
    hotels = []
    for card in ITEM_PATTERN.findall(html):
        fields = {}
        for name, pattern in FIELD_PATTERNS:
            found = pattern.search(card)
            if found:
                fields[name] = found.group(1)
        hotels.append(fields)
    return hotels


if __name__ == "__main__":
    response = requests.get(
        HOTEL_LIST_URL,
        headers=REQUEST_HEADERS,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # bytes.decode() with no argument decodes as UTF-8.
    html = response.content.decode()
    # Echo the raw page before the extracted fields.
    print(html)

    hotels = parse_hotels(html)
    print(len(hotels))
    for hotel in hotels:
        # Print whichever fields were found, in title/url/score/price order.
        for name, _ in FIELD_PATTERNS:
            if name in hotel:
                print(hotel[name])
# "You may also like" (leftover page text, not part of the script)