"""
__title__ = ''
__author__ = 'Thompson'
__mtime__ = '2018/8/28'
# code is far away from bugs with the god animal protecting
I love animals. They taste delicious.
┏┓ ┏┓
┏┛┻━━━┛┻┓
┃ ☃ ┃
┃ ┳┛ ┗┳ ┃
┃ ┻ ┃
┗━┓ ┏━┛
┃ ┗━━━┓
┃ 神兽保佑 ┣┓
┃ 永无BUG! ┏┛
┗┓┓┏━┳┓┏┛
┃┫┫ ┃┫┫
┗┻┛ ┗┻┛
"""
import requests
import re
# Request settings for the Meituan Beijing hotel listing page.
LISTING_URL = "http://hotel.meituan.com/beijing/"
REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (compatible; WOW64; MSIE 10.0; Windows NT 6.2)"
}
# Seconds to wait for the server; without a timeout requests.get can block
# forever on an unresponsive host.
REQUEST_TIMEOUT = 10

# Every pattern is compiled with re.S so that "." also matches newlines:
# the markup of one hotel entry is normally spread over several lines.
ITEM_PATTERN = re.compile(
    r'<article class="poi-item".*?>(.*?)</article>', re.S
)

# (field name, pattern) pairs, in the order the fields are printed.
# Group 1 of each pattern is the field value.
# NOTE(review): the score pattern takes the first run of digits/dots found
# after the opening of the poi-grade div, attributes included - confirm that
# this is the rating on the live page.
# NOTE(review): "data-v-5be45891" looks like a build-generated attribute; if
# prices stop being extracted, check whether that attribute has changed.
FIELD_PATTERNS = (
    ("title", re.compile(
        r'<h3.*?>.*?<a.*?>.*?<em.*?>.*?</em>(.*?)</a>', re.S)),
    ("url", re.compile(
        r'<h3.*?>.*?<a href="(.*?)".*?>', re.S)),
    ("score", re.compile(
        r'<div class="poi-grade".*?([0-9.]+)', re.S)),
    ("price", re.compile(
        r'<div class="poi-price".*?<em data-v-5be45891>(.*?)</em>', re.S)),
)


def fetch_html(url: str = LISTING_URL, headers: dict = None,
               timeout: float = REQUEST_TIMEOUT) -> str:
    """Download *url* and return the response body decoded as UTF-8.

    Args:
        url: Page to download; defaults to the Beijing hotel listing.
        headers: HTTP request headers; ``None`` selects ``REQUEST_HEADERS``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded page source.

    Raises:
        requests.RequestException: On connection problems, a timeout, or an
            HTTP error status (via ``raise_for_status``).
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    if headers is None:
        headers = REQUEST_HEADERS
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    return response.content.decode("utf-8")


def extract_hotels(html: str) -> list:
    """Extract the hotel entries contained in a listing page.

    Args:
        html: Page source to scan for ``<article class="poi-item">`` blocks.

    Returns:
        One dict per article, in document order, with the keys ``title``,
        ``url``, ``score`` and ``price``. Each value is the matched text, or
        ``None`` when the corresponding pattern does not match the article.
    """
    hotels = []
    for item in ITEM_PATTERN.findall(html):
        hotel = {}
        for field, pattern in FIELD_PATTERNS:
            match = pattern.search(item)
            hotel[field] = match.group(1) if match else None
        hotels.append(hotel)
    return hotels


def main() -> None:
    """Fetch the listing page and print it, the hotel count and each field."""
    html = fetch_html()
    print(html)
    hotels = extract_hotels(html)
    print(len(hotels))
    for hotel in hotels:
        for field, _ in FIELD_PATTERNS:
            value = hotel[field]
            # Fields that were not found are skipped, not printed as None.
            if value is not None:
                print(value)


if __name__ == "__main__":
    main()
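# Crawler 07 - Meituan
# (blog page residue: "You may also like")
# Reposted from blog.csdn.net/qwerLoL123456/article/details/83143602
# (blog page residue: "Today's recommendations")
# (blog page residue: "Weekly ranking")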