# Scraping tutorial 4-1: use BeautifulSoup to fetch minimum temperatures from the China Weather Network

import requests
from bs4 import BeautifulSoup
from pyecharts import Bar

# 所有城市的最低温度
ALL_DATA = []


def parse_url(url):
    headers = {
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
                      " (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
        'Referer': 'http://www.weather.com.cn/textFC/hz.shtml'
    }
    response = requests.get(url, headers=headers)
    text = response.content.decode("utf-8")
    soup = BeautifulSoup(text, "html5lib")
    # 由于港澳台页面表格不标准,故需要采取容错能力更强的解析器HTML5lib
    conMidtab = soup.find("div", class_='conMidtab')
    tables = conMidtab.find_all("table")
    for table in tables:
        trs = table.find_all("tr")[2:]
        for index, tr in enumerate(trs):
            tds = tr.find_all("td")
            city_td = tds[0]
            if index == 0:
                city_td = tds[1]
            city = list(city_td.stripped_strings)[0]
            temp_td = tds[-2]
            temp = list(temp_td.stripped_strings)[0]
            ALL_DATA.append({"city": city, "min_temp": int(temp)})
            print({"city": city, "min_temp": temp})


def main():
    areas = ['hb.shtml', 'db.shtml', 'hd.shtml', 'hz.shtml', 'hn.shtml', 'xb.shtml', 'xn.shtml', 'gat.shtml']
    base_url = 'http://www.weather.com.cn/textFC/'
    for area in areas:
        url = base_url + area
        parse_url(url)
    ALL_DATA.sort(key=lambda data: data['min_temp'])
    data = ALL_DATA[0:10]
    cities = list(map(lambda x: x['city'], data))
    min_temps = list(map(lambda x: x['min_temp'], data))
    chart = Bar("中国天气最低气温排行")
    chart.add("", cities, min_temps)
    chart.render('temperature.html')


if __name__ == '__main__':
    main()

  

# Reposted from: www.cnblogs.com/min-R/p/10506628.html