XPath初学(爬取中国天气网,并做出可视化界面)

爬取中国天气网,并做出可视化界面

发现码字真的也是一门技术活:刚开始还可以码码字,现在根本码不动了。这里就随便记录一下自己的学习过程,不定期更新——毕竟还是要毕业的,没有很多时间一直学这个。

import requests
from lxml import etree
from pyecharts.charts import Bar
from pyecharts import options as opt

# Module-level accumulator shared by parse_page (which adds one
# {'city': ..., 'temp': ...} dict per table row) and main (which sorts it
# in place and reads the first ten entries).
BASE_INFO = []

def parse_page(html):
    """Collect city/temperature records from one parsed forecast page.

    Only the first ``div.conMidtab`` on the page is read. Inside it, every
    ``div.conMidtab2`` is walked row by row, skipping the first two ``tr``
    rows of each table. Each usable row adds one
    ``{'city': <str>, 'temp': <int>}`` dict to the module-level
    ``BASE_INFO`` list, and the records found on this page are printed.

    Args:
        html: Root element of the parsed page (as returned by
            ``etree.HTML``), or ``None`` if the download failed.

    Returns:
        None. Results are accumulated in ``BASE_INFO``.
    """
    if html is None:
        # Nothing was downloaded for this page, so there is nothing to parse.
        return
    day_blocks = html.xpath("//div[@class='conMidtab']")
    if not day_blocks:
        return

    parsed = []
    for table in day_blocks[0].xpath(".//div[@class='conMidtab2']"):
        rows = table.xpath(".//tr")[2:]
        for index, tr in enumerate(rows):
            # The first data row of each table carries one extra leading
            # cell, so the city link sits in the second cell there and in
            # the first cell on every following row.
            city_column = 2 if index == 0 else 1
            names = tr.xpath(".//td[%d]/a/text()" % city_column)
            cells = tr.xpath(".//td")
            if not names or len(cells) < 2:
                continue
            # The temperature is read from the second-to-last cell.
            temp_text = cells[-2].xpath("./text()")
            try:
                temp = int(temp_text[0])
            except (IndexError, ValueError):
                # Empty or non-numeric cell: skip the row instead of
                # aborting the whole crawl.
                continue
            # xpath() returns a list of text nodes; store the name itself
            # so the chart axis receives plain strings.
            parsed.append({'city': names[0], 'temp': temp})

    BASE_INFO.extend(parsed)
    for value in parsed:
        print(value)

def get_html(url, timeout=10):
    """Download *url* and return it parsed as an lxml HTML tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The root element produced by ``etree.HTML``, or ``None`` when the
        request fails or the server answers with a status other than 200.
        The failure reason (exception or status code) is printed.
    """
    headers = {
        'User-Agent':'Mozilla/5.0(Windows NT 10.0;Win64;x64) AppleWebKit/537.36(KHTML,likeGecko) Chrome/76.0.3809.132Safari/537.36'
    }
    try:
        # Without a timeout a stalled server would block the script forever.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Report network failures the same way as bad status codes:
        # print the reason and signal failure with None.
        print(exc)
        return None
    if response.status_code != 200:
        print(response.status_code)
        return None
    # Decode the raw bytes explicitly as UTF-8 before handing them to lxml.
    text = response.content.decode('utf-8')
    return etree.HTML(text)


def main():
    """Crawl the regional forecast pages and return the ten highest readings.

    Each URL is downloaded with ``get_html`` and parsed with ``parse_page``,
    which fills the module-level ``BASE_INFO`` list. The list is then sorted
    in place by temperature, highest first, and printed.

    Returns:
        A ``(cities, temps)`` tuple of two parallel lists holding the
        ``'city'`` and ``'temp'`` values of the first ten sorted records
        (fewer if fewer records were collected).
    """
    urls = ['http://www.weather.com.cn/textFC/hb.shtml','http://www.weather.com.cn/textFC/db.shtml',
           'http://www.weather.com.cn/textFC/hz.shtml','http://www.weather.com.cn/textFC/hd.shtml',
           'http://www.weather.com.cn/textFC/hn.shtml','http://www.weather.com.cn/textFC/xb.shtml',
           'http://www.weather.com.cn/textFC/xn.shtml']
    for url in urls:
        html = get_html(url)
        if html is None:
            # The download failed; skip this region rather than passing
            # None to the parser and aborting the whole run.
            continue
        parse_page(html)
    BASE_INFO.sort(key=lambda record: record['temp'], reverse=True)
    print(BASE_INFO)
    top_ten = BASE_INFO[:10]
    cities = [record['city'] for record in top_ten]
    temps = [record['temp'] for record in top_ten]

    return cities, temps
    
if __name__ == '__main__':
    # Gather the ranking data, then draw it as a bar chart saved to an HTML file.
    cities, temps = main()
    chart_title = opt.TitleOpts(title="气温排行版")
    # One unnamed series: cities on the x-axis, temperatures on the y-axis.
    bar = Bar().add_xaxis(cities).add_yaxis('', temps)
    bar = bar.set_global_opts(title_opts=chart_title)
    # NOTE: the original left a disabled bar.render_notebook() call here;
    # only the file-based render below is active.
    bar.render('F:\\气温.html')

在这里插入图片描述

猜你喜欢

转载自blog.csdn.net/qq_36662353/article/details/100540858