针对 tianqi24.com 的“重庆沙坪坝区历史天气数据查询”页面,爬取重庆沙坪坝区2021年1月至12月的历史天气数据,代码如下:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import csv
def isexsit(li):
    """Join the tokens around the first standalone ``'/'`` into one token.

    When the list contains an element equal to ``'/'`` that has a neighbour
    on both sides, the three elements ``prev, '/', next`` are replaced by the
    single string ``'prev/next'``. Only the first ``'/'`` element is handled.

    Args:
        li: List of string tokens. It is modified in place.

    Returns:
        The same list object ``li``. It is returned unchanged when there is
        no ``'/'`` element, or when the ``'/'`` is the first or last element
        (there is nothing to join on one side).
    """
    if '/' in li:
        index = li.index('/')
        # A '/' at either end has no neighbour on one side: index - 1 would
        # wrap around to the last element, and index + 1 would run past the
        # end of the list. Leave such a list as it is.
        if 0 < index < len(li) - 1:
            zp = f"{li[index - 1]}{li[index]}{li[index + 1]}"
            print(zp)
            # Replace the three tokens with the merged one in a single step.
            li[index - 1:index + 2] = [zp]
    return li
def not_empty(s):
    """Predicate for ``filter``: truthy only for strings with visible content.

    A falsy ``s`` (such as ``''`` or ``None``) is returned as is; otherwise
    the result is ``s`` with surrounding whitespace stripped, which is an
    empty (falsy) string when ``s`` holds nothing but whitespace.
    """
    if not s:
        return s
    return s.strip()
def get_data(url,i,filename):
    """Download one history page and append its ``<li>`` rows to a CSV file.

    All ``<li>`` elements of the page are collected, the leading ones are
    skipped, and the whitespace-separated text of each remaining element is
    written as one CSV row after ``isexsit`` has merged an ``a / b`` token
    triple into ``a/b``. Each row is also printed.

    Args:
        url: Address of the page to fetch.
        i: Month number of the page. For ``i == 1`` rows start at the 6th
            ``<li>``; for every other value they start at the 7th.
            NOTE(review): presumably this keeps the table header from the
            first month only - confirm against the page layout.
        filename: Path of the CSV file; it is opened in append mode.

    Returns:
        None.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps a stalled connection from blocking the whole run, and
    # the status check keeps an error page from being written to the CSV.
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.content.decode('UTF-8'), 'html.parser')
    li_list = soup.find_all('li')

    if i == 1:
        start, label = 5, "sub_data1:"
    else:
        start, label = 6, "sub_data:"

    # NOTE(review): no explicit encoding is given, so the platform default is
    # used and errors='ignore' silently drops characters it cannot encode.
    with open(filename, 'a', errors='ignore', newline='') as f:
        f_csv = csv.writer(f)
        for data in li_list[start:]:
            # str.split() without a separator never yields empty or
            # whitespace-only tokens, so no extra filtering is needed.
            sub_data = isexsit(data.text.split())
            f_csv.writerow(sub_data)
            print(label, sub_data)
# year: the year to scrape, as a string.
# month: the last month to fetch; e.g. month = 11 fetches months 1 through 11.
year = '2021'
month = 12
for i in range(1, month + 1):
    print(i)
    # The page address embeds the year followed by the zero-padded month.
    page_url = "https://www.tianqi24.com/shapingba/history{0}{1:0>2d}.html".format(year, i)
    csv_name = f"res{year}.csv"
    a = get_data(page_url, i, filename=csv_name)
结果: