代码:
import logging
import random
import time
import urllib.request
import urllib.response

import pymysql
import schedule
from lxml import etree
# Pool of browser User-Agent strings; one is picked at random for each request.
_USER_AGENTS = (
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
    "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
    "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)",
)

# Seconds to wait on each HTTP request; without a timeout a stalled
# connection would block the scheduler loop indefinitely.
_REQUEST_TIMEOUT = 30

# Moji Weather page: current temperature and current description.
_CURRENT_TEMP_XPATH = "//div[@class='left']//div[@class='wea_weather clearfix']/em/text()"
_CURRENT_DESC_XPATH = "//div[@class='left']//div[@class='wea_weather clearfix']/b/text()"
# weather.com.cn one-day page: forecast descriptions and forecast temperatures.
_FORECAST_DESC_XPATH = "//div[@class='t']//ul[@class='clearfix']/li/p[@class='wea']/text()"
_FORECAST_TEMP_XPATH = "//div[@class='t']//ul[@class='clearfix']//p[@class='tem']/span//text()"

# Parameterised insert: the values come from scraped pages, so they must be
# escaped by the driver rather than spliced into the statement text.
_INSERT_SQL = """INSERT INTO weather(
    placeme,currentwerme,minmaxme,currentdesme,forecastdesme,
    placeyou,currentweryou,minmaxyou,currentdesyou,forecastdesyou,
    updatedate
)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""


def _fetch_tree(url):
    """Download *url* with a random User-Agent and return it parsed by lxml."""
    request = urllib.request.Request(
        url=url, headers={'User-Agent': random.choice(_USER_AGENTS)})
    # The context manager closes the HTTP response even if read/decode fails.
    with urllib.request.urlopen(request, timeout=_REQUEST_TIMEOUT) as response:
        page = response.read().decode('utf-8')
    return etree.HTML(page)


def _texts(tree, xpath, count, url):
    """Return the first *count* text nodes matched by *xpath* as plain ``str``.

    Raises:
        IndexError: fewer than *count* nodes matched (for example because the
            page layout changed).
    """
    nodes = tree.xpath(xpath)
    if len(nodes) < count:
        raise IndexError(
            "expected at least %d match(es) for %r on %s, found %d"
            % (count, xpath, url, len(nodes)))
    # str() turns lxml's string-result objects into ordinary strings.
    return [str(node) for node in nodes[:count]]


def _scrape_place(place, current_url, forecast_url):
    """Return ``[place, current temp, forecast temps, current desc, forecast desc]``.

    The order matches one place's group of columns in the ``weather`` table.
    """
    current_tree = _fetch_tree(current_url)
    current_temp, = _texts(current_tree, _CURRENT_TEMP_XPATH, 1, current_url)
    current_desc, = _texts(current_tree, _CURRENT_DESC_XPATH, 1, current_url)

    forecast_tree = _fetch_tree(forecast_url)
    first_desc, second_desc = _texts(
        forecast_tree, _FORECAST_DESC_XPATH, 2, forecast_url)
    # NOTE(review): the first two matches are joined in page order and treated
    # as the day's two temperature bounds; confirm which is the low and which
    # is the high against the live page.
    first_temp, second_temp = _texts(
        forecast_tree, _FORECAST_TEMP_XPATH, 2, forecast_url)

    return [
        place,
        current_temp + "℃",
        first_temp + "/" + second_temp + "℃",
        current_desc,
        first_desc + "转" + second_desc,
    ]


def _store_row(values):
    """Insert one row of 11 values into the ``weather`` table (best effort)."""
    # Keyword arguments: PyMySQL 1.0+ no longer accepts positional ones.
    db = pymysql.connect(host="****", user="****", password="****",
                         database="****", port=3306, charset='utf8')
    try:
        with db.cursor() as cursor:
            cursor.execute(_INSERT_SQL, values)
        db.commit()
    except Exception:
        # Storing stays best-effort, but the failure is logged rather than
        # silently discarded.
        logging.getLogger(__name__).exception(
            "Failed to insert weather row; rolling back")
        db.rollback()
    finally:
        # Always release the connection, even if the rollback itself fails.
        db.close()


def sprider():
    """Scrape the weather for two places and store it as one database row.

    For each place the current temperature and description are read from its
    Moji Weather page, and the forecast description and temperatures from its
    weather.com.cn one-day page. The two results plus the current Unix
    timestamp are inserted into the ``weather`` table.

    Raises:
        urllib.error.URLError: a page could not be downloaded.
        IndexError: an expected element was missing from a page.

    Errors while executing or committing the insert are logged and rolled
    back instead of being raised.
    """
    places = (
        # (place name, Moji Weather URL, weather.com.cn one-day forecast URL)
        ("白山",
         "https://tianqi.moji.com/weather/china/jilin/baishan",
         "http://www.weather.com.cn/weather1d/101060901.shtml"),
        ("营山",
         "https://tianqi.moji.com/weather/china/sichuan/yingshan-county",
         "http://www.weather.com.cn/weather1d/101270503.shtml"),
    )

    row = []
    for place, current_url, forecast_url in places:
        row.extend(_scrape_place(place, current_url, forecast_url))
    row.append(int(time.time()))  # updatedate: seconds since the epoch

    _store_row(row)
def _run_sprider_safely():
    """Run ``sprider`` once, logging any failure instead of raising it.

    ``schedule`` does not catch exceptions raised by a job, so an unhandled
    error (for example a temporary network failure) would end the loop below
    and stop the service. Returning normally also lets ``schedule`` plan the
    next run as usual.
    """
    try:
        sprider()
    except Exception:
        logging.getLogger(__name__).exception(
            "sprider failed; will retry at the next scheduled run")


# Run the scraper every 2 hours (the first run happens 2 hours after start-up).
schedule.every(120).minutes.do(_run_sprider_safely)
while True:
    # Execute any job that is due, then poll again one second later.
    schedule.run_pending()
    time.sleep(1)
服务器端运行
ps -ef |grep python //查看运行的python程序
kill -9 进程号(PID) //杀掉进程(进程号取自上面 ps 输出的第二列,不是进程名)
nohup python3 -u test.py > test.log 2>&1 & //后台运行(输出日志)
//需要注意这里用python3 和pip3
nohup python3 -u test.py >/dev/null 2>&1& //后台运行(不输出日志)