groovy爬虫爬取城市历史天气数据

本人在使用基于 Java 的脚本语言 Groovy 做爬虫时,用获取城市历史天气的任务做了练习。数据源隐藏了,有需要的话我可以直接发数据。使用过程中虽然遇到了一些绊脚石,但总体体验还是很好的,脚本语言 Groovy 相比 Java 的确省事儿很多。分享代码,供大家参考。(城市对应的编码存在一个 js 文件里面了,这里不写)

package com.fan

import com.fission.source.httpclient.ApiLibrary
import com.fission.source.httpclient.FanRequest
import com.fission.source.mysql.MySqlTest
import com.fission.source.source.WriteRead
import com.fission.source.utils.Log
import net.sf.json.JSONException
import net.sf.json.JSONObject

/**
 * Crawler that downloads per-month historical weather records for a city and
 * inserts one row per day into the {@code weather} table.
 */
class Weather extends ApiLibrary {

    /**
     * Fetches every year from 2011 through 2018 for one city.
     * Pauses between years for 1000 ms plus the value of {@code getRandomInt(1000)}.
     *
     * @param cityId numeric city code used in the data-source URL
     */
    static getCityAll(int cityId) {
        for (int j in 2011..2018) {
            getCityYear(cityId, j)
            sleep(1000 + getRandomInt(1000))
        }
    }

    /**
     * Fetches every month of the given year for one city.
     * Months after September 2018 are skipped.
     *
     * @param cityId numeric city code used in the data-source URL
     * @param year   four-digit year
     */
    static getCityYear(int cityId, int year) {
        for (int i in 1..12) {
            // NOTE(review): presumably no data existed past 2018-09 when this was written — confirm.
            if (year == 2018 && i > 9) continue
            getMonth(cityId, year, i)
            sleep(1000 + getRandomInt(1000))
        }
    }

    /**
     * Fetches one month of daily records for a city and inserts each day into
     * the {@code weather} table.
     *
     * @param cityId numeric city code used in the data-source URL
     * @param year   four-digit year
     * @param month  month number, 1-12
     */
    static getMonth(int cityId, int year, int month) {
        def yyyymm
        def uri
        if (year > 2016) {
            // Years after 2016: numeric yyyyMM key (e.g. 201701), also used as a directory segment.
            yyyymm = year * 100 + month
            uri = "http://tianqi.2345.com/t/wea_history/js/" + yyyymm + "/" + cityId + "_" + yyyymm + ".js"
        } else {
            // 2016 and earlier: year and month concatenated without padding
            // (EMPTY is presumably the empty string — confirm in ApiLibrary).
            yyyymm = year + EMPTY + month
            uri = "http://tianqi.2345.com/t/wea_history/js/" + cityId + "_" + yyyymm + ".js"
        }
        output(uri)
        // NOTE(review): substring(16) presumably drops a fixed-length JavaScript
        // assignment prefix so only the JSON payload remains — confirm against a live response.
        def response = FanRequest.isGet()
                .setUri(uri)
                .getResponse()
                .getString("content")
                .substring(16)
                .replace(";", EMPTY)
        def weather = JSONObject.fromObject(response)
        def city = weather.getString("city")
        def array = weather.getJSONArray("tqInfo")
        output(array.size())
        // Half-open range: `0..array.size() - 1` becomes the reverse range 0..-1
        // for an empty array and would index elements 0 and -1.
        for (int i in 0..<array.size()) {
            JSONObject info = array.get(i)
            // Entries without a date are skipped.
            if (!info.containsKey("ymd")) continue
            def date = info.getString("ymd")
            // NOTE(review): bWendu/yWendu may be day/night temperature, which would make
            // low/high swapped here — verify against the data source before relying on these columns.
            def low = info.getString("bWendu").replace("℃", EMPTY)
            def high = info.getString("yWendu").replace("℃", EMPTY)
            def wea = info.getString("tianqi")
            def wind = info.getString("fengxiang")
            def fengli = info.getString("fengli")
            // AQI fields are optional; the defaults below are stored when they are absent.
            def aqi = TEST_ERROR_CODE, aqiInfo = EMPTY, aqiLevel = TEST_ERROR_CODE;
            if (info.containsKey("aqi")) {
                aqi = info.getInt("aqi")
                aqiInfo = info.getString("aqiInfo")
                aqiLevel = info.getInt("aqiLevel")
            }
            // NOTE(review): the statement is built by formatting crawled text straight into SQL.
            // A value containing a double quote breaks the statement (and is an injection risk);
            // switch to a parameterized statement if MySqlTest offers one.
            String sql = "INSERT INTO weather (city,low,high,date,wind,windsize,weather,aqi,aqilevel,aqiinfo) VALUES (\"%s\",%d,%d,\"%s\",\"%s\",\"%s\",\"%s\",%d,%d,\"%s\");"
            sql = String.format(sql, city, changeStringToInt(low), changeStringToInt(high), date, wind, fengli, wea, aqi, aqiLevel, aqiInfo)
            output(sql)
            MySqlTest.sendWork(sql)
        }
    }
}

这里是数据库的数据截图:

欢迎有兴趣的朋友一起交流:QQ群号:340964272

猜你喜欢

转载自blog.csdn.net/Fhaohaizi/article/details/82387326