Crawling recent tender information from the Sichuan public resources trading platform with URLConnection

A web crawler that fetches the most recent bid information from the public resources trading platform of Sichuan Province.

I: Introduce the JSON dependency

<dependency>
       <groupId>net.sf.json-lib</groupId>
      <artifactId>json-lib</artifactId>
      <version>2.4</version>
     <classifier>jdk15</classifier>
</dependency>

II: Obtain a URLConnection for the request URL

package com.svse.pachong;

import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;

import org.apache.log4j.Logger;

/**
 * Obtains a URLConnection for a request URL and checks that the
 * resource can actually be opened.
 *
 * @author lenovo
 * @date 2019-01-22
 */
public class open_url_test {

    public static Logger logger = Logger.getLogger(open_url_test.class);

    /**
     * Tries to open the given URL with an HTTP GET request.
     *
     * @param url_infor the URL to open
     * @return {@code true} if the response body could be opened,
     *         {@code false} otherwise
     * @throws Exception if the URL is malformed or the connection
     *         cannot be created
     */
    public boolean openurl(String url_infor) throws Exception {
        URL url = new URL(url_infor);
        // Parent connection class (abstract).
        URLConnection urlConnection = url.openConnection();

        // HTTP-specific connection class.
        HttpURLConnection httpURLConnection = (HttpURLConnection) urlConnection;

        /*
         * Set the request method; the default is GET. (Fetching from this
         * server must use GET — a POST would come back as 405.)
         */
        httpURLConnection.setRequestMethod("GET");
        // Set the character encoding.
        httpURLConnection.setRequestProperty("Charset", "UTF-8");

        // Opens the communication link to the resource referenced by the
        // URL (if the connection has not been established yet).
        int code = httpURLConnection.getResponseCode();
        System.out.println("code: " + code); // 200 means the connection succeeded.

        // try-with-resources so the stream is always closed (the original
        // leaked it).
        try (InputStream inputStream = httpURLConnection.getInputStream()) {
            System.out.println("connection successful");
            logger.info("open " + url_infor + " success!");
            return true;
        } catch (Exception exception) {
            logger.info("open " + url_infor + " failed!");
            return false;
        }
    }
}

III: Parse the desired data from the crawled URL and return it in JSON format

package com.svse.pachong;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.Charset;

import net.sf.json.JSONException;
import net.sf.json.JSONObject;

/**
 * Parses the desired data from a crawled URL and returns it as JSON.
 */
public class readData {

    /**
     * Reads the entire response body of the given URL and parses it
     * into a JSON object.
     *
     * @param urlString URL of the site to crawl
     * @return the parsed JSON result
     * @throws IOException if the URL cannot be opened or read
     * @throws JSONException if the response body is not valid JSON
     */
    public static JSONObject readData(String urlString) throws IOException, JSONException {
        InputStream is = new URL(urlString).openStream();
        try {
            BufferedReader rd =
                    new BufferedReader(new InputStreamReader(is, Charset.forName("UTF-8")));
            StringBuilder sb = new StringBuilder();
            int cp;
            // Read character by character until EOF.
            while ((cp = rd.read()) != -1) {
                sb.append((char) cp);
            }
            return JSONObject.fromObject(sb.toString());
        } finally {
            // Always release the underlying stream.
            is.close();
        }
    }
}

IV: Crawler entry point

package com.svse.pachong;

import java.io.IOException;

import net.sf.json.JSONArray;
import net.sf.json.JSONException;
import net.sf.json.JSONObject;

/**
 * Crawler entry point.
 *
 * @author lenovo
 * @date 2019-01-22
 */
public class Main {

    /** Query URL of the Sichuan public resources trading platform. */
    static String urlString = "http://www.scggzy.gov.cn/Info/GetInfoListNew?keywords=&times=4&timesStart=&timesEnd=&province=&area=&businessType=&informationType=&industryType=&page=1&parm=1534929604640";

    public static void main(String[] args) {
        open_url_test oUrl = new open_url_test();
        try {
            if (oUrl.openurl(urlString)) {
                // readData is static — call it through the class, no
                // @SuppressWarnings("static-access") needed.
                JSONObject json = readData.readData(urlString);
                JSONObject ob = JSONObject.fromObject(json);

                // JSONObject -> String: strip the outer braces of the "data"
                // payload and re-wrap it as a JSON array literal.
                String data = ob.get("data").toString();
                data = "[" + data.substring(1, data.length() - 1) + "]";

                JSONArray json2 = JSONArray.fromObject(data); // String -> JSONArray

                // Print at most the first 10 entries; guard against shorter
                // result lists (the fixed bound used to throw
                // IndexOutOfBoundsException).
                int count = Math.min(10, json2.size());
                for (int i = 0; i < count; i++) {
                    JSONObject jsonObject = (JSONObject) json2.get(i);
                    System.out.println("--------------------------------------------");
                    System.out.println("项目: " + jsonObject.get("Title"));
                    System.out.println("时间: " + jsonObject.get("CreateDateStr"));
                    System.out.println(jsonObject.get("TableName"));
                    System.out.println(jsonObject.get("Link"));
                    System.out.println(jsonObject.get("province") + " " + jsonObject.get("username") + " " + jsonObject.get("businessType") + "             " + jsonObject.get("NoticeType"));
                }
            } else {
                System.out.println("解析数据失败!");
            }
        } catch (Exception e) {
            // openurl declares a bare Exception, so one boundary catch covers
            // JSONException and IOException as well.
            e.printStackTrace();
        }
    }
}

 

V: Test results

  At this point, the entire crawling task is over!

You may also like

Origin www.cnblogs.com/zhaosq/p/10304063.html