Java获取网页内容实现自动化(IASI)

IasiClient
package com.iasi.client;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Properties;

import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.CookieStore;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.params.ConnRoutePNames;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicHeader;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;

public class IasiClient {
	// Settings loaded from iasi.properties by initProps().
	private Properties props = new Properties();
	// NOTE(review): this field is never read or assigned in this class;
	// initProps() declares a local variable with the same name instead.
	private String resDate;
	// Order size sent with the final order; "10" until last() parses a value
	// from the orderNow response.
	private String dataSize = "10";
	// First day of the search range, set by initProps().
	private String resStartDate;
	// Last day of the search range, set by initProps().
	private String resEndDate;

	/**
	 * Loads {@code iasi.properties} from the working directory and derives the
	 * search date range.
	 * <ul>
	 * <li>no dates given: start and end are both "today minus {@code dayBefore} days",
	 *     formatted as {@code yyyy-MM-dd} ({@code dayBefore} defaults to 0 when it is
	 *     missing or not a number);</li>
	 * <li>one date given: it is used as both start and end;</li>
	 * <li>two or more dates given: the first is the start, the second the end.</li>
	 * </ul>
	 * A failure to read the properties file is printed and otherwise ignored, so the
	 * method continues with whatever properties are available.
	 *
	 * @param dates optional explicit dates (typically the command-line arguments)
	 * @throws IOException if the log file cannot be written
	 */
	private void initProps(String[] dates) throws IOException {
		InputStream is = null;
		try {
			// The configuration file is expected in the process working directory.
			is = new FileInputStream(new File(System.getProperty("user.dir")+"/iasi.properties"));
			props.load(is);
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Always release the file handle, even when load() fails.
			if (is != null) {
				try {
					is.close();
				} catch (IOException ignored) {
					// Nothing useful can be done if closing a read-only stream fails.
				}
			}
		}
		int dayBefore = 0;
		try {
			dayBefore = Integer.parseInt(props.getProperty("dayBefore"));
		} catch (NumberFormatException e) {
			// Missing or malformed value: keep the default of 0 days.
			e.printStackTrace();
		}
		if (dates.length == 0) {
			Calendar c = Calendar.getInstance();
			c.add(Calendar.DATE, dayBefore * -1);
			String defaultDate = new SimpleDateFormat("yyyy-MM-dd").format(c.getTime());
			resStartDate = defaultDate;
			resEndDate = defaultDate;
		} else if (dates.length == 1) {
			resStartDate = dates[0];
			resEndDate = dates[0];
		} else {
			resStartDate = dates[0];
			resEndDate = dates[1];
		}
		writeLogToFile("Grab date:" + resStartDate+" "+resEndDate);
	}

	/**
	 * Sends the search/order request (fifth request) built from the configured
	 * bounding box and times, the values collected by getMiddleParameters() and the
	 * date range from initProps(), then reads the order size from the response and
	 * opens the shopping cart (sixth request).
	 * <p>
	 * The order size is taken from the {@code <size>...</size>} element of the
	 * response. When that element is absent or empty-tagged, the previous value of
	 * {@code dataSize} is kept instead of failing with an index error.
	 *
	 * @param httpclient client that already carries the logged-in session
	 * @param sessionId  value for the {@code Cookie} request header
	 * @throws ClientProtocolException on an HTTP protocol error
	 * @throws IOException on a connection failure or when the log cannot be written
	 */
	private void last(HttpClient httpclient, String sessionId) throws ClientProtocolException,IOException {
		String orderNowUrl = "http://www.class.ncdc.noaa.gov/saa/prod/orderNow";
		HttpPost httpost = new HttpPost(orderNowUrl);
		System.out.println("第五次请求链接地址(post):"+orderNowUrl);
		// Per the original author's notes, the response is an ISO-8859-1 XML document
		// whose root element <qsResults> contains <message/>, <total/>, <size/>,
		// <hits/>, <max/> and <subord>.

		httpost.setHeader("Cookie", sessionId);
		// Everything added to formList is sent as a request parameter.
		List<NameValuePair> formList = new ArrayList<NameValuePair>();
		formList.add(new BasicNameValuePair("search_opt", "SC"));
		formList.add(new BasicNameValuePair("gid_pattern", ""));
		formList.add(new BasicNameValuePair("orb_pattern", ""));
		formList.add(new BasicNameValuePair(
				"dsname_pattern",
				"^IASI_(CCR|L02|XXX_1C)_M\\d\\d(_|_V\\d\\d\\d\\d\\d\\d_)20\\d\\d(0[1-9]|1[012])([012][0-9]|3[01])(.{0,65})$"));
		// Values read from iasi.properties (sample file: 90 / -180 / 180 / -90,
		// 00:00:00 to 23:59:59).
		formList.add(new BasicNameValuePair("nlat", props.getProperty("nlat")));
		formList.add(new BasicNameValuePair("wlon", props.getProperty("wlon")));
		formList.add(new BasicNameValuePair("elon", props.getProperty("elon")));
		formList.add(new BasicNameValuePair("slat", props.getProperty("slat")));
		formList.add(new BasicNameValuePair("start_time", props.getProperty("start_time")));
		formList.add(new BasicNameValuePair("end_time", props.getProperty("end_time")));
		formList.add(new BasicNameValuePair("minDiff", "0.0"));
		// Values collected from the search page by getMiddleParameters().
		formList.add(new BasicNameValuePair("data_start", data_start));
		formList.add(new BasicNameValuePair("data_end", data_end));
		formList.add(new BasicNameValuePair("max_days_val", max_days_val));
		// Date range computed by initProps().
		formList.add(new BasicNameValuePair("start_date", resStartDate));
		formList.add(new BasicNameValuePair("end_date", resEndDate));
		formList.add(new BasicNameValuePair("between_through", "T"));
		formList.add(new BasicNameValuePair("Datatype", "IASI1CAIP"));
		formList.add(new BasicNameValuePair("limit_search", "Y"));
		formList.add(new BasicNameValuePair("max_lat_range", "180"));
		formList.add(new BasicNameValuePair("max_lon_range", "360"));
		formList.add(new BasicNameValuePair("datatype_family", "IASI"));
		UrlEncodedFormEntity form = new UrlEncodedFormEntity(formList);
		httpost.setEntity(form);
		HttpResponse response = httpclient.execute(httpost);
		writeLogToFile("-------------xml----------------");
		String xml = EntityUtils.toString(response.getEntity());
		// Log the raw response so the outcome of the request can be inspected.
		writeLogToFile(xml);
		// The order size is needed by the final order request.
		int sizeOpen = xml.indexOf("<size>");
		int sizeClose = xml.indexOf("</size>");
		if (sizeOpen != -1 && sizeClose > sizeOpen) {
			dataSize = xml.substring(sizeOpen + 6, sizeClose);
		} else {
			// No populated <size> element: keep the previous value rather than
			// crash on an out-of-range substring.
			writeLogToFile("middle parameters Size: <size> element not found, keeping previous value");
		}
		writeLogToFile("middle parameters Size:" + dataSize);
		writeLogToFile("-------------end----------------");
		EntityUtils.consume(response.getEntity());

		String cartUrl = "http://www.class.ncdc.noaa.gov/saa/products/shopping_cart";
		httpost = new HttpPost(cartUrl);
		System.out.println("第六次请求链接地址(post):"+cartUrl);
		httpost.setHeader("Cookie", sessionId);
		response = httpclient.execute(httpost);
		// Discard the body so the same client (and session) can issue the next request.
		EntityUtils.consume(response.getEntity());
	}
	/**
	 * Main entry point: runs the whole ordering sequence against the CLASS site.
	 * <ol>
	 * <li>load the configuration and date range (initProps);</li>
	 * <li>register an "https" scheme and route all traffic through the fixed proxy;</li>
	 * <li>POST the login form with the configured user name and password;</li>
	 * <li>follow the {@code Location} header returned by the login response;</li>
	 * <li>collect the search-page parameters (getMiddleParameters);</li>
	 * <li>submit the search/order request (last);</li>
	 * <li>place the order (doLast).</li>
	 * </ol>
	 * The connection manager is shut down when the sequence ends, whether it
	 * succeeded or failed.
	 *
	 * @param dates optional explicit dates, see initProps
	 * @throws NoSuchAlgorithmException if the "TLS" SSL context is unavailable
	 * @throws KeyManagementException if the SSL context cannot be initialised
	 * @throws ClientProtocolException on an HTTP protocol error, or when the login
	 *         response carries no {@code Location} header to follow
	 * @throws IOException on a connection failure or when the log cannot be written
	 */
	public void doConnection(String[] dates) throws NoSuchAlgorithmException, KeyManagementException,
			ClientProtocolException, IOException {
		System.out.println("--------3---------");
		this.initProps(dates);
		DefaultHttpClient httpClient = new DefaultHttpClient();
		try {
			// NOTE(security): this trust manager accepts every certificate, so the
			// HTTPS login is not protected against man-in-the-middle attacks. It is
			// kept as-is because replacing it could break the existing proxy setup;
			// review before using with real credentials.
			TrustManager easyTrustManager = new X509TrustManager() {

				public void checkClientTrusted(
						java.security.cert.X509Certificate[] x509Certificates, String s)
						throws java.security.cert.CertificateException {
					// Intentionally empty: no validation is performed.
				}

				public void checkServerTrusted(
						java.security.cert.X509Certificate[] x509Certificates, String s)
						throws java.security.cert.CertificateException {
					// Intentionally empty: no validation is performed.
				}

				public java.security.cert.X509Certificate[] getAcceptedIssuers() {
					return new java.security.cert.X509Certificate[0];
				}
			};

			SSLContext sslcontext = SSLContext.getInstance("TLS");
			sslcontext.init(null, new TrustManager[] { easyTrustManager }, null);
			SSLSocketFactory sf = new SSLSocketFactory(sslcontext);
			Scheme sch = new Scheme("https", 443, sf);
			httpClient.getConnectionManager().getSchemeRegistry().register(sch);

			// Route every request through the fixed HTTP proxy.
			HttpHost proxy = new HttpHost("10.24.5.105", 39002, "http");
			httpClient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxy);
			System.out.println("代理设置完毕。");
			System.out.println("第一次请求链接地址(post):"+"https://www.class.ncdc.noaa.gov/saa/products/j_security_check");
			// Login form target.
			HttpPost httppost = new HttpPost("https://www.class.ncdc.noaa.gov/saa/products/j_security_check");
			// Credentials come from iasi.properties; they are sent in the form body.
			List<NameValuePair> form = new ArrayList<NameValuePair>();
			form.add(new BasicNameValuePair("j_username", props.getProperty("userName")));
			form.add(new BasicNameValuePair("j_password", props.getProperty("userPassword")));
			UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(form);
			httppost.setEntity(formEntity);
			// Browser-like request headers.
			httppost.setHeader(new BasicHeader("Accept",
					"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"));
			httppost.setHeader(new BasicHeader("Accept-Encoding", "gzip, deflate"));
			httppost.setHeader(new BasicHeader("Accept-Language",
					"zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3"));
			httppost.setHeader(new BasicHeader("Connection", "keep-alive"));
			// getSessionId() issues its own request (the "second" one) to obtain the
			// JSESSIONID cookie before the login form is sent.
			httppost.setHeader(new BasicHeader("Cookie", this.getSessionId(httpClient)));
			httppost.setHeader(new BasicHeader("Host", "www.class.ncdc.noaa.gov"));
			httppost.setHeader(new BasicHeader("Referer","https://rda.ucar.edu/cgi-bin/login?resource=%2Fsaa%2Fproducts%2Fwelcome"));
			httppost.setHeader(new BasicHeader("User-Agent",
					"Mozilla/5.0 (Windows NT 6.1; rv:12.0) Gecko/20100101 Firefox/12.0 FirePHP/0.7.1"));
			httppost.setHeader(new BasicHeader("x-insight", "activate"));
			writeLogToFile("executing request:" + httppost.getRequestLine());
			// NOTE(review): this prints the hash code of the request line, not an HTTP
			// status code; the real status code is printed after execute() below.
			System.out.println("执行返回状态码:"+httppost.getRequestLine().hashCode());
			HttpResponse response = httpClient.execute(httppost);
			HttpEntity entity = response.getEntity();
			writeLogToFile("-----------------landing start---登录开始--------------------");
			// A successful login is expected to answer with a redirect (302).
			System.out.println(response.getStatusLine().getStatusCode());
			writeLogToFile(response.getStatusLine().toString());
			if (entity != null) {
				writeLogToFile("Response content length: " + entity.getContentLength());
			}
			writeLogToFile("----------------landing end-------登录结束-----------------");
			Header[] headers = response.getAllHeaders();
			String location = "";
			writeLogToFile("---------------------Befor landing jump page Head Info start 提取登录页面跳转之前的头部信息--------------------------");
			for (Header header : headers) {
				if ("Location".equals(header.getName())) {
					// Redirect target to follow after the login.
					location = header.getValue();
				}
				writeLogToFile(header.getName() + ":" + header.getValue());
			}
			writeLogToFile("---------------------Befor landing jump page Head Info end-----------------------");
			writeLogToFile("---------------------After landing jump page start-------登录页面跳转之后-------------------");
			EntityUtils.consume(entity);
			if (location.length() == 0) {
				// Without a redirect there is nothing to follow; fail with a clear
				// message instead of issuing a request for an empty URI.
				throw new ClientProtocolException("Login response carried no Location header ("
						+ response.getStatusLine() + ")");
			}
			// Follow the post-login redirect; no parameters are needed.
			HttpGet get = new HttpGet(location);
			System.out.println("第三次请求链接地址(get):"+location);
			response = httpClient.execute(get);
			writeLogToFile("---------------------After landing jump page end-----------------------");
			EntityUtils.consume(response.getEntity());
			writeLogToFile("---------------------middle parameters start--------------------------");
			// Read data_start / data_end / max_days_val from the search page.
			this.getMiddleParameters(httpClient, this.getSessionId(httpClient));
			writeLogToFile("---------------------middle parameters end-----------------------");
			writeLogToFile("----------------get Info by ajax start--通过ajax获取信息----------------------");
			this.last(httpClient, this.getSessionId(httpClient));
			writeLogToFile("----------------get Info by ajax end---------------------");
			writeLogToFile("----------------Submit email Info get result start------------------------");
			this.doLast(httpClient, this.getSessionId(httpClient));
			writeLogToFile("----------------Submit email Info get result end---------------------");
		} finally {
			httpClient.getConnectionManager().shutdown();
		}
	}
	
	/**
	 * Places the order (seventh request) using the configured e-mail address and
	 * the order size found by last(), then records the confirmation number.
	 * <p>
	 * On success an empty marker file is created in the {@code resultPath} folder.
	 * Its name is the confirmation number up to (not including) the first ".", or
	 * the whole number when it contains no ".".
	 *
	 * @param httpclient client that already carries the logged-in session
	 * @param sessionId  value for the {@code Cookie} request header
	 * @throws ClientProtocolException on an HTTP protocol error
	 * @throws IOException on a connection failure, when the log or marker file
	 *         cannot be written, or when the response contains no confirmation number
	 */
	private void doLast(HttpClient httpclient, String sessionId) throws ClientProtocolException,IOException {
		System.out.println("--------4---------");
		String shopUrl = "http://www.class.ncdc.noaa.gov/saa/products/shop";
		HttpPost httpost = new HttpPost(shopUrl);
		System.out.println("第七次请求链接地址(post):"+shopUrl);
		httpost.setHeader("Cookie", sessionId);
		List<NameValuePair> formList = new ArrayList<NameValuePair>();

		formList.add(new BasicNameValuePair("cocoon-action", "PlaceOrder"));
		formList.add(new BasicNameValuePair("deliv_manifest_opt", "N"));
		formList.add(new BasicNameValuePair("delivery_media", ""));
		formList.add(new BasicNameValuePair("delivery_method", ""));
		formList.add(new BasicNameValuePair("ekey", "N"));
		formList.add(new BasicNameValuePair("email", props.getProperty("email")));
		formList.add(new BasicNameValuePair("encryption", ""));
		formList.add(new BasicNameValuePair("media_list", "0"));
		formList.add(new BasicNameValuePair("order_comment", ""));
		formList.add(new BasicNameValuePair("order_now_IASI", "on"));
		formList.add(new BasicNameValuePair("order_size", dataSize));
		formList.add(new BasicNameValuePair("page", "cart"));
		formList.add(new BasicNameValuePair("price_est", "0"));
		formList.add(new BasicNameValuePair("product_number", ""));
		formList.add(new BasicNameValuePair("quantity_est", "0"));
		UrlEncodedFormEntity form = new UrlEncodedFormEntity(formList);
		httpost.setEntity(form);
		HttpResponse response = httpclient.execute(httpost);
		String res = EntityUtils.toString(response.getEntity());
		System.out.println("-------------ResultPage----------------");
		System.out.println(res);
		System.out.println("-------------ResultPage----------------");

		String orderNumber = extractConfirmationNumber(res);
		writeLogToFile("-------------lastResult----------------");
		if (orderNumber == null) {
			// Do not create a marker file named after arbitrary page content.
			writeLogToFile("ordernum not found in result page");
			writeLogToFile("-------------lastResult----------------");
			throw new IOException("Order response did not contain a confirmation number");
		}
		writeLogToFile("ordernum="+orderNumber);
		File folder =  new File(props.getProperty("resultPath"));
		if(!folder.exists()){
			folder.mkdirs();
		}
		int dot = orderNumber.indexOf(".");
		String markerName = dot == -1 ? orderNumber : orderNumber.substring(0, dot);
		String markerPath = folder.getPath()+"/"+markerName;
		new File(markerPath).createNewFile();
		System.out.println(markerPath+"----------");
		writeLogToFile("-------------lastResult----------------");
	}

	/**
	 * Extracts the text between "Your confirmation number is:" and the next
	 * {@code <br>} from the result page.
	 *
	 * @param page result page returned by the order request, may be null
	 * @return the trimmed confirmation number, or null when the marker or the
	 *         terminating {@code <br>} is missing or the number is empty
	 */
	private static String extractConfirmationNumber(String page) {
		final String marker = "Your confirmation number is:";
		if (page == null) {
			return null;
		}
		int markerAt = page.indexOf(marker);
		if (markerAt == -1) {
			return null;
		}
		String tail = page.substring(markerAt + marker.length());
		int lineBreak = tail.indexOf("<br>");
		if (lineBreak == -1) {
			return null;
		}
		String number = tail.substring(0, lineBreak).trim();
		return number.length() == 0 ? null : number;
	}
	
	/**
	 * Requests the login page and returns the session cookie currently held by the
	 * client's cookie store.
	 * <p>
	 * Every cookie in the store is logged. If several cookies are named
	 * {@code JSESSIONID}, the last one encountered is returned.
	 *
	 * @param httpClient client whose cookie store is inspected
	 * @return {@code "JSESSIONID=<value>"}, or null when the store has no such cookie
	 * @throws ClientProtocolException on an HTTP protocol error
	 * @throws IOException on a connection failure or when the log cannot be written
	 */
	private String getSessionId(DefaultHttpClient httpClient) throws ClientProtocolException,
			IOException {
		System.out.println("--------5---------");
		final String loginPageUrl = "https://www.class.ncdc.noaa.gov/saa/products/classlogin?resource=%2Fsaa%2Fproducts%2Fwelcome";
		System.out.println("第二次请求链接地址(post):"+loginPageUrl);
		HttpResponse loginPage = httpClient.execute(new HttpPost(loginPageUrl));

		writeLogToFile("-----------------Get browser default SessionId-----------------------");
		String sessionCookie = null;
		List<Cookie> stored = httpClient.getCookieStore().getCookies();
		for (int i = 0; i < stored.size(); i++) {
			Cookie current = stored.get(i);
			String cookieName = current.getName();
			writeLogToFile(cookieName + ":" + current.getValue());
			if (!"JSESSIONID".equals(cookieName)) {
				continue;
			}
			// Header-ready form: name=value.
			sessionCookie = "JSESSIONID=" + current.getValue();
		}
		writeLogToFile("-----------------Get browser default SessionId end-------------------");

		// Release the connection before handing the result back.
		EntityUtils.consume(loginPage.getEntity());
		return sessionCookie;
	}

	
	// Values read from the search page by getMiddleParameters() and sent back
	// unchanged as form parameters of the same names by last().
	// Earliest date offered by the search form.
	private String data_start = "";
	// Latest date offered by the search form.
	private String data_end = "";
	// NOTE(review): presumably the maximum number of days a search may span — confirm.
	private String max_days_val = "";

	/**
	 * Fetches the IASI search page (fourth request) and reads the values of
	 * {@code data_start}, {@code data_end} and {@code max_days_val} from it.
	 * <p>
	 * When a value cannot be located, a fallback is used instead: "2007-02-28" for
	 * {@code data_start}, today's date for {@code data_end} and "365" for
	 * {@code max_days_val}.
	 *
	 * @param httpClient client that already carries the logged-in session
	 * @param sessionId  value for the {@code Cookie} request header
	 * @throws ClientProtocolException on an HTTP protocol error
	 * @throws IOException on a connection failure or when the log cannot be written
	 */
	private void getMiddleParameters(DefaultHttpClient httpClient, String sessionId)
			throws ClientProtocolException, IOException {
		System.out.println("--------6---------");
		// Search page for the IASI data family; it carries the date limits of the form.
		String url = "http://www.class.ncdc.noaa.gov/saa/products/search?sub_id=0&datatype_family=IASI";
		System.out.println("第四次请求链接地址(get):"+url);
		HttpGet get = new HttpGet(url);
		get.setHeader("Cookie", sessionId);
		HttpResponse response = httpClient.execute(get);
		String str = EntityUtils.toString(response.getEntity());

		String parsed = extractFieldValue(str, "data_start");
		data_start = parsed != null ? parsed : "2007-02-28";
		writeLogToFile(data_start);

		parsed = extractFieldValue(str, "data_end");
		data_end = parsed != null ? parsed : new SimpleDateFormat("yyyy-MM-dd").format(new Date());
		writeLogToFile(data_end);

		parsed = extractFieldValue(str, "max_days_val");
		max_days_val = parsed != null ? parsed : "365";
		writeLogToFile("max_days_val*******************"+max_days_val);
		EntityUtils.consume(response.getEntity());
	}

	/**
	 * Reads the quoted {@code value=} attribute that follows the first occurrence
	 * of {@code fieldName} in {@code page}, within the same tag (before the next
	 * {@code >}). One character after {@code value=} and one before {@code >} are
	 * skipped as the surrounding quotes.
	 *
	 * @param page      page text to scan
	 * @param fieldName field name to look for
	 * @return the attribute value, or null when the name, the {@code value=}
	 *         attribute or the closing {@code >} cannot be found in that order
	 */
	private static String extractFieldValue(String page, String fieldName) {
		if (page == null) {
			return null;
		}
		int nameAt = page.indexOf(fieldName);
		if (nameAt == -1) {
			return null;
		}
		String tail = page.substring(nameAt);
		int valueAt = tail.indexOf("value=");
		int tagEnd = tail.indexOf(">");
		if (valueAt == -1 || tagEnd == -1) {
			return null;
		}
		int begin = valueAt + 7; // skip value= and the opening quote
		int end = tagEnd - 1;    // drop the closing quote
		if (begin > end) {
			return null;
		}
		return tail.substring(begin, end);
	}
	
	/**
	 * Prints {@code message} to standard output and appends it, followed by a
	 * newline, to {@code log.log} in the working directory.
	 * <p>
	 * The file is opened and closed on every call, and the writer is closed even
	 * when writing fails. Write errors are reported to the caller rather than
	 * silently dropped.
	 *
	 * @param message text to log
	 * @throws IOException if the log file cannot be opened or written
	 */
	public void writeLogToFile(String message) throws IOException{
		System.out.println(message);
		File file = new File(System.getProperty("user.dir")+"/log.log");
		// Append mode creates the file when it does not exist yet.
		FileWriter fw = new FileWriter(file,true);
		try {
			fw.write(message+"\n");
		} finally {
			fw.close();
		}
	}
	
	/**
	 * Command-line entry point. The arguments are passed through as the optional
	 * date range; any checked failure of the run is printed as a stack trace.
	 *
	 * @param args zero, one or two dates, forwarded to doConnection
	 */
	public static void main(String[] args) {
		IasiClient runner = new IasiClient();
		try {
			runner.doConnection(args);
		} catch (java.security.GeneralSecurityException sslSetupFailure) {
			// Covers KeyManagementException and NoSuchAlgorithmException.
			sslSetupFailure.printStackTrace();
		} catch (IOException transportFailure) {
			// Covers ClientProtocolException as well, which is an IOException.
			transportFailure.printStackTrace();
		}
	}
}

iasi.properties

#登录信息
userName=username
userPassword=password

#配置参数
#发送邮件地址
email=your_address@example.com
#几天之前的数据。如今天为2012-05-24 , 参数为1的时候,发送2012-05-23的数据
dayBefore=2
start_time=00:00:00
end_time=23:59:59
#经纬度
nlat=90
wlon=-180
elon=180
slat=-90
#最后文件存放路径(只可以到文件夹层级)
resultPath=/home/gsics/public/

猜你喜欢

转载自hechuanzhen.iteye.com/blog/1727630