获取第三方网站信息解析

package com.proem.pms.qs.test;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

public class UserCountTest {

 
 
    public static String doPostGetPDF()  
    {  
          StringBuffer response = new StringBuffer();  
          HttpClient client = new HttpClient();
          NameValuePair name=new NameValuePair("j_username", "xxxxxxxxx");
          NameValuePair password=new NameValuePair("j_password", "xxxxxxxxxxxxxx");
          NameValuePair[] data = {name,password};
          PostMethod method = new PostMethod("http://www.baidu.com/login.do");
          method.setRequestBody(data);   
          try 
          {  
                client.executeMethod(method);
                Cookie[] cookies=client.getState().getCookies(); 
                client.getState().addCookies(cookies); 
                method.releaseConnection(); 
//                newUrl = "http://www.baidu,com/show.do?docId="+URLEncoder.encode("{", "utf-8")+"EAE6F9DF-E73F-42B9-86E8-B65A03FA9639"+URLEncoder.encode("}", "utf-8");
                String newUrl = "http://www.baidu.com/detail.do";
                System.out.println(method.getStatusCode());
                if (302 == method.getStatusCode()){
                 GetMethod get = new GetMethod(newUrl);
                   
                 //int code =
                    client.executeMethod(get);
                    String value = get.getResponseBodyAsString();
                    if (StringUtils.isEmpty(value)){
                     value = "";
                    }
                    //登出
                    String newUrl2 = "http://www.baidu.com/logoff.do";
                    GetMethod get2 = new GetMethod(newUrl2);
                    client.executeMethod(get2);
                   return value;
                }
          }  
          catch ( IOException e )  
          {  
           System.out.println(e);
          }  
          finally 
          {  
                method.releaseConnection();  
          }  
          return response.toString();  
    }
   
   
    public static void main(String [] args){
     String str = doPostGetPDF();
        //System.out.println(str.substring(str.indexOf("<tbody>"), str.indexOf("</tbody>")));
     str = str.substring(str.indexOf("<tbody>")+7, str.indexOf("</tbody>"))
       .replace("&nbsp;", "")
       .replace(" style=\"\"", "")
       .replace("\r", "")
       .replace("\n", "")
       .replace("\t", "").trim();
     str = "<table>"+str+"</table>";
        Document doc = Jsoup.parse(str);
        Elements trs = doc.select("table").select("tr");
 
        for(int i = 0;i<trs.size();i++){
         
            Elements tds = trs.get(i).select("td");
            for(int j = 0;j<tds.size();j++){
             String text = tds.get(j).text();
              System.out.println(text);
            }
        }
    }
}

猜你喜欢

转载自aa84990.iteye.com/blog/2311155
今日推荐