Jsoup解析HTML

从网页中取出想要的信息:

导入 Maven 依赖:

<dependency>
  <!-- jsoup HTML parser library @ http://jsoup.org/ -->
  <groupId>org.jsoup</groupId>
  <artifactId>jsoup</artifactId>
  <version>1.10.2</version>
</dependency>

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;



/**
 * Demo that downloads one page, prints two list items taken from its first table,
 * prints the page title, and lists every number-like token found in the raw markup.
 */
public class gethtml {

    /** Page fetched by {@link #main(String[])}. */
    private static final String PAGE_URL =
            "http://xzsp.bjfda.gov.cn/bfdaww/trends/trendsQueryAction!getXkzInfo.dhtml?zjbh=JY11106111223752&jym=SPJY37248";

    /**
     * Matches a 7-digit token starting with 13, 15, 18 or 147 that sits directly
     * between {@code >} and {@code <}; group 1 is the token itself.
     * Compiled once because a {@link Pattern} is immutable and reusable.
     */
    private static final Pattern NUMBER_SEGMENT =
            Pattern.compile(">(13\\d{5}|15\\d{5}|18\\d{5}|147\\d{4})<");

    /**
     * Runs the demo and reports the elapsed wall-clock time in milliseconds.
     *
     * @param args ignored
     * @throws Exception if either download fails
     */
    public static void main(String[] args) throws Exception {
        long start = System.currentTimeMillis();

        String html = getHtml(PAGE_URL);
        Document doc = Jsoup.parse(html);
        printNameAndCode(doc);

        // Second download of the same URL, this time through Jsoup's own connection API.
        Document document = Jsoup.connect(PAGE_URL).get();
        System.out.println("title===" + document.title());

        int num = 0;
        Matcher m = NUMBER_SEGMENT.matcher(html);
        while (m.find()) {
            System.out.println("打印出的号码段落:" + m.group(1) + "  编号" + (++num));
        }
        System.out.println(num);

        long end = System.currentTimeMillis();
        System.out.println("花费的时间" + (end - start) + "毫秒");
    }

    /**
     * Prints the 2nd and 4th {@code li} of the 2nd {@code ul} found under
     * {@code tr td} in the first table of {@code doc}.
     *
     * <p>Prints nothing when the document does not contain that structure, instead
     * of failing with an {@link IndexOutOfBoundsException}.
     */
    private static void printNameAndCode(Document doc) {
        Elements tables = doc.select("table");
        if (tables.isEmpty()) {
            return;
        }
        Elements rows = tables.get(0).select("tr td ul");
        // Index 1 is read below, so at least two matches are required.
        if (rows.size() < 2) {
            return;
        }
        Element row = rows.get(1);
        Elements items = row.select("li");
        // Indices 1 and 3 are read below, so at least four items are required.
        if (items.size() < 4) {
            return;
        }
        System.out.println("名称:" + items.get(1).text());
        System.out.println("代码:" + items.get(3).text());
    }

    /**
     * Downloads the resource at {@code str_url} and returns its text decoded as UTF-8.
     *
     * <p>The content is read line by line and the lines are concatenated without
     * their line terminators, so the result is a single line of text.
     *
     * @param str_url absolute URL to read
     * @return the decoded content with line terminators removed
     * @throws IOException if the URL is malformed or the stream cannot be opened or read
     */
    public static String getHtml(String str_url) throws IOException {
        URL url = new URL(str_url);
        StringBuilder page = new StringBuilder();
        // try-with-resources closes the reader, and with it the URL stream, on every path.
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(url.openStream(), "utf-8"))) {
            String line;
            while ((line = in.readLine()) != null) {
                page.append(line);
            }
        }
        // Read failures propagate to the caller rather than yielding truncated content.
        return page.toString();
    }
}

猜你喜欢

转载自blog.csdn.net/FORLOVEHUAN/article/details/80221630