【Jsoup】Java使用Jsoup的jar包将下载的html文件中的table表格在后台输出

1.    导入第三方jar,3个都要下载
2.    引入jar,右键项目Build Path
3.    导入jar
4.    代码编写
使用File类加载文件,我使用的是绝对路径(完整代码如下)
package com.test.demo;

import java.io.File;
import java.util.ArrayList;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

/**
 * Demo that parses a locally saved HTML page with Jsoup and prints every element
 * carrying the CSS class {@code doc-table} to standard output as a JSON-like structure.
 *
 * <p>Each table is labelled with the text of an {@code <h4>} heading: the i-th table is
 * paired with the i-th heading that follows the {@code <h4>Usage Example</h4>} heading.
 * Each table row that contains {@code <td>} cells is printed as one
 * <code>{header: "value",...},</code> line, using the {@code <th>} texts collected so far
 * in that table as keys.
 */
public class testDemoH4 {
	/** Absolute path of the downloaded HTML page that is parsed. */
	private static final String HTML_PATH = "D:/html/panel.html";
	/** CSS class of the tables that are printed. */
	private static final String TABLE_CLASS = "doc-table";
	/** Text of the h4 heading after which the table headings start. */
	private static final String ANCHOR_HEADING = "Usage Example";
	/** Substring of the last cell that marks a row as having a return value. */
	private static final String RETURN_MARKER = "Return";

	public static void main(String[] args) throws Exception {
		parseHtmlH4();
	}

	/**
	 * Loads {@link #HTML_PATH}, then prints the file's base name followed by one
	 * bracketed list per {@code doc-table} element.
	 *
	 * <p>If the file does not exist, a message is written to standard error and nothing
	 * else is printed.
	 *
	 * @throws Exception if the file cannot be read or parsed
	 */
	private static void parseHtmlH4() throws Exception {
		File file = new File(HTML_PATH);
		// Check existence before touching the name so a missing file is reported clearly.
		if (!file.exists()) {
			System.err.println("File not found: " + file.getPath());
			return;
		}
		System.out.println(baseName(file.getName()) + ": ");
		System.out.println("{");

		Document doc = Jsoup.parse(file, "UTF-8");
		Elements tables = doc.getElementsByClass(TABLE_CLASS);

		// Collect the text of every <h4> so tables can be matched to headings by position.
		Elements h4Elements = doc.select("h4");
		ArrayList<String> headings = new ArrayList<String>();
		for (int q = 0; q < h4Elements.size(); q++) {
			headings.add(h4Elements.get(q).text());
		}
		// The wanted tables come after <h4>Usage Example</h4>; -1 when that heading is absent.
		int anchor = headings.indexOf(ANCHOR_HEADING);

		for (int i = 0; i < tables.size(); i++) {
			System.out.println(headingFor(headings, anchor, i) + ": [");
			Elements rows = tables.get(i).select("tr");
			// Header texts accumulate per table; data cells look their key up by column index.
			ArrayList<String> columnNames = new ArrayList<String>();
			for (int j = 0; j < rows.size(); j++) {
				Elements headerCells = rows.get(j).select("th");
				for (int w = 0; w < headerCells.size(); w++) {
					columnNames.add(headerCells.get(w).text());
				}
				Elements dataCells = rows.get(j).select("td");
				if (dataCells.isEmpty()) {
					// Header-only (or empty) rows produce no output line.
					continue;
				}
				ArrayList<String> values = new ArrayList<String>();
				for (int w = 0; w < dataCells.size(); w++) {
					values.add(dataCells.get(w).text());
				}
				System.out.println(formatRow(columnNames, values));
			}
			System.out.println("]" + ",");
		}
		System.out.println("}");
	}

	/**
	 * Returns the file name without its last extension.
	 *
	 * @param fileName a file name, with or without a dot
	 * @return the part before the last dot, or the whole name when there is no dot
	 */
	private static String baseName(String fileName) {
		int dot = fileName.lastIndexOf(".");
		return dot < 0 ? fileName : fileName.substring(0, dot);
	}

	/**
	 * Picks the label for the table at {@code tableIndex}.
	 *
	 * @param headings   texts of all h4 headings in document order
	 * @param anchor     index of the anchor heading in {@code headings}, or -1 if absent
	 * @param tableIndex zero-based index of the table
	 * @return the heading {@code tableIndex + 1} positions after the anchor, or
	 *         {@code "table" + tableIndex} when the anchor is absent or there is no
	 *         such heading (instead of mislabelling or throwing)
	 */
	private static String headingFor(ArrayList<String> headings, int anchor, int tableIndex) {
		int position = anchor + 1 + tableIndex;
		if (anchor >= 0 && position < headings.size()) {
			return headings.get(position);
		}
		return "table" + tableIndex;
	}

	/**
	 * Returns the key for a data cell.
	 *
	 * @param columnNames header texts collected so far for the current table
	 * @param column      zero-based column index of the data cell
	 * @return the matching header text, or {@code "col" + column} when the row has more
	 *         cells than known headers
	 */
	private static String columnName(ArrayList<String> columnNames, int column) {
		return column < columnNames.size() ? columnNames.get(column) : "col" + column;
	}

	/**
	 * Formats one data row as <code>{key: "value",...,key: "value"},</code>.
	 *
	 * <p>When the last cell contains {@code "Return"}, {@code hasReturn: true,} is emitted
	 * just before that cell. The closing <code>},</code> is always appended, including for
	 * rows with a single cell.
	 *
	 * @param columnNames header texts used as keys, looked up by column index
	 * @param values      cell texts of the row; must not be empty
	 * @return the formatted row without a trailing line break
	 */
	private static String formatRow(ArrayList<String> columnNames, ArrayList<String> values) {
		StringBuilder row = new StringBuilder("{");
		int last = values.size() - 1;
		for (int w = 0; w <= last; w++) {
			String value = values.get(w);
			if (w == last && value.contains(RETURN_MARKER)) {
				row.append("hasReturn").append(": ").append("true").append(",");
			}
			row.append(columnName(columnNames, w)).append(": ").append("\"").append(value).append("\"");
			row.append(w == last ? "}," : ",");
		}
		return row.toString();
	}

}

5.    下载的网页:
6.    最后成果

猜你喜欢

转载自blog.csdn.net/liangayang/article/details/80683384