用 Java 实现一个简单的自动添加 HTML 标签的 demo

1. 处理文件

对文件进行读取,并以空行为分隔,将文件内容划分为若干文本块

package AutoAddLabel;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;

public class Util {
	private static final String PATH = "...";

	/**
	 * Reads a text file and splits it into blocks separated by blank lines.
	 *
	 * <p>A line that is empty or contains only whitespace ends the current block.
	 * Lines belonging to the same block are joined with a single {@code '\n'} so
	 * that the last word of one line does not fuse with the first word of the
	 * next. Each block is trimmed, and empty blocks (produced by consecutive
	 * blank lines, leading/trailing blank lines, or an empty file) are skipped.
	 *
	 * @param fileName path of the file to read
	 * @return the non-empty blocks in file order; empty list if the file has no content
	 * @throws IOException if the file cannot be opened or read
	 */
	public static List<String> Blocks(String fileName) throws IOException {
		List<String> blocks = new ArrayList<String>(); // all completed blocks
		StringBuilder block = new StringBuilder(); // block currently being collected
		// try-with-resources closes the reader even when readLine() throws
		try (BufferedReader in = new BufferedReader(new FileReader(fileName))) {
			String line;
			while ((line = in.readLine()) != null) {
				if (line.trim().isEmpty()) {
					// blank line: the current block (if any) is complete
					flushBlock(block, blocks);
				} else {
					if (block.length() > 0) {
						// readLine() strips the line terminator, so restore a separator
						block.append('\n');
					}
					block.append(line);
				}
			}
		}
		// the file may end without a trailing blank line; keep what is left
		flushBlock(block, blocks);
		return blocks;
	}

	/** Appends the trimmed pending block to {@code blocks} if it is non-empty, then clears it. */
	private static void flushBlock(StringBuilder block, List<String> blocks) {
		String text = block.toString().trim();
		if (!text.isEmpty()) {
			blocks.add(text);
		}
		block.setLength(0);
	}
}

2. 添加标签模块

这个模块主要是对一个文本块进行整体处理,根据条件的不同添加不同的标签

package AutoAddLabel;

import java.io.IOException;
import java.lang.reflect.Method;
import java.util.regex.Matcher;

/**
 * HTML renderer: every hook writes one fragment of markup to {@code System.out}.
 *
 * <p>The {@code start_xxx} / {@code end_xxx} names are looked up reflectively by
 * {@code Handler.callback(prefix, name)}, so they must keep exactly this spelling.
 */
public class Handlers extends Handler{
	/** Writes {@code fragment} to standard output without a trailing newline. */
	private static void emit(String fragment) {
		System.out.print(fragment);
	}

	/** Opens the HTML document, including a placeholder title. */
	public void start_document() {
		emit("<html><head><title>...</title></head><body>");
	}

	/** Closes the HTML document. */
	public void end_document() {
		emit("</body></html>");
	}

	/** Opens a paragraph. */
	public void start_paragraph() {
		emit("<p>");
	}

	/** Closes a paragraph. */
	public void end_paragraph() {
		emit("</p>");
	}

	/** Opens a second-level heading. */
	public void start_heading() {
		emit("<h2>");
	}

	/** Closes a second-level heading. */
	public void end_heading() {
		emit("</h2>");
	}

	/** Opens an unordered list. */
	public void start_list() {
		emit("<ul>");
	}

	/** Closes an unordered list. */
	public void end_list() {
		emit("</ul>");
	}

	/** Opens a list item. */
	public void start_listitem() {
		emit("<li>");
	}

	/** Closes a list item. */
	public void end_listitem() {
		emit("</li>");
	}

	/** Opens the document title (top-level heading). */
	public void start_title() {
		emit("<h1>");
	}

	/** Closes the document title (top-level heading). */
	public void end_title() {
		emit("</h1>");
	}

	/**
	 * Writes block content between the surrounding tags.
	 *
	 * @param data text to output as-is (no escaping is applied)
	 */
	public void feed(String data) {
		emit(data);
	}
}

/**
 * Base class that dispatches {@code start}/{@code end} events to methods named
 * {@code start_<name>} / {@code end_<name>} declared on the concrete subclass.
 */
class Handler{
	/**
	 * Invokes the no-argument method called {@code prefix + name} that is declared
	 * on this object's runtime class, if there is one.
	 *
	 * @param prefix method-name prefix, e.g. {@code "start_"}
	 * @param name   element name appended to the prefix, e.g. {@code "paragraph"}
	 * @return {@code true} if the method existed and was invoked, {@code false} if
	 *         the runtime class declares no such method
	 * @throws Exception if the method exists but cannot be invoked, or if it throws
	 *         (wrapped in {@code InvocationTargetException})
	 */
	public boolean callback(String prefix,String name) throws Exception{
		Method method;
		try {
			method = getClass().getDeclaredMethod(prefix+name);
		} catch (NoSuchMethodException missing) {
			// getDeclaredMethod never returns null; a missing hook is reported by exception
			return false;
		}
		method.setAccessible(true);
		// invoke on this instance so subclass state and other subclasses work
		method.invoke(this);
		return true;
	}

	/**
	 * Fires the {@code start_<name>} hook; does nothing if the hook is not declared.
	 *
	 * @param name element name, e.g. {@code "document"}
	 * @throws Exception if the hook exists but its invocation fails
	 */
	public void start(String name) throws Exception{
		callback("start_", name);
	}

	/**
	 * Fires the {@code end_<name>} hook; does nothing if the hook is not declared.
	 * Unlike {@link #start(String)}, invocation failures are not propagated: the
	 * stack trace is printed to {@code System.out} and the call returns normally.
	 *
	 * @param name element name, e.g. {@code "document"}
	 */
	public void end(String name) {
		try {
			callback("end_", name);
		}catch (Exception e) {
			e.printStackTrace(System.out);
		}
	}
}

3. 过滤器模块

有的文本块中可能含有粗体,网址,或邮件地址等信息,这时候我们需要对它们进行处理,而过滤器模块就是对每个文本块进行检查处理。

package AutoAddLabel;

import java.lang.reflect.Method;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Base type for inline filters that rewrite markup inside a single text block.
 */
public abstract class Filter {
	/** Regular expression used by the filter; not assigned here, so it is {@code null} until a subclass sets it. */
	protected Pattern pattern;

	/**
	 * Applies this filter to one block of text.
	 *
	 * @param block the text block to process
	 * @return the processed text
	 */
	public abstract String sub(String block);
}

/**
 * Turns {@code *text*} into {@code <em>text</em>}.
 */
class EmphasisFilter extends Filter{
	public EmphasisFilter() {
		// lazy group: the shortest non-empty run between two asterisks
		this.pattern = Pattern.compile("\\*(.+?)\\*");
	}

	/**
	 * Replaces every {@code *text*} occurrence in a single left-to-right pass.
	 *
	 * <p>The captured text is inserted through the {@code $1} group reference, so
	 * characters such as {@code $} or {@code \} inside the emphasized text are
	 * copied literally instead of being parsed as replacement syntax.
	 *
	 * @param block the text block to process
	 * @return the block with emphasis markers converted; same content as
	 *         {@code block} when nothing matches
	 */
	public String sub(String block) {
		return pattern.matcher(block).replaceAll("<em>$1</em>");
	}
}

/**
 * Turns a parenthesized mail address, e.g. {@code (a@b.c)}, into a {@code mailto:} link.
 */
class MailFilter extends Filter{
	public MailFilter() {
		// the address may contain only ASCII letters and dots around the '@'
		this.pattern = Pattern.compile("\\(([\\.a-zA-Z]+@[\\.a-zA-Z]+[a-zA-Z]+)\\)");
	}

	/**
	 * Replaces each parenthesized address with an anchor whose target and text are
	 * the address; the surrounding parentheses are dropped.
	 *
	 * @param block the text block to process
	 * @return the block with addresses linked; same content as {@code block}
	 *         when nothing matches
	 */
	public String sub(String block) {
		// $1 is the captured address; it cannot contain '$' or '\' given the pattern
		return pattern.matcher(block).replaceAll("<a href=\"mailto:$1\">$1</a>");
	}
}

/**
 * Turns a parenthesized {@code http://} URL, e.g. {@code (http://a.b/c)}, into a link.
 */
class URLFilter extends Filter{
	public URLFilter() {
		// after the scheme only ASCII letters, dots and slashes are accepted
		this.pattern = Pattern.compile("\\((http://[\\.a-zA-Z/]+)\\)");
	}

	/**
	 * Replaces each parenthesized URL with an anchor whose target and text are the
	 * URL; the surrounding parentheses are dropped.
	 *
	 * @param block the text block to process
	 * @return the block with URLs linked; same content as {@code block} when
	 *         nothing matches
	 */
	public String sub(String block) {
		// $1 is the captured URL; it cannot contain '$' or '\' given the pattern
		return pattern.matcher(block).replaceAll("<a href=\"$1\">$1</a>");
	}
}

4. 主程序

结合上面的模块,对文件添加标签。

package AutoAddLabel;

import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.List;

/**
 * Parser preconfigured with the demo's rules and inline filters, plus a
 * command-line entry point that writes the generated markup to a file.
 */
public class BasicTextParser extends Parser {
	private static final String INFILE = "...";
	private static final String OUTFILE = "...";

	/**
	 * Registers the rules and filters in the order in which they are tried/applied.
	 *
	 * @param handlers receiver of the start/end/feed events produced while parsing
	 */
	public BasicTextParser(Handlers handlers) {
		super(handlers);

		addRule(ListRule.class);
		addRule(ListItemRule.class);
		addRule(TitleRule.class);
		addRule(HeadingRule.class);
		addRule(ParagraphRule.class);

		addFilter(EmphasisFilter.class);
		addFilter(URLFilter.class);
		addFilter(MailFilter.class);
	}

	/**
	 * Parses the input file and writes the result to the output file by
	 * temporarily redirecting {@code System.out} and {@code System.err}.
	 *
	 * @param args optional: {@code args[0]} input path (default {@code INFILE}),
	 *             {@code args[1]} output path (default {@code OUTFILE})
	 * @throws Exception if the output file cannot be opened or parsing fails
	 */
	public static void main(String[] args) throws Exception {
		String inFile = (args != null && args.length > 0) ? args[0] : INFILE;
		String outFile = (args != null && args.length > 1) ? args[1] : OUTFILE;

		PrintStream console = System.out;
		PrintStream consoleErr = System.err;
		PrintStream out = new PrintStream(new BufferedOutputStream(new FileOutputStream(outFile)));
		try {
			System.setOut(out);
			System.setErr(out);

			Handlers htmlHandler = new Handlers();
			BasicTextParser htmlParser = new BasicTextParser(htmlHandler);
			htmlParser.parse(inFile);
		} finally {
			// restore both streams even on failure so an uncaught exception is
			// reported on the real console, then flush and close the file
			System.setOut(console);
			System.setErr(consoleErr);
			out.close();
		}
	}
}

/**
 * Drives the conversion: reads blocks from a file, runs every registered filter
 * over each block, then offers the block to the registered rules in order.
 */
class Parser {
	private Handlers handlers;
	public List<Class<? extends Rule>> rules = new ArrayList<Class<? extends Rule>>();
	public List<Class<? extends Filter>> filters = new ArrayList<Class<? extends Filter>>();

	/**
	 * @param handler receiver of the events emitted while parsing
	 */
	public Parser(Handlers handler) {
		this.handlers = handler;
	}

	/** Appends a rule class; rules are tried in registration order. */
	public void addRule(Class<? extends Rule> rule) {
		this.rules.add(rule);
	}

	/** Appends a filter class; filters are applied in registration order. */
	public void addFilter(Class<? extends Filter> filter) {
		this.filters.add(filter);
	}

	/**
	 * Parses {@code fileName} and emits the result through the handlers.
	 *
	 * <p>Emits the {@code document} start event, then for each block returned by
	 * {@code Util.Blocks}: applies all filters, tries the rules in order until one
	 * whose {@code condition} holds returns {@code true} from {@code action}, and
	 * prints a newline to {@code System.out}. Finally emits the {@code document}
	 * end event.
	 *
	 * @param fileName path of the text file to convert
	 * @throws Exception if reading the file, instantiating a filter/rule, or a
	 *         reflective rule call fails
	 */
	public void parse(String fileName) throws Exception {
		this.handlers.start("document");
		System.out.println();
		for (String block : Util.Blocks(fileName)) {
			for (Class<? extends Filter> filter : filters) {
				// Filter declares sub(String), so no reflective method lookup is needed;
				// getDeclaredConstructor().newInstance() replaces deprecated Class.newInstance()
				block = filter.getDeclaredConstructor().newInstance().sub(block);
			}
			for (Class<? extends Rule> rule : rules) {
				Method methodCondition = rule.getDeclaredMethod("condition", String.class);
				Method methodAction = rule.getDeclaredMethod("action", String.class, Handlers.class);
				methodCondition.setAccessible(true);
				methodAction.setAccessible(true);
				// NOTE(review): condition and action each run on a fresh rule instance,
				// so instance fields of a rule do not survive between calls — confirm
				// this is intended for rules that need to remember state across blocks.
				if ((boolean) methodCondition.invoke(rule.getDeclaredConstructor().newInstance(), block)) {
					boolean last = (boolean) methodAction.invoke(
							rule.getDeclaredConstructor().newInstance(), block, handlers);
					if (last) {
						// this rule fully handled the block; skip the remaining rules
						break;
					}
				}
			}
			System.out.println();
		}
		this.handlers.end("document");
	}
}

猜你喜欢

转载自blog.csdn.net/weixin_41811413/article/details/85872138