1. 处理文件
读取文件，并以空行为分隔符把文件内容划分成若干文本块。
package AutoAddLabel;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
/**
 * File helpers used by the parser.
 */
public class Util {
    private static final String PATH = "...";

    /**
     * Reads a text file and splits it into blocks separated by empty lines.
     *
     * <p>The lines that make up one block are concatenated without any
     * separator (the line terminators removed by {@code readLine} are not put
     * back) and the result is trimmed. Blocks that are empty after trimming
     * are skipped, so runs of consecutive empty lines and empty lines at the
     * start or end of the file do not yield empty entries.
     *
     * @param fileName path of the file to read
     * @return the blocks in file order; an empty list when the file has no content
     * @throws IOException if the file cannot be opened or read
     */
    public static List<String> Blocks(String fileName) throws IOException {
        List<String> blocks = new ArrayList<String>(); // every completed block
        StringBuilder block = new StringBuilder();     // the block being assembled
        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader in = new BufferedReader(new FileReader(fileName))) {
            String line;
            while ((line = in.readLine()) != null) {
                if (line.length() > 0) {
                    // A non-empty line belongs to the current block.
                    block.append(line);
                } else {
                    // An empty line terminates the current block.
                    flush(block, blocks);
                }
            }
        }
        // The last block is not followed by an empty line; flush it explicitly.
        flush(block, blocks);
        return blocks;
    }

    /** Appends the trimmed buffer to {@code blocks} if it is non-empty, then clears the buffer. */
    private static void flush(StringBuilder block, List<String> blocks) {
        String text = block.toString().trim();
        if (!text.isEmpty()) {
            blocks.add(text);
        }
        block.setLength(0);
    }
}
2. 添加标签模块
这个模块负责对文本块进行整体处理，根据条件的不同为其添加不同的 HTML 标签。
package AutoAddLabel;
import java.io.IOException;
import java.lang.reflect.Method;
import java.util.regex.Matcher;
/**
 * HTML renderer. Each {@code start_xxx} / {@code end_xxx} method writes the
 * opening or closing tag of one element to {@code System.out}; {@link #feed}
 * writes text content unchanged.
 */
public class Handlers extends Handler {

    /** Writes the given markup to standard output without a trailing newline. */
    private static void emit(String markup) {
        System.out.print(markup);
    }

    // --- document ---------------------------------------------------------

    public void start_document() {
        emit("<html><head><title>...</title></head><body>");
    }

    public void end_document() {
        emit("</body></html>");
    }

    // --- block-level elements ---------------------------------------------

    public void start_title() {
        emit("<h1>");
    }

    public void end_title() {
        emit("</h1>");
    }

    public void start_heading() {
        emit("<h2>");
    }

    public void end_heading() {
        emit("</h2>");
    }

    public void start_paragraph() {
        emit("<p>");
    }

    public void end_paragraph() {
        emit("</p>");
    }

    // --- lists --------------------------------------------------------------

    public void start_list() {
        emit("<ul>");
    }

    public void end_list() {
        emit("</ul>");
    }

    public void start_listitem() {
        emit("<li>");
    }

    public void end_listitem() {
        emit("</li>");
    }

    // --- content ------------------------------------------------------------

    /**
     * Writes {@code data} as-is; no escaping is applied.
     *
     * @param data text to output
     */
    public void feed(String data) {
        emit(data);
    }
}
/**
 * Base class that dispatches {@code start}/{@code end} events to methods
 * named {@code start_<name>} / {@code end_<name>} via reflection.
 */
class Handler {

    /**
     * Invokes the no-argument method called {@code prefix + name} on this
     * object, if the object's runtime class declares one.
     *
     * <p>Only methods declared directly on the runtime class are considered
     * ({@code getDeclaredMethod} does not search superclasses).
     *
     * @param prefix method-name prefix, e.g. {@code "start_"}
     * @param name   element name appended to the prefix
     * @return {@code true} if a matching method was found and invoked,
     *         {@code false} if the class declares no such method
     * @throws Exception if the reflective call fails or the invoked method throws
     */
    public boolean callback(String prefix, String name) throws Exception {
        Method method;
        try {
            method = getClass().getDeclaredMethod(prefix + name);
        } catch (NoSuchMethodException e) {
            // getDeclaredMethod never returns null; a missing handler is
            // signalled by this exception and simply means "nothing to call".
            return false;
        }
        method.setAccessible(true);
        // Invoke on this instance: the method was looked up on getClass(), so
        // this is always a valid receiver (a fresh object of another class is not).
        method.invoke(this);
        return true;
    }

    /**
     * Fires the {@code start_<name>} handler, if any.
     *
     * @param name element name
     * @throws Exception if the handler invocation fails
     */
    public void start(String name) throws Exception {
        callback("start_", name);
    }

    /**
     * Fires the {@code end_<name>} handler, if any. Failures are not
     * propagated; their stack trace is printed to {@code System.out}.
     *
     * @param name element name
     */
    public void end(String name) {
        try {
            callback("end_", name);
        } catch (Exception e) {
            e.printStackTrace(System.out);
        }
    }
}
3. 过滤器模块
有的文本块中可能含有强调文本、网址或邮件地址等信息，需要把它们转换成对应的 HTML 标签；过滤器模块负责对每个文本块进行这样的检查和替换。
package AutoAddLabel;
import java.lang.reflect.Method;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Base type for inline text filters: a filter rewrites a block of text and
 * returns the result.
 */
public abstract class Filter {

    /** Regular expression used by the filter; not initialised by this class. */
    protected Pattern pattern;

    /**
     * Applies the filter to a block of text.
     *
     * @param block text to process
     * @return the processed text
     */
    public abstract String sub(String block);
}
/**
 * Wraps text enclosed in asterisks ({@code *text*}) in {@code <em>} tags.
 */
class EmphasisFilter extends Filter {

    public EmphasisFilter() {
        // Lazy quantifier: each emphasis ends at the nearest closing asterisk.
        this.pattern = Pattern.compile("\\*(.+?)\\*");
    }

    /**
     * Replaces every {@code *text*} occurrence with {@code <em>text</em>}.
     *
     * <p>The captured text is inserted through the {@code $1} group reference,
     * so a {@code $} or {@code \} inside the emphasis is copied literally
     * instead of being interpreted as replacement syntax. All matches are
     * replaced in a single left-to-right pass; output already produced is not
     * scanned again.
     *
     * @param block text to process
     * @return the text with emphasis markup applied; unchanged if nothing matches
     */
    public String sub(String block) {
        return pattern.matcher(block).replaceAll("<em>$1</em>");
    }
}
/**
 * Turns a parenthesised e-mail address, e.g. {@code (user@host.tld)}, into a
 * {@code mailto:} hyperlink. The surrounding parentheses are consumed.
 */
class MailFilter extends Filter {

    public MailFilter() {
        this.pattern = Pattern.compile("\\(([\\.a-zA-Z]+@[\\.a-zA-Z]+[a-zA-Z]+)\\)");
    }

    /**
     * Replaces every parenthesised address with an anchor whose target and
     * label are both the address.
     *
     * @param block text to process
     * @return the text with addresses linked; unchanged if nothing matches
     */
    public String sub(String block) {
        // The capture consists only of letters, dots and '@', so "$1" inserts
        // it verbatim and the emitted markup can never form a new match.
        Matcher addresses = pattern.matcher(block);
        return addresses.replaceAll("<a href=\"mailto:$1\">$1</a>");
    }
}
/**
 * Turns a parenthesised {@code http://} URL into a hyperlink. The surrounding
 * parentheses are consumed. Only letters, dots and slashes are accepted after
 * the scheme.
 */
class URLFilter extends Filter {

    public URLFilter() {
        this.pattern = Pattern.compile("\\((http://[\\.a-zA-Z/]+)\\)");
    }

    /**
     * Replaces every parenthesised URL with an anchor whose target and label
     * are both the URL.
     *
     * @param block text to process
     * @return the text with URLs linked; unchanged if nothing matches
     */
    public String sub(String block) {
        // The capture holds no '$' or '\', so "$1" inserts it verbatim and the
        // emitted markup can never form a new match.
        Matcher urls = pattern.matcher(block);
        return urls.replaceAll("<a href=\"$1\">$1</a>");
    }
}
4. 主程序
结合上面的模块,对文件添加标签。
package AutoAddLabel;
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.List;
/**
 * Parser preconfigured with the list, list-item, title, heading and paragraph
 * rules and the emphasis, URL and mail filters.
 */
public class BasicTextParser extends Parser {
    private static final String INFILE = "...";
    private static final String OUTFILE = "...";

    /**
     * Creates the parser and registers its rules and filters. Registration
     * order matters: rules and filters are applied in the order added.
     *
     * @param handlers renderer that receives the start/end events
     */
    public BasicTextParser(Handlers handlers) {
        super(handlers);
        addRule(ListRule.class);
        addRule(ListItemRule.class);
        addRule(TitleRule.class);
        addRule(HeadingRule.class);
        addRule(ParagraphRule.class);
        addFilter(EmphasisFilter.class);
        addFilter(URLFilter.class);
        addFilter(MailFilter.class);
    }

    /**
     * Parses {@code INFILE} and writes the generated markup to {@code OUTFILE}.
     *
     * <p>The handlers print to {@code System.out}, so standard output and
     * standard error are redirected to the output file while parsing runs.
     * Both streams are restored and the file is closed afterwards, even when
     * parsing fails.
     *
     * @param args unused
     * @throws Exception if the output file cannot be opened or parsing fails
     */
    public static void main(String[] args) throws Exception {
        PrintStream console = System.out;
        PrintStream consoleErr = System.err;
        PrintStream out = new PrintStream(new BufferedOutputStream(new FileOutputStream(OUTFILE)));
        try {
            System.setOut(out);
            System.setErr(out);
            Handlers htmlHandler = new Handlers();
            BasicTextParser htmlParser = new BasicTextParser(htmlHandler);
            htmlParser.parse(INFILE);
        } finally {
            // Restore the original streams first so that nothing is written
            // to the file stream after it has been closed.
            System.setOut(console);
            System.setErr(consoleErr);
            out.close();
        }
    }
}
/**
 * Drives the conversion: reads the input block by block, runs every
 * registered filter over each block, then hands the block to the registered
 * rules until one of them reports that it is final.
 */
class Parser {
    private Handlers handlers;
    public List<Class<? extends Rule>> rules = new ArrayList<Class<? extends Rule>>();
    public List<Class<? extends Filter>> filters = new ArrayList<Class<? extends Filter>>();

    /**
     * @param handler renderer that receives the document start/end events and
     *                is passed to every rule action
     */
    public Parser(Handlers handler) {
        this.handlers = handler;
    }

    /** Registers a rule type; rules are tried in registration order. */
    public void addRule(Class<? extends Rule> rule) {
        this.rules.add(rule);
    }

    /** Registers a filter type; filters are applied in registration order. */
    public void addFilter(Class<? extends Filter> filter) {
        this.filters.add(filter);
    }

    /**
     * Parses the given file and emits the result through the handlers and
     * {@code System.out}.
     *
     * <p>For each block returned by {@code Util.Blocks}: every filter is
     * applied in turn, then each rule's {@code condition(String)} is
     * evaluated; when it holds, the rule's {@code action(String, Handlers)}
     * runs, and a {@code true} result stops rule processing for that block.
     * A line break is printed after the document start and after each block.
     *
     * @param fileName path of the file to parse
     * @throws Exception if reading the file, instantiating a filter or rule,
     *                   or invoking a rule or handler fails
     */
    public void parse(String fileName) throws Exception {
        this.handlers.start("document");
        System.out.println();
        for (String block : Util.Blocks(fileName)) {
            // Filter declares sub(String), so no reflective lookup is needed.
            for (Class<? extends Filter> filterType : filters) {
                Filter filter = filterType.getDeclaredConstructor().newInstance();
                block = filter.sub(block);
            }
            for (Class<? extends Rule> ruleType : rules) {
                // Rule methods are looked up by name on the concrete class,
                // so each rule class must declare both of them itself.
                Method condition = ruleType.getDeclaredMethod("condition", String.class);
                Method action = ruleType.getDeclaredMethod("action", String.class, Handlers.class);
                condition.setAccessible(true);
                action.setAccessible(true);
                // One instance serves both calls, so anything condition()
                // records on the rule is still there when action() runs.
                Rule rule = ruleType.getDeclaredConstructor().newInstance();
                if ((boolean) condition.invoke(rule, block)) {
                    boolean last = (boolean) action.invoke(rule, block, handlers);
                    if (last) {
                        break;
                    }
                }
            }
            System.out.println();
        }
        this.handlers.end("document");
    }
}