2021SC@SDUSC
总览
本次分析主要针对的是DryRunGruntParser类。DryRunGruntParser类重写了org.apache.pig.tools.pigscript.parser.PigScriptParser类中处理各类命令的方法。它读入脚本的字符流,结合脚本的来源和所处的运行环境(PigContext)对脚本进行解析,并把解析得到的各条命令重新拼接成文本保存下来。
代码分析
初始化
构造方法接收三个参数:读入脚本的字符流stream、脚本的来源source,以及当前对象在程序中所处的运行环境context(PigContext)。需要注意的是,这里的stream是java.io.Reader类型的字符输入流,并不是java.util.stream包中支持串行/并行聚合操作的Stream;它会被直接传给父类的构造方法。
/**
 * Creates a parser over the given script reader.
 *
 * @param stream  character stream the script is read from; handed to the superclass
 * @param source  identifier of where the script came from, later passed to the tokenizer
 * @param context Pig context later used when building the query parser driver
 */
public DryRunGruntParser(Reader stream, String source, PigContext context) {
    super(stream);
    this.context = context;
    this.source = source;
}
processExplain()
该方法用于处理explain命令:根据传入的脚本、输出目标、是否详细输出、输出格式、参数、参数文件以及别名,把其中非空(或已设置)的选项依次拼接成一条explain命令文本,并追加到sb中。
/**
 * Rebuilds the text of an {@code explain} command from its parsed options and
 * appends it, followed by a newline, to {@code sb}.
 *
 * @param alias     alias to explain; omitted when {@code null}
 * @param script    script path emitted after {@code -script}; omitted when {@code null}
 * @param isVerbose {@code false} when the command was given the {@code -brief} option
 * @param format    {@code "dot"} or {@code "xml"} to emit the matching flag; any other
 *                  value (including {@code null}) emits no format flag
 * @param target    output location emitted after {@code -out}; omitted when {@code null}
 * @param params    values emitted as {@code -param} options; may be {@code null}
 * @param files     values emitted as {@code -param_file} options; may be {@code null}
 */
protected void processExplain(String alias, String script,
        boolean isVerbose, String format, String target,
        List<String> params, List<String> files) throws IOException,
        ParseException {
    sb.append("explain ");
    if (script != null) {
        sb.append("-script ").append(script).append(" ");
    }
    if (target != null) {
        sb.append("-out ").append(target).append(" ");
    }
    // "-brief" is the non-verbose form of explain, so it must be emitted
    // when verbose output was NOT requested (the original test was inverted).
    if (!isVerbose) {
        sb.append("-brief ");
    }
    // Constant-first equals is null-safe, so no separate null check is needed.
    if ("dot".equals(format)) {
        sb.append("-dot ");
    }
    if ("xml".equals(format)) {
        sb.append("-xml ");
    }
    if (params != null) {
        for (String param : params) {
            sb.append("-param ").append(param).append(" ");
        }
    }
    if (files != null) {
        for (String file : files) {
            sb.append("-param_file ").append(file).append(" ");
        }
    }
    if (alias != null) {
        sb.append(alias);
    }
    sb.append("\n");
}
processRegister()
这两个重载方法用于处理register命令:第一个只接收jar的路径;第二个接收路径、脚本引擎以及命名空间,并在对应参数不为空时分别追加using和as子句。生成的register语句会被追加到sb中。
/**
 * Appends a {@code register <jar>;} statement, terminated by a newline, to {@code sb}.
 *
 * @param jar the jar reference to emit after the {@code register} keyword
 */
@Override
protected void processRegister(String jar) throws IOException {
    sb.append("register ");
    sb.append(jar);
    sb.append(";\n");
}
/**
 * Appends a {@code register '<path>' [using <engine>] [as <namespace>];}
 * statement, terminated by a newline, to {@code sb}.
 *
 * @param path            path emitted inside single quotes
 * @param scriptingEngine engine emitted after {@code using}; omitted when {@code null}
 * @param namespace       namespace emitted after {@code as}; omitted when {@code null}
 */
@Override
protected void processRegister(String path, String scriptingEngine,
        String namespace) throws IOException, ParseException {
    sb.append("register '");
    sb.append(path);
    sb.append("'");

    boolean hasEngine = scriptingEngine != null;
    if (hasEngine) {
        sb.append(" using ");
        sb.append(scriptingEngine);
    }

    boolean hasNamespace = namespace != null;
    if (hasNamespace) {
        sb.append(" as ");
        sb.append(namespace);
    }

    sb.append(";\n");
}
processPig()
首先读取指令,并在指令前补上(当前行号-1)个换行符;如果指令最后没有分号则加上,再根据指令生成抽象语法树。之后把新的语法树并入已有的语法树,在其副本上展开宏,最后把新产生的输出行追加到sb中。
/**
 * Parses one Pig statement, merges it into the accumulated parse tree,
 * expands macros on a copy of that tree, and appends the output lines that
 * have not been emitted yet to {@code sb}.
 *
 * @param cmd the statement text; a trailing {@code ;} is added when missing
 * @throws IOException a {@code ParserException} when the statement cannot be
 *         parsed or the expanded tree cannot be printed
 */
@Override
protected void processPig(String cmd) throws IOException {
    // Prefix the statement with (current line - 1) newlines; presumably this
    // keeps positions reported by the parser aligned with the script.
    int start = getLineNumber();
    StringBuilder blder = new StringBuilder();
    for (int i = 1; i < start; i++) {
        blder.append("\n");
    }

    blder.append(cmd);
    // endsWith also copes with an empty statement, where
    // charAt(length - 1) would throw StringIndexOutOfBoundsException.
    if (!cmd.endsWith(";")) {
        blder.append(";");
    }
    cmd = blder.toString();

    CommonTokenStream tokenStream = QueryParserDriver.tokenize(cmd, source);
    Tree ast;
    try {
        ast = QueryParserDriver.parse(tokenStream);
    } catch (RuntimeException ex) {
        // Keep the original exception as the cause so its stack trace is not lost.
        throw new ParserException(ex.getMessage(), ex);
    }

    // Once a macro-related node has been seen the flag stays set, so the
    // traversal is only needed while it is still false.
    if (!hasMacro) {
        List<CommonTree> importNodes = new ArrayList<CommonTree>();
        List<CommonTree> macroNodes = new ArrayList<CommonTree>();
        List<CommonTree> inlineNodes = new ArrayList<CommonTree>();
        QueryParserDriver.traverseImport(ast, importNodes);
        QueryParserDriver.traverse(ast, macroNodes, inlineNodes);
        if (!importNodes.isEmpty() || !macroNodes.isEmpty()
                || !inlineNodes.isEmpty()) {
            hasMacro = true;
        }
    }

    // The first statement becomes the accumulated tree; later statements
    // contribute their children to it.
    if (parserTree == null) {
        parserTree = ast;
    } else {
        int n = ast.getChildCount();
        for (int i = 0; i < n; i++) {
            parserTree.addChild(ast.getChild(i));
        }
    }

    // Expand macros on a duplicate root that is given the same children, not on
    // parserTree itself (presumably so the accumulated root is left as is).
    CommonTree dup = (CommonTree) parserTree.dupNode();
    dup.addChildren(((CommonTree) parserTree).getChildren());

    QueryParserDriver driver = new QueryParserDriver(context, "0",
            new HashMap<String, String>());
    Tree newAst = driver.expandMacro(dup);

    CommonTreeNodeStream nodes = new CommonTreeNodeStream(newAst);
    AstPrinter walker = new AstPrinter(nodes);
    try {
        walker.query();
    } catch (RecognitionException e) {
        throw new ParserException("Failed to print AST for command " + cmd,
                e);
    }

    // The printer renders the whole accumulated tree each time; toSkip counts
    // the lines already appended, so only the new ones are added here.
    String result = walker.getResult().trim();
    if (!result.isEmpty()) {
        String[] lines = result.split("\n");
        for (int i = toSkip; i < lines.length; i++) {
            sb.append(lines[i]).append("\n");
            toSkip++;
        }
    }
}
processIllustrate()
该方法用于处理illustrate命令:读入别名、脚本、输出目标、参数和参数文件等,把其中不为空的选项依次拼接成一条illustrate命令文本,并追加到sb中。
/**
 * Rebuilds the text of an {@code illustrate} command from its parsed options
 * and appends it, followed by a newline, to {@code sb}.
 *
 * @param alias  alias to illustrate; omitted when {@code null}
 * @param script script path emitted after {@code -script}; omitted when {@code null}
 * @param target output location emitted after {@code -out}; omitted when {@code null}
 * @param params values emitted as {@code -param} options; may be {@code null}
 * @param files  values emitted as {@code -param_file} options; may be {@code null}
 */
@Override
protected void processIllustrate(String alias, String script,
        String target, List<String> params, List<String> files)
        throws IOException, ParseException {
    sb.append("illustrate ");

    if (script != null) {
        sb.append("-script ");
        sb.append(script);
        sb.append(" ");
    }

    if (target != null) {
        sb.append("-out ");
        sb.append(target);
        sb.append(" ");
    }

    if (params != null) {
        for (String paramValue : params) {
            sb.append("-param ");
            sb.append(paramValue);
            sb.append(" ");
        }
    }

    if (files != null) {
        for (String paramFile : files) {
            sb.append("-param_file ");
            sb.append(paramFile);
            sb.append(" ");
        }
    }

    if (alias != null) {
        sb.append(alias);
    }

    sb.append("\n");
}
总结
DryRunGruntParser类主要用于实现对于数据流的信息获取以及调用方法生成抽象语法树。