java AST 抽象语法树-JavaParser 框架
JavaParser
JavaParser is a Java 1-14 parser and abstract syntax tree library for Java, including preview features up to Java 13.
该项目包含一组实现具有高级分析功能的Java 1.0-Java 14 Parser的库。
其中包括Java 13的预览功能,对Java 14预览功能的支持仍在进行中。
学习资料
阅读:https://leanpub.com/javaparservisited/read_full
TODO: 整本书的学习笔记。
入门例子
Sample project with a basic Maven + JavaParser setup
Sample project with basic Maven + JavaSymbolSolver set up
Inspecting an AST
maven 引入
<dependency>
    <groupId>com.github.javaparser</groupId>
    <artifactId>javaparser-symbol-solver-core</artifactId>
    <version>3.15.21</version>
</dependency>
ps: 需要设置 jdk 级别为 1.8
断点
/**
 * Parses a tiny class declaration and prints the resulting compilation unit.
 * Put a breakpoint on the print statement to inspect {@code cu} in a debugger.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    // The source text whose syntax tree we want to look at.
    final String source = "class X { int x; }";
    // Parse the code you want to inspect:
    final CompilationUnit cu = StaticJavaParser.parse(source);
    // Now comes the inspection code:
    System.out.println(cu);
}
我们可以通过断点的方式获取 "class X { int x; }"
的信息。
输出
// Now comes the inspection code:
YamlPrinter printer = new YamlPrinter(true);
System.out.println(printer.output(cu));
直接输出
---
root(Type=CompilationUnit):
types:
- type(Type=ClassOrInterfaceDeclaration):
isInterface: "false"
name(Type=SimpleName):
identifier: "X"
members:
- member(Type=FieldDeclaration):
variables:
- variable(Type=VariableDeclarator):
name(Type=SimpleName):
identifier: "x"
type(Type=PrimitiveType):
type: "INT"
...
xml 格式
修改 class 文件
直接参考 Sample project with a basic Maven + JavaParser setup
maven 引入
<dependency>
    <groupId>com.github.javaparser</groupId>
    <artifactId>javaparser-core</artifactId>
    <version>3.15.21</version>
</dependency>
原始 java 类
ps: 某种角度是为了展现强大,所以代码很冗余,很长。
import com.github.javaparser.utils.CodeGenerationUtils;
import com.github.javaparser.utils.SourceRoot;
public class Blabla {
private final void method1013(StreamBuffer buf, int opcode) {
if (opcode != 1) {
if (opcode != 2) {
if (opcode != 4) {
do {
if (opcode != 5) {
if (opcode == 6)
((Class94) this).anInt1477 = buf.readUnsignedShort();
else {
if (opcode != 7) {
if (opcode != 8) {
if (opcode == 11)
((Class94) this).anInt1456 = 1;
else if (opcode != 12) {
if (opcode != 16) {
if (opcode == 23)
((Class94) this).anInt1424 = (buf.readUnsignedShort());
else if (opcode != 24) {
if (opcode == 25)
((Class94) this).anInt1487 = (buf.readUnsignedShort());
else if (opcode == 26)
anInt1435 = (buf.readUnsignedShort());
else if (opcode = 35) {
if (opcode >= 35 && opcode = 110) {
if (opcode == 110)
anInt1423 = buf.readUnsignedShort();
else if (opcode != 111) {
if (opcode == 112)
anInt1480 = buf.readUnsignedShort();
else if (opcode != 113) {
if (opcode == 114)
anInt1439 = buf.readByte(false) * 5;
else if (opcode == 115)
((Class94) this).anInt1462 = buf.readUnsignedByte();
else if (opcode != 121) {
if (opcode != 122) {
if (opcode == 125) {
anInt1493 = buf.readByte(false) i_57_; i_57_++) ((Class94) this).anIntArray1441[i_57_] = buf.readUnsignedShort();
}
} else {
((Class94) this).anInt1442 = buf.readUnsignedByte();
((Class94) this).anInt1476 = buf.readUnsignedShort();
}
} else
((Class94) this).anInt1431 = buf.readUnsignedShort();
} else
((Class94) this).anInt1429 = buf.readUnsignedShort();
} else
anInt1458 = buf.readByte(false);
} else
anInt1503 = buf.readUnsignedShort();
} else {
if (((Class94) this).anIntArray1460 == null) {
((Class94) this).anIntArray1460 = new int[10];
((Class94) this).anIntArray1445 = new int[10];
}
((Class94) this).anIntArray1460[-100 + opcode] = buf.readUnsignedShort();
((Class94) this).anIntArray1445[opcode - 100] = buf.readUnsignedShort();
}
} else
((Class94) this).anInt1443 = buf.readUnsignedByte();
} else
((Class94) this).anInt1494 = buf.readUnsignedShort();
} else
anInt1490 = buf.readUnsignedShort();
} else
anInt1466 = buf.readUnsignedShort();
} else
anInt1454 = (buf.readUnsignedShort());
} else
((Class94) this).aBoolean1463 = true;
} else {
int i_58_ = (buf.readUnsignedByte());
aShortArray1504 = (new short[i_58_]);
aShortArray1488 = (new short[i_58_]);
for (int i_59_ = 0; i_59_ 32767)
((Class94) this).anInt1491 -= 65536;
}
} else {
((Class94) this).anInt1425 = buf.readUnsignedShort();
if (((Class94) this).anInt1425 () {
/**
 * Rewrites if-statements whose condition is a "!=" comparison and that have an else branch:
 * the operator becomes "==" and the "then" and "else" statements trade places.
 * Other if-statements are left as they are. Traversal always continues via the superclass.
 *
 * @param n   the if-statement being visited
 * @param arg unused visitor argument
 * @return whatever the superclass visit returns for this node
 */
@Override
public Visitable visit(IfStmt n, Void arg) {
    // Figure out what to get and what to cast simply by looking at the AST in a debugger!
    n.getCondition().ifBinaryExpr(condition -> {
        boolean isNotEquals = condition.getOperator() == BinaryExpr.Operator.NOT_EQUALS;
        if (!isNotEquals || !n.getElseStmt().isPresent()) {
            // Nothing to flip: either not a "!=" comparison or there is no else branch.
            return;
        }
        // Clone the nodes that are moved around, so that neither JavaParser nor the
        // reader gets confused about which parent each statement belongs to.
        Statement formerThen = n.getThenStmt().clone();
        Statement formerElse = n.getElseStmt().get().clone();
        n.setThenStmt(formerElse);
        n.setElseStmt(formerThen);
        condition.setOperator(BinaryExpr.Operator.EQUALS);
    });
    return super.visit(n, arg);
}
}, null);
// This saves all the files we just read to an output directory.
sourceRoot.saveAll(
// The path of the Maven module/project which contains the LogicPositivizer class.
CodeGenerationUtils.mavenModuleRoot(LogicPositivizer.class)
// appended with a path to "output"
.resolve(Paths.get("output")));
}
}
结果
import com.github.javaparser.utils.CodeGenerationUtils;
import com.github.javaparser.utils.SourceRoot;
public class Blabla {
private final void method1013(StreamBuffer buf, int opcode) {
if (opcode == 1)
anInt1481 = buf.readUnsignedShort();
else {
if (opcode == 2)
((Class94) this).aString1434 = buf.readString();
else {
if (opcode == 4)
((Class94) this).anInt1436 = buf.readUnsignedShort();
else {
do {
if (opcode != 5) {
if (opcode == 6)
((Class94) this).anInt1477 = buf.readUnsignedShort();
else {
if (opcode == 7) {
((Class94) this).anInt1425 = buf.readUnsignedShort();
if (((Class94) this).anInt1425 32767)
((Class94) this).anInt1491 -= 65536;
} else {
if (opcode == 11)
((Class94) this).anInt1456 = 1;
else if (opcode == 12)
((Class94) this).anInt1473 = (buf.readInt());
else {
if (opcode == 16)
((Class94) this).aBoolean1502 = true;
else {
if (opcode == 23)
((Class94) this).anInt1424 = (buf.readUnsignedShort());
else if (opcode == 24)
anInt1449 = (buf.readUnsignedShort());
else {
if (opcode == 25)
((Class94) this).anInt1487 = (buf.readUnsignedShort());
else if (opcode == 26)
anInt1435 = (buf.readUnsignedShort());
else if (opcode = 35) {
if (opcode >= 35 && opcode = 110) {
if (opcode == 110)
anInt1423 = buf.readUnsignedShort();
else if (opcode == 111)
anInt1503 = buf.readUnsignedShort();
else {
if (opcode == 112)
anInt1480 = buf.readUnsignedShort();
else if (opcode == 113)
anInt1458 = buf.readByte(false);
else {
if (opcode == 114)
anInt1439 = buf.readByte(false) * 5;
else if (opcode == 115)
((Class94) this).anInt1462 = buf.readUnsignedByte();
else if (opcode == 121)
((Class94) this).anInt1429 = buf.readUnsignedShort();
else {
if (opcode == 122)
((Class94) this).anInt1431 = buf.readUnsignedShort();
else {
if (opcode == 125) {
anInt1493 = buf.readByte(false) i_57_; i_57_++) ((Class94) this).anIntArray1441[i_57_] = buf.readUnsignedShort();
} else {
if (opcode == 249) {
int i_52_ = buf.readUnsignedByte();
if (((Class94) this).aClass194_1472 == null) {
int i_53_ = Class307.calculateSize(i_52_);
((Class94) this).aClass194_1472 = new HashTable(i_53_);
}
for (int i_54_ = 0; i_54_
// use cu
);
或者
import static com.github.javaparser.ParseStart.*;
import static com.github.javaparser.Providers.provider;
...
// Parse with an explicit start rule and provider; the unit is printed only on success.
new JavaParser()
        .parse(COMPILATION_UNIT, provider("class X{}"))
        .ifSuccessful(System.out::println);
完整的API由JavaParser构造函数和整套parse方法组成,另外还有一个额外的方法——真正执行解析的那个方法。
它永远不会抛出异常。 ParseResult可以告诉您解析是否成功,如果不成功,还会告诉您遇到了哪些问题。
重用JavaParser实例将提高速度。
JavaParser实例不是线程安全的!
这个额外的parse方法的第一个参数指示您要传入的源码类型。 通常它是一个编译单元,但是您也可以解析表达式,名称等。
该方法的第二个参数提供源代码。 Provider(提供者)是对任何种类输入的抽象。
完整的API可让您随意组合这些参数。
同样,解析Javadoc是一个例外。 为此,您需要JavadocParser。
可以在构造函数中传递配置。
// Configuration is handed to the parser through its constructor.
ParserConfiguration configuration = new ParserConfiguration();
JavaParser parser = new JavaParser(configuration);
// Parse a single expression. The type argument (com.github.javaparser.ast.expr.Expression)
// matches the EXPRESSION start rule, so the result is typed instead of a raw ParseResult.
ParseResult<Expression> parseResult = parser.parse(EXPRESSION, provider("1+1"));
if (!parseResult.isSuccessful()) {
    System.out.println(parseResult.getProblems().toString());
}
// a failed parse does not always mean there is no result.
parseResult.getResult().ifPresent(System.out::println);
if (parseResult.getCommentsCollection().isPresent()) {
    // ...
}
一次性分析整个项目
JavaParser非常适合分析Java代码,并提供了一种一次性处理源目录的方法。
但是它缺少分析项目的方法,该项目可能包含多个源目录。
以前的答案是手动创建所有SourceRoot,然后可以对其进行分析。
让我们回顾一下JavaParser存储库的以下示例。
为了解析存储库中的所有文件,您必须手动定义每个模块的根目录,然后使用每个源根目录创建SourceRoot,然后可以对其进行进一步处理。
// Placeholder: replace with the actual path of the project root.
Path projectRoot = path-to-project-root;
// Every module's source directory has to be listed by hand, relative to the project root.
String[] roots = new String[]{
    "javaparser-core/src/main/java",
    "javaparser-core-testing/src/test/java",
    "javaparser-core-generators/src/main/java",
    "javaparser-core-metamodel-generator/src/main/java",
    "javaparser-symbol-solver-core/src/main/java",
    "javaparser-symbol-solver-logic/src/main/java",
    "javaparser-symbol-solver-model/src/main/java",
    "javaparser-symbol-solver-testing/src/test/java"
};
for (String root : roots) {
    // One SourceRoot per source directory; each is parsed on its own.
    SourceRoot sourceRoot = new SourceRoot(projectRoot.resolve(root));
    // Typed result list (one ParseResult per parsed file) instead of a raw List.
    List<ParseResult<CompilationUnit>> parseResults = sourceRoot.tryToParse();
}
我们需要一种自动实现此目的的方法,从而避免了所有人重新发明轮子的麻烦。
为此,我们介绍ProjectRoot和CollectionStrategy。
如果仅需要解析Java文件,则仅收集项目中的SourceRoots就足够了。
但是,如果您还想解析java文件中的符号,则还需要收集jar文件。
使用ParserCollectionStrategy或SymbolSolverCollectionStrategy,您可以分别指定只进行语法解析(parse),还是同时进行符号解析(resolve)。
以下示例显示如何初始化ProjectRoot:
// The two declarations below are alternatives: keep only one of them in a class.
// NOTE(review): `root` is not defined in this snippet; presumably the project root path, confirm.

// Option 1: only parsing. Use this when the Java files just need to be parsed.
private final ProjectRoot projectRoot =
new ParserCollectionStrategy()
.collect(root);
// Option 2: parsing and resolving. Use this when symbols in the Java files must also be resolved.
private final ProjectRoot projectRoot =
new SymbolSolverCollectionStrategy()
.collect(root);