maven 引入

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>local-test</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>17</maven.compiler.source>
        <maven.compiler.target>17</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

        <!-- Maven -->
    <dependencies>
        <!-- add tree sitter -->
        <dependency>
            <groupId>io.github.bonede</groupId>
            <artifactId>tree-sitter</artifactId>
            <version>0.25.3</version>
        </dependency>
        <!-- add json parser -->
        <dependency>
            <groupId>io.github.bonede</groupId>
            <artifactId>tree-sitter-json</artifactId>
            <version>0.24.8</version>
        </dependency>
<!--        <dependency>-->
<!--            <groupId>io.github.bonede</groupId>-->
<!--            <artifactId>tree-sitter-java</artifactId>-->
<!--            <version>0.23.4</version>-->
<!--        </dependency>-->
        <dependency>
            <groupId>io.github.bonede</groupId>
            <artifactId>tree-sitter-java</artifactId>
            <version>0.23.4</version>
        </dependency>
<!--        <dependency>-->
<!--            <groupId>io.github.bonede</groupId>-->
<!--            <artifactId>tree-sitter-css</artifactId>-->
<!--            <version>0.23.1</version>-->
<!--        </dependency>-->
<!--        <dependency>-->
<!--            <groupId>io.github.bonede</groupId>-->
<!--            <artifactId>tree-sitter-go</artifactId>-->
<!--            <version>0.23.3</version>-->
<!--        </dependency>-->
<!--        <dependency>-->
<!--            <groupId>io.github.bonede</groupId>-->
<!--            <artifactId>tree-sitter-html</artifactId>-->
<!--            <version>0.23.2</version>-->
<!--        </dependency>-->


        <!-- Apache Commons Codec for SHA-256 hashing -->
        <dependency>
            <groupId>commons-codec</groupId>
            <artifactId>commons-codec</artifactId>
            <version>1.17.1</version>
        </dependency>
        <!-- Gson for JSON serialization -->
        <dependency>
            <groupId>com.google.code.gson</groupId>
            <artifactId>gson</artifactId>
            <version>2.10.1</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.11.0</version>
                <configuration>
                    <source>17</source>
                    <target>17</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.codehaus.mojo</groupId>
                <artifactId>exec-maven-plugin</artifactId>
                <version>3.1.0</version>
                <configuration>
                    <mainClass>org.example.demo.TreeSitterJavaQueryTest</mainClass>
                </configuration>
            </plugin>
        </plugins>
    </build>

</project>

测试例子

例子

package org.example.demo;

import org.treesitter.*;

import java.nio.charset.StandardCharsets;

/**
 * Tree-sitter Java 基础解析示例
 * 只提取最核心的信息:类、方法、字段
 */
public class TreeSitterJavaQueryTest2 {

    public static void main(String[] args) {
        testJavaQueryExtraction();
    }

    public static void testJavaQueryExtraction() {

        String source = """
            package demo;

            import java.util.List;
            import java.io.File;

            @interface MyAnno {}

            interface MyInterface {}

            enum MyEnum {
                A, B
            }

            class Parent {}

            class Main extends Parent implements MyInterface {

                public Main() {}

                public void hello() {
                    System.out.println("hello");
                    test();
                }

                private void test() {}
            }
            """;

        byte[] sourceBytes = source.getBytes(StandardCharsets.UTF_8);

        // ===============================
        // 1️⃣ Parser
        // ===============================
        TSParser parser = new TSParser();
        TSLanguage javaLang = new TreeSitterJava();

        parser.setLanguage(javaLang);

        TSTree tree = parser.parseString(null, source);

        // assertNotNull(tree);

        TSNode root = tree.getRootNode();

        // ===============================
        // 2️⃣ Query
        // ===============================
        TSQuery query = new TSQuery(
                javaLang,
                getJavaQuery()
        );

        TSQueryCursor cursor = new TSQueryCursor();
        cursor.exec(query, root);

        TSQueryMatch match = new TSQueryMatch();

        int matchCount = 0;

        // ===============================
        // 3️⃣ Iterate matches
        // ===============================
        while (cursor.nextMatch(match)) {
            matchCount++;

            for (TSQueryCapture capture : match.getCaptures()) {
//                TSQueryCapture capture = match.getCaptures()[i];
                TSNode node = capture.getNode();

                String captureName =
                        query.getCaptureNameForId(
                                capture.getIndex()
                        );

                String text = getNodeText(node, sourceBytes);

                System.out.printf(
                        "Capture: %-30s -> %s%n",
                        captureName,
                        text
                );
            }

            System.out.println("------\n\n");
        }
    }

    /**
     * 从源代码字节数组中提取节点的文本内容
     */
    private static String getNodeText(TSNode node, byte[] sourceBytes) {
        if (node == null || node.isNull()) {
            return "";
        }
        int startByte = node.getStartByte();
        int endByte = node.getEndByte();
        if (startByte >= 0 && endByte <= sourceBytes.length) {
            byte[] contentBytes = new byte[endByte - startByte];
            System.arraycopy(sourceBytes, startByte, contentBytes, 0, endByte - startByte);
            return new String(contentBytes, StandardCharsets.UTF_8);
        }
        return "";
    }

    /**
     * Java 查询语句
     */
    private static String getJavaQuery() {
        return """
            ; Classes, Interfaces, Enums, Annotations
            (class_declaration name: (identifier) @name) @definition.class
            (interface_declaration name: (identifier) @name) @definition.interface
            (enum_declaration name: (identifier) @name) @definition.enum
            (annotation_type_declaration name: (identifier) @name) @definition.annotation
            
            ; Methods & Constructors
            (method_declaration name: (identifier) @name) @definition.method
            (constructor_declaration name: (identifier) @name) @definition.constructor
            
            ; Imports
            (import_declaration (_) @import.source) @import
            
            ; Calls
            (method_invocation name: (identifier) @call.name) @call
            (method_invocation object: (_) name: (identifier) @call.name) @call
            
            ; Heritage - extends class
            (class_declaration
              name: (identifier) @heritage.class
              (superclass (type_identifier) @heritage.extends)
            ) @heritage
            
            ; Heritage - implements interfaces
            (class_declaration
              name: (identifier) @heritage.class
              (super_interfaces
                (type_list
                    (type_identifier) @heritage.implements
                )
              )
            ) @heritage.impl
            """;
    }

}

效果

输出:

m2\repository\com\google\code\gson\gson\2.10.1\gson-2.10.1.jar org.example.demo.TreeSitterJavaQueryTest2


Capture: import                         -> import java.util.List;
Capture: import.source                  -> java.util.List
------


Capture: import                         -> import java.io.File;
Capture: import.source                  -> java.io.File
------


Capture: definition.annotation          -> @interface MyAnno {}
Capture: name                           -> MyAnno
------


Capture: definition.interface           -> interface MyInterface {}
Capture: name                           -> MyInterface
------


Capture: definition.enum                -> enum MyEnum {
    A, B
}
Capture: name                           -> MyEnum
------


Capture: definition.class               -> class Parent {}
Capture: name                           -> Parent
------


Capture: definition.class               -> class Main extends Parent implements MyInterface {

    public Main() {}

    public void hello() {
        System.out.println("hello");
        test();
    }

    private void test() {}
}
Capture: name                           -> Main
------


Capture: heritage                       -> class Main extends Parent implements MyInterface {

    public Main() {}

    public void hello() {
        System.out.println("hello");
        test();
    }

    private void test() {}
}
Capture: heritage.class                 -> Main
Capture: heritage.extends               -> Parent
------


Capture: heritage.impl                  -> class Main extends Parent implements MyInterface {

    public Main() {}

    public void hello() {
        System.out.println("hello");
        test();
    }

    private void test() {}
}
Capture: heritage.class                 -> Main
Capture: heritage.implements            -> MyInterface
------


Capture: definition.constructor         -> public Main() {}
Capture: name                           -> Main
------


Capture: definition.method              -> public void hello() {
        System.out.println("hello");
        test();
    }
Capture: name                           -> hello
------


Capture: call                           -> System.out.println("hello")
Capture: call.name                      -> println
------


Capture: call                           -> System.out.println("hello")
Capture: call.name                      -> println
------


Capture: call                           -> test()
Capture: call.name                      -> test
------


Capture: definition.method              -> private void test() {}
Capture: name                           -> test
------

参考资料

https://github.com/bonede/tree-sitter-ng