Skip to content

Commit 9567c23

Browse files
fix(node): use source encoding in getText()
1 parent 5c560d3 commit 9567c23

File tree

4 files changed: +32 -22 lines changed

src/main/java/io/github/treesitter/jtreesitter/Node.java

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ public final class Node {
2121
private final Tree tree;
2222
private @Nullable List<Node> children;
2323
private final Arena arena = Arena.ofAuto();
24+
private boolean wasEdited = false;
2425

2526
Node(MemorySegment self, Tree tree) {
2627
this.self = self;
@@ -409,10 +410,7 @@ public Optional<Node> getChildWithDescendant(Node descendant) {
409410

410411
/** Get the source code of the node, if available. */
411412
public @Nullable String getText() {
412-
var text = tree.getText();
413-
if (text == null) return null;
414-
var endByte = Math.min(getEndByte(), text.length());
415-
return text.substring(getStartByte(), endByte);
413+
return !wasEdited ? tree.getRegion(getStartByte(), getEndByte()) : null;
416414
}
417415

418416
/**
@@ -426,6 +424,7 @@ public Optional<Node> getChildWithDescendant(Node descendant) {
426424
*/
427425
public void edit(InputEdit edit) {
428426
ts_node_edit(self, edit.into(arena));
427+
wasEdited = true;
429428
children = null;
430429
}
431430

src/main/java/io/github/treesitter/jtreesitter/Parser.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -244,7 +244,7 @@ public Optional<Tree> parse(String source, InputEncoding encoding, @Nullable Tre
244244
var old = oldTree == null ? MemorySegment.NULL : oldTree.segment();
245245
var tree = ts_parser_parse_string_encoding(self, old, string, bytes.length, encoding.ordinal());
246246
if (tree.equals(MemorySegment.NULL)) return Optional.empty();
247-
return Optional.of(new Tree(tree, language, source));
247+
return Optional.of(new Tree(tree, language, source, encoding.charset()));
248248
}
249249
}
250250

@@ -299,7 +299,7 @@ public Optional<Tree> parse(ParseCallback callback, InputEncoding encoding, @Nul
299299
var old = oldTree == null ? MemorySegment.NULL : oldTree.segment();
300300
var tree = ts_parser_parse(self, old, input);
301301
if (tree.equals(MemorySegment.NULL)) return Optional.empty();
302-
return Optional.of(new Tree(tree, language, null));
302+
return Optional.of(new Tree(tree, language, null, null));
303303
}
304304

305305
/**

src/main/java/io/github/treesitter/jtreesitter/Tree.java

Lines changed: 16 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
import io.github.treesitter.jtreesitter.internal.TSRange;
66
import io.github.treesitter.jtreesitter.internal.TreeSitter;
77
import java.lang.foreign.*;
8+
import java.nio.charset.Charset;
89
import java.util.ArrayList;
910
import java.util.Collections;
1011
import java.util.List;
@@ -15,16 +16,18 @@
1516
@NullMarked
1617
public final class Tree implements AutoCloseable, Cloneable {
1718
private final MemorySegment self;
18-
private @Nullable String source;
19+
private byte[] source;
20+
private @Nullable Charset charset;
1921
private final Arena arena;
2022
private final Language language;
2123
private @Nullable List<Range> includedRanges;
2224

23-
Tree(MemorySegment self, Language language, @Nullable String source) {
25+
Tree(MemorySegment self, Language language, @Nullable String source, @Nullable Charset charset) {
2426
arena = Arena.ofShared();
2527
this.self = self.reinterpret(arena, TreeSitter::ts_tree_delete);
2628
this.language = language;
27-
this.source = source;
29+
this.source = source != null && charset != null ? source.getBytes(charset) : new byte[0];
30+
this.charset = charset;
2831
}
2932

3033
private Tree(Tree tree) {
@@ -33,21 +36,28 @@ private Tree(Tree tree) {
3336
self = copy.reinterpret(arena, TreeSitter::ts_tree_delete);
3437
language = tree.language;
3538
source = tree.source;
39+
charset = tree.charset;
3640
includedRanges = tree.includedRanges;
3741
}
3842

3943
MemorySegment segment() {
4044
return self;
4145
}
4246

47+
@Nullable
48+
String getRegion(@Unsigned int start, @Unsigned int end) {
49+
var length = Math.min(end, source.length) - start;
50+
return charset != null ? new String(source, start, length, charset) : null;
51+
}
52+
4353
/** Get the language that was used to parse the syntax tree. */
4454
public Language getLanguage() {
4555
return language;
4656
}
4757

4858
/** Get the source code of the syntax tree, if available. */
4959
public @Nullable String getText() {
50-
return source;
60+
return charset != null ? new String(source, charset) : null;
5161
}
5262

5363
/** Get the root node of the syntax tree. */
@@ -122,7 +132,8 @@ public void edit(InputEdit edit) {
122132
try (var alloc = Arena.ofConfined()) {
123133
ts_tree_edit(self, edit.into(alloc));
124134
} finally {
125-
source = null;
135+
source = new byte[0];
136+
charset = null;
126137
}
127138
}
128139

src/test/java/io/github/treesitter/jtreesitter/NodeTest.java

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ class NodeTest {
1313
static void beforeAll() {
1414
var language = new Language(TreeSitterJava.language());
1515
try (var parser = new Parser(language)) {
16-
tree = parser.parse("class Foo {}").orElseThrow();
16+
tree = parser.parse("class Foo {} // uni©ode").orElseThrow();
1717
node = tree.getRootNode();
1818
}
1919
}
@@ -100,13 +100,13 @@ void getStartByte() {
100100

101101
@Test
102102
void getEndByte() {
103-
assertEquals(12, node.getEndByte());
103+
assertEquals(24, node.getEndByte());
104104
}
105105

106106
@Test
107107
void getRange() {
108-
Point startPoint = new Point(0, 0), endPoint = new Point(0, 12);
109-
assertEquals(new Range(startPoint, endPoint, 0, 12), node.getRange());
108+
Point startPoint = new Point(0, 0), endPoint = new Point(0, 24);
109+
assertEquals(new Range(startPoint, endPoint, 0, 24), node.getRange());
110110
}
111111

112112
@Test
@@ -116,22 +116,22 @@ void getStartPoint() {
116116

117117
@Test
118118
void getEndPoint() {
119-
assertEquals(new Point(0, 12), node.getEndPoint());
119+
assertEquals(new Point(0, 24), node.getEndPoint());
120120
}
121121

122122
@Test
123123
void getChildCount() {
124-
assertEquals(1, node.getChildCount());
124+
assertEquals(2, node.getChildCount());
125125
}
126126

127127
@Test
128128
void getNamedChildCount() {
129-
assertEquals(1, node.getNamedChildCount());
129+
assertEquals(2, node.getNamedChildCount());
130130
}
131131

132132
@Test
133133
void getDescendantCount() {
134-
assertEquals(7, node.getDescendantCount());
134+
assertEquals(8, node.getDescendantCount());
135135
}
136136

137137
@Test
@@ -273,8 +273,8 @@ void getChildWithDescendant() {
273273

274274
@Test
275275
void getText() {
276-
var child = node.getDescendant(6, 9).orElseThrow();
277-
assertEquals("Foo", child.getText());
276+
var child = node.getChild(1).orElseThrow();
277+
assertEquals("// uni©ode", child.getText());
278278
}
279279

280280
@Test
@@ -298,7 +298,7 @@ void walk() {
298298

299299
@Test
300300
void toSexp() {
301-
var sexp = "(program (class_declaration name: (identifier) body: (class_body)))";
301+
var sexp = "(program (class_declaration name: (identifier) body: (class_body)) (line_comment))";
302302
assertEquals(sexp, node.toSexp());
303303
}
304304

0 commit comments

Comments
 (0)