Commit 9a1c021

Introduce tokenizing options for full and partial mode (#1669)
Add a tokenizing mode to the tokenize method:
- Full: the full text to tokenize is provided
- Partial: only a portion of the text to tokenize is provided

In indentation-aware lexing, dedents are not auto-completed in partial mode.
1 parent 51d99a6 commit 9a1c021
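
For illustration, a minimal sketch of the two modes. It assumes an IndentationAwareLexer instance (see the service wiring sketch further below); the input string is arbitrary:

// Assumption: `lexer` is services.parser.Lexer from services configured
// with the indentation-aware token builder and lexer.
const text = 'if true:\n    return true';

// Full mode (the default): the input is a complete document, so the lexer
// appends synthetic DEDENT tokens for every indentation level still open at EOF.
const full = lexer.tokenize(text); // equivalent to passing { mode: 'full' }

// Partial mode: the input is only a fragment (e.g. the text up to the cursor),
// so no closing DEDENT tokens are synthesized at the end.
const partial = lexer.tokenize(text, { mode: 'partial' });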

File tree (7 files changed: +84 / -36 lines)

packages/langium/src/parser/indentation-aware.ts
packages/langium/src/parser/langium-parser.ts
packages/langium/src/parser/lexer.ts
packages/langium/src/parser/token-builder.ts
packages/langium/src/validation/document-validator.ts
packages/langium/src/validation/validation-registry.ts
packages/langium/test/parser/indentation-aware.test.ts

packages/langium/src/parser/indentation-aware.ts

Lines changed: 20 additions & 19 deletions

@@ -7,11 +7,11 @@
 import type { CustomPatternMatcherFunc, TokenType, IToken, IMultiModeLexerDefinition, TokenVocabulary } from 'chevrotain';
 import type { Grammar, TerminalRule } from '../languages/generated/ast.js';
 import type { LexingReport, TokenBuilderOptions } from './token-builder.js';
-import type { LexerResult } from './lexer.js';
+import type { LexerResult, TokenizeOptions } from './lexer.js';
 import type { LangiumCoreServices } from '../services.js';
 import { createToken, createTokenInstance, Lexer } from 'chevrotain';
 import { DefaultTokenBuilder } from './token-builder.js';
-import { DefaultLexer, isTokenTypeArray } from './lexer.js';
+import { DEFAULT_TOKENIZE_OPTIONS, DefaultLexer, isTokenTypeArray } from './lexer.js';
 
 type IndentationAwareDelimiter<TokenName extends string> = [begin: TokenName, end: TokenName];
 
@@ -179,11 +179,11 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
         }
     }
 
-    override popLexingReport(text: string): IndentationLexingReport {
-        const result = super.popLexingReport(text);
+    override flushLexingReport(text: string): IndentationLexingReport {
+        const result = super.flushLexingReport(text);
         return {
             ...result,
-            remainingDedents: this.popRemainingDedents(text),
+            remainingDedents: this.flushRemainingDedents(text),
         };
     }
 
@@ -203,9 +203,12 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
      *
      * @param text The full input string.
      * @param offset The current position at which to attempt a match
+     * @param tokens Previously scanned tokens
+     * @param groups Token Groups
      * @returns The current and previous indentation levels and the matched whitespace
      */
-    protected matchWhitespace(text: string, offset: number, _tokens: IToken[], _groups: Record<string, IToken[]>): { currIndentLevel: number, prevIndentLevel: number, match: RegExpExecArray | null } {
+    // eslint-disable-next-line @typescript-eslint/no-unused-vars
+    protected matchWhitespace(text: string, offset: number, tokens: IToken[], groups: Record<string, IToken[]>): { currIndentLevel: number, prevIndentLevel: number, match: RegExpExecArray | null } {
         this.whitespaceRegExp.lastIndex = offset;
         const match = this.whitespaceRegExp.exec(text);
         return {
@@ -251,12 +254,10 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
      *
      * @param text The full input string.
      * @param offset The offset at which to attempt a match
-     * @param tokens Previously scanned Tokens
+     * @param tokens Previously scanned tokens
      * @param groups Token Groups
      */
     protected indentMatcher(text: string, offset: number, tokens: IToken[], groups: Record<string, IToken[]>): ReturnType<CustomPatternMatcherFunc> {
-        const { indentTokenName } = this.options;
-
         if (!this.isStartOfLine(text, offset)) {
             return null;
         }
@@ -274,7 +275,7 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
         const indentToken = this.createIndentationTokenInstance(
             this.indentTokenType,
             text,
-            match?.[0] ?? indentTokenName,
+            match?.[0] ?? '',
             offset,
         );
         tokens.push(indentToken);
@@ -288,12 +289,10 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
      *
      * @param text The full input string.
      * @param offset The offset at which to attempt a match
-     * @param tokens Previously scanned Tokens
+     * @param tokens Previously scanned tokens
      * @param groups Token Groups
      */
     protected dedentMatcher(text: string, offset: number, tokens: IToken[], groups: Record<string, IToken[]>): ReturnType<CustomPatternMatcherFunc> {
-        const { dedentTokenName } = this.options;
-
         if (!this.isStartOfLine(text, offset)) {
             return null;
         }
@@ -316,7 +315,7 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
             offset,
             length: match?.[0]?.length ?? 0,
             line: this.getLineNumber(text, offset),
-            column: 0
+            column: 1
         });
         return null;
     }
@@ -327,7 +326,7 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
         const token = this.createIndentationTokenInstance(
             this.dedentTokenType,
             text,
-            match?.[0] ?? dedentTokenName,
+            match?.[0] ?? '',
             offset,
         );
         tokens.push(token);
@@ -362,7 +361,7 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string, Key
     * @param text Full text that was tokenized
     * @returns Remaining dedent tokens to match all previous indents at the end of the file
     */
-    popRemainingDedents(text: string): IToken[] {
+    flushRemainingDedents(text: string): IToken[] {
        const remainingDedents: IToken[] = [];
        while (this.indentationStack.length > 1) {
            remainingDedents.push(
@@ -402,13 +401,15 @@ export class IndentationAwareLexer extends DefaultLexer {
        }
    }
 
-    override tokenize(text: string): LexerResult {
+    override tokenize(text: string, options: TokenizeOptions = DEFAULT_TOKENIZE_OPTIONS): LexerResult {
        const result = super.tokenize(text);
 
        // consuming all remaining dedents and remove them as they might not be serializable
        const report = result.report as IndentationLexingReport;
-        const remainingDedents = report.remainingDedents;
-        result.tokens.push(...remainingDedents);
+        if (options?.mode === 'full') {
+            // auto-complete document with remaining dedents
+            result.tokens.push(...report.remainingDedents);
+        }
        report.remainingDedents = [];
 
        // remove any "indent-dedent" pair with an empty body as these are typically
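
For context, this is how IndentationAwareTokenBuilder and IndentationAwareLexer are typically wired into a language's services; a sketch following the Langium indentation recipe, where the grammar string is an assumed input:

import { IndentationAwareLexer, IndentationAwareTokenBuilder } from 'langium';
import { createServicesForGrammar } from 'langium/grammar';

const services = await createServicesForGrammar({
    grammar: sampleGrammar, // assumption: an indentation-sensitive grammar string
    module: {
        parser: {
            TokenBuilder: () => new IndentationAwareTokenBuilder(),
            Lexer: (services) => new IndentationAwareLexer(services),
        },
    },
});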

packages/langium/src/parser/langium-parser.ts

Lines changed: 1 addition & 1 deletion

@@ -527,7 +527,7 @@ export class LangiumCompletionParser extends AbstractLangiumParser {
 
     parse(input: string): CompletionParserResult {
         this.resetState();
-        const tokens = this.lexer.tokenize(input);
+        const tokens = this.lexer.tokenize(input, { mode: 'partial' });
         this.tokens = tokens.tokens;
         this.wrapper.input = [...this.tokens];
         this.mainRule.call(this.wrapper, {});
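
Partial mode matters here because the completion parser only ever sees the document up to the completion offset: in full mode, an indentation-aware lexer would append synthetic dedents at that cut-off, closing blocks the user is still typing in.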

packages/langium/src/parser/lexer.ts

Lines changed: 12 additions & 4 deletions

@@ -25,9 +25,17 @@ export interface LexerResult {
     report?: LexingReport;
 }
 
+export type TokenizeMode = 'full' | 'partial';
+
+export interface TokenizeOptions {
+    mode?: TokenizeMode;
+}
+
+export const DEFAULT_TOKENIZE_OPTIONS: TokenizeOptions = { mode: 'full' };
+
 export interface Lexer {
     readonly definition: TokenTypeDictionary;
-    tokenize(text: string): LexerResult;
+    tokenize(text: string, options?: TokenizeOptions): LexerResult;
 }
 
 export class DefaultLexer implements Lexer {
@@ -36,7 +44,7 @@ export class DefaultLexer implements Lexer {
     protected tokenBuilder: TokenBuilder;
     protected tokenTypes: TokenTypeDictionary;
 
-    constructor( services: LangiumCoreServices) {
+    constructor(services: LangiumCoreServices) {
         this.tokenBuilder = services.parser.TokenBuilder;
         const tokens = this.tokenBuilder.buildTokens(services.Grammar, {
             caseInsensitive: services.LanguageMetaData.caseInsensitive
@@ -52,13 +60,13 @@ export class DefaultLexer implements Lexer {
         return this.tokenTypes;
     }
 
-    tokenize(text: string): LexerResult {
+    tokenize(text: string, _options: TokenizeOptions = DEFAULT_TOKENIZE_OPTIONS): LexerResult {
         const chevrotainResult = this.chevrotainLexer.tokenize(text);
         return {
             tokens: chevrotainResult.tokens,
             errors: chevrotainResult.errors,
             hidden: chevrotainResult.groups.hidden ?? [],
-            report: this.tokenBuilder.popLexingReport?.(text)
+            report: this.tokenBuilder.flushLexingReport?.(text)
         };
     }
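
Custom lexers can adopt the new parameter the same way. A sketch, assuming the new symbols are re-exported from the langium entry point (the subclass and its logging are illustrative, not part of this commit):

import { DefaultLexer, DEFAULT_TOKENIZE_OPTIONS } from 'langium';
import type { LexerResult, TokenizeOptions } from 'langium';

class TracingLexer extends DefaultLexer {
    override tokenize(text: string, options: TokenizeOptions = DEFAULT_TOKENIZE_OPTIONS): LexerResult {
        // Defaulting to DEFAULT_TOKENIZE_OPTIONS keeps existing call sites,
        // which pass no options, behaving as before ('full' mode).
        const result = super.tokenize(text, options);
        console.debug(`${options.mode ?? 'full'} tokenization produced ${result.tokens.length} tokens`);
        return result;
    }
}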

packages/langium/src/parser/token-builder.ts

Lines changed: 6 additions & 3 deletions

@@ -25,7 +25,7 @@ export interface TokenBuilder {
     *
     * @param text The text that was tokenized.
     */
-    popLexingReport?(text: string): LexingReport;
+    flushLexingReport?(text: string): LexingReport;
 }
 
 /**
@@ -36,8 +36,10 @@ export interface LexingReport {
     diagnostics: LexingDiagnostic[];
 }
 
+export type LexingDiagnosticSeverity = 'error' | 'warning' | 'info' | 'hint';
+
 export interface LexingDiagnostic extends ILexingError {
-    severity?: 'error' | 'warning' | 'info' | 'hint';
+    severity?: LexingDiagnosticSeverity;
 }
 
 export class DefaultTokenBuilder implements TokenBuilder {
@@ -64,7 +66,8 @@ export class DefaultTokenBuilder implements TokenBuilder {
         return tokens;
     }
 
-    popLexingReport(_text: string): LexingReport {
+    // eslint-disable-next-line @typescript-eslint/no-unused-vars
+    flushLexingReport(text: string): LexingReport {
         return { diagnostics: this.popDiagnostics() };
     }
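
A sketch of a custom token builder hooking into the renamed flushLexingReport and the new LexingDiagnosticSeverity values; the tab check is purely illustrative:

import { DefaultTokenBuilder } from 'langium';
import type { LexingReport } from 'langium';

class TabWarningTokenBuilder extends DefaultTokenBuilder {
    override flushLexingReport(text: string): LexingReport {
        const report = super.flushLexingReport(text);
        const offset = text.indexOf('\t');
        if (offset >= 0) {
            report.diagnostics.push({
                severity: 'warning', // a LexingDiagnosticSeverity value
                message: 'Tab character found; consider using spaces.',
                offset,
                length: 1,
                line: 1,             // simplified; real code would derive line/column from the offset
                column: offset + 1,
            });
        }
        return report;
    }
}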

packages/langium/src/validation/document-validator.ts

Lines changed: 7 additions & 7 deletions

@@ -11,14 +11,14 @@ import type { ParseResult } from '../parser/langium-parser.js';
 import type { LangiumCoreServices } from '../services.js';
 import type { AstNode, CstNode } from '../syntax-tree.js';
 import type { LangiumDocument } from '../workspace/documents.js';
-import type { DiagnosticData, DiagnosticInfo, ValidationAcceptor, ValidationCategory, ValidationRegistry } from './validation-registry.js';
+import type { DiagnosticData, DiagnosticInfo, ValidationAcceptor, ValidationCategory, ValidationRegistry, ValidationSeverity } from './validation-registry.js';
 import { CancellationToken } from '../utils/cancellation.js';
 import { findNodeForKeyword, findNodeForProperty } from '../utils/grammar-utils.js';
 import { streamAst } from '../utils/ast-utils.js';
 import { tokenToRange } from '../utils/cst-utils.js';
 import { interruptAndCheck, isOperationCancelled } from '../utils/promise-utils.js';
 import { diagnosticData } from './validation-registry.js';
-import type { LexingDiagnostic } from '../parser/token-builder.js';
+import type { LexingDiagnostic, LexingDiagnosticSeverity } from '../parser/token-builder.js';
 
 export interface ValidationOptions {
     /**
@@ -100,7 +100,7 @@ export class DefaultDocumentValidator implements DocumentValidator {
     protected processLexingErrors(parseResult: ParseResult, diagnostics: Diagnostic[], _options: ValidationOptions): void {
         const lexerDiagnostics = [...parseResult.lexerErrors, ...parseResult.lexerReport?.diagnostics ?? []] as LexingDiagnostic[];
         for (const lexerDiagnostic of lexerDiagnostics) {
-            const severity = lexerDiagnostic?.severity ?? 'error';
+            const severity = lexerDiagnostic.severity ?? 'error';
             const diagnostic: Diagnostic = {
                 severity: toDiagnosticSeverity(severity),
                 range: {
@@ -180,7 +180,7 @@ export class DefaultDocumentValidator implements DocumentValidator {
 
     protected async validateAst(rootNode: AstNode, options: ValidationOptions, cancelToken = CancellationToken.None): Promise<Diagnostic[]> {
         const validationItems: Diagnostic[] = [];
-        const acceptor: ValidationAcceptor = <N extends AstNode>(severity: 'error' | 'warning' | 'info' | 'hint', message: string, info: DiagnosticInfo<N>) => {
+        const acceptor: ValidationAcceptor = <N extends AstNode>(severity: ValidationSeverity, message: string, info: DiagnosticInfo<N>) => {
             validationItems.push(this.toDiagnostic(severity, message, info));
         };
 
@@ -194,7 +194,7 @@ export class DefaultDocumentValidator implements DocumentValidator {
         return validationItems;
     }
 
-    protected toDiagnostic<N extends AstNode>(severity: 'error' | 'warning' | 'info' | 'hint', message: string, info: DiagnosticInfo<N, string>): Diagnostic {
+    protected toDiagnostic<N extends AstNode>(severity: ValidationSeverity, message: string, info: DiagnosticInfo<N, string>): Diagnostic {
         return {
             message,
             range: getDiagnosticRange(info),
@@ -233,7 +233,7 @@ export function getDiagnosticRange<N extends AstNode>(info: DiagnosticInfo<N, st
     return cstNode.range;
 }
 
-export function toDiagnosticSeverity(severity: 'error' | 'warning' | 'info' | 'hint'): DiagnosticSeverity {
+export function toDiagnosticSeverity(severity: LexingDiagnosticSeverity): DiagnosticSeverity {
     switch (severity) {
         case 'error':
             return 1; // according to vscode-languageserver-types/lib/esm/main.js#DiagnosticSeverity.Error
@@ -248,7 +248,7 @@ export function toDiagnosticSeverity(severity: 'error' | 'warning' | 'info' | 'h
     }
 }
 
-export function toDiagnosticData(severity: 'error' | 'warning' | 'info' | 'hint'): DiagnosticData {
+export function toDiagnosticData(severity: LexingDiagnosticSeverity): DiagnosticData {
     switch (severity) {
         case 'error':
             return diagnosticData(DocumentValidator.LexingError);
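
For reference, toDiagnosticSeverity maps onto the LSP DiagnosticSeverity enum. A sketch; the 'error' case is visible in the diff above, and the remaining values follow the vscode-languageserver-types constants:

toDiagnosticSeverity('error');   // 1 (Error)
toDiagnosticSeverity('warning'); // 2 (Warning)
toDiagnosticSeverity('info');    // 3 (Information)
toDiagnosticSeverity('hint');    // 4 (Hint)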

packages/langium/src/validation/validation-registry.ts

Lines changed: 3 additions & 1 deletion

@@ -57,7 +57,9 @@ export function diagnosticData(code: string): DiagnosticData {
     return { code };
 }
 
-export type ValidationAcceptor = <N extends AstNode>(severity: 'error' | 'warning' | 'info' | 'hint', message: string, info: DiagnosticInfo<N>) => void
+export type ValidationSeverity = 'error' | 'warning' | 'info' | 'hint';
+
+export type ValidationAcceptor = <N extends AstNode>(severity: ValidationSeverity, message: string, info: DiagnosticInfo<N>) => void
 
 export type ValidationCheck<T extends AstNode = AstNode> = (node: T, accept: ValidationAcceptor, cancelToken: CancellationToken) => MaybePromise<void>;
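
Extracting ValidationSeverity into a named type changes no behavior; validation checks pass it to the acceptor as before. A sketch with a hypothetical Person AST node:

import type { ValidationAcceptor } from 'langium';

// Person (with a string property `name`) is a hypothetical generated AST type.
function checkPersonStartsWithCapital(person: Person, accept: ValidationAcceptor): void {
    const first = person.name.charAt(0);
    if (first !== first.toUpperCase()) {
        accept('warning', 'Person name should start with a capital.', { node: person, property: 'name' });
    }
}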

packages/langium/test/parser/indentation-aware.test.ts

Lines changed: 35 additions & 1 deletion

@@ -11,7 +11,7 @@ import { EmptyFileSystem, IndentationAwareLexer, IndentationAwareTokenBuilder }
 import { createLangiumGrammarServices, createServicesForGrammar } from 'langium/grammar';
 import type { LangiumServices, PartialLangiumServices } from 'langium/lsp';
 import { expandToString } from 'langium/generate';
-import { parseHelper } from 'langium/test';
+import { expectCompletion, parseHelper } from 'langium/test';
 import type { IMultiModeLexerDefinition } from 'chevrotain';
 
 const grammarServices = createLangiumGrammarServices(EmptyFileSystem).grammar;
@@ -193,6 +193,18 @@ describe('IndentationAwareLexer', () => {
         expect(dedent.tokenType.name).toBe('DEDENT');
     });
 
+    test('should NOT add remaining dedents to the end if partial tokenizing', async () => {
+        const lexer = await getLexer(sampleGrammar);
+        const { tokens } = lexer.tokenize(expandToString`
+            // single-line comment
+            {
+                name`, { mode: 'partial' });
+        expect(tokens).toHaveLength(3);
+
+        const [/* L_BRAC */, indent, /* id */] = tokens;
+        expect(indent.tokenType.name).toBe('INDENT');
+    });
+
     test('should not return any tokens for empty input', async () => {
         const lexer = await getLexer(sampleGrammar);
         const { tokens } = lexer.tokenize('');
@@ -389,6 +401,28 @@ describe('IndentationAware parsing', () => {
         expect(return2.value).toBe(true);
     });
 
+    test.fails('should offer correct auto-completion parsing', async () => {
+        const text = expandToString`
+            <|>if true:
+                <|>return true
+            <|>else:
+                <|>if false:
+                    <|>return true
+                <|>return false
+            <|>return true
+        `;
+
+        const services = await createIndentationAwareServices(sampleGrammar);
+        const completion = expectCompletion(services);
+        await completion({ text, index: 0, expectedItems: ['if', 'return'] });
+        // PR 1669: the lines below currently fail as the completion provider may wrongly assume that all whitespace tokens are hidden
+        await completion({ text, index: 1, expectedItems: ['if', 'return'] });
+        await completion({ text, index: 2, expectedItems: ['else'] });
+        await completion({ text, index: 3, expectedItems: ['if', 'return'] });
+        await completion({ text, index: 4, expectedItems: ['if', 'return'] });
+        await completion({ text, index: 5, expectedItems: ['if', 'return'] });
+        await completion({ text, index: 6, expectedItems: ['if', 'return'] });
+    });
 });
 
 type Statement = If | Return;
