From 3aaf01d58c0edc747087dfe599c2e4ecd961f1bb Mon Sep 17 00:00:00 2001 From: Alex Ross Date: Tue, 7 Jan 2025 16:06:16 +0100 Subject: [PATCH] Tree sitter improvements (#237392) * Tree sitter improvements * Fix test --- .../common/model/tokenizationTextModelPart.ts | 6 +-- .../editor/common/model/treeSitterTokens.ts | 6 +-- .../inspectEditorTokens.ts | 2 +- .../browser/treeSitterTokenizationFeature.ts | 52 +++++++++++++------ 4 files changed, 43 insertions(+), 23 deletions(-) diff --git a/src/vs/editor/common/model/tokenizationTextModelPart.ts b/src/vs/editor/common/model/tokenizationTextModelPart.ts index f51bf1b98bcb1..704c128b9928d 100644 --- a/src/vs/editor/common/model/tokenizationTextModelPart.ts +++ b/src/vs/editor/common/model/tokenizationTextModelPart.ts @@ -23,7 +23,6 @@ import { TextModelPart } from './textModelPart.js'; import { DefaultBackgroundTokenizer, TokenizerWithStateStoreAndTextModel, TrackingTokenizationStateStore } from './textModelTokens.js'; import { AbstractTokens, AttachedViewHandler, AttachedViews } from './tokens.js'; import { TreeSitterTokens } from './treeSitterTokens.js'; -import { ITreeSitterParserService } from '../services/treeSitterParserService.js'; import { IModelContentChangedEvent, IModelLanguageChangedEvent, IModelLanguageConfigurationChangedEvent, IModelTokensChangedEvent } from '../textModelEvents.js'; import { BackgroundTokenizationState, ITokenizationTextModelPart } from '../tokenizationTextModelPart.js'; import { ContiguousMultilineTokens } from '../tokens/contiguousMultilineTokens.js'; @@ -32,6 +31,7 @@ import { ContiguousTokensStore } from '../tokens/contiguousTokensStore.js'; import { LineTokens } from '../tokens/lineTokens.js'; import { SparseMultilineTokens } from '../tokens/sparseMultilineTokens.js'; import { SparseTokensStore } from '../tokens/sparseTokensStore.js'; +import { IInstantiationService } from '../../../platform/instantiation/common/instantiation.js'; export class TokenizationTextModelPart extends TextModelPart implements ITokenizationTextModelPart { private readonly _semanticTokens: SparseTokensStore = new SparseTokensStore(this._languageService.languageIdCodec); @@ -55,7 +55,7 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz private readonly _attachedViews: AttachedViews, @ILanguageService private readonly _languageService: ILanguageService, @ILanguageConfigurationService private readonly _languageConfigurationService: ILanguageConfigurationService, - @ITreeSitterParserService private readonly _treeSitterService: ITreeSitterParserService, + @IInstantiationService private readonly _instantiationService: IInstantiationService ) { super(); @@ -73,7 +73,7 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz } private createTreeSitterTokens(): AbstractTokens { - return this._register(new TreeSitterTokens(this._treeSitterService, this._languageService.languageIdCodec, this._textModel, () => this._languageId)); + return this._register(this._instantiationService.createInstance(TreeSitterTokens, this._languageService.languageIdCodec, this._textModel, () => this._languageId)); } private createTokens(useTreeSitter: boolean): void { diff --git a/src/vs/editor/common/model/treeSitterTokens.ts b/src/vs/editor/common/model/treeSitterTokens.ts index f4077388ef088..7f8f91bb27623 100644 --- a/src/vs/editor/common/model/treeSitterTokens.ts +++ b/src/vs/editor/common/model/treeSitterTokens.ts @@ -17,10 +17,10 @@ export class TreeSitterTokens extends AbstractTokens { private _lastLanguageId: string | undefined; private readonly _tokensChangedListener: MutableDisposable = this._register(new MutableDisposable()); - constructor(private readonly _treeSitterService: ITreeSitterParserService, - languageIdCodec: ILanguageIdCodec, + constructor(languageIdCodec: ILanguageIdCodec, textModel: TextModel, - languageId: () => string) { + languageId: () => string, + @ITreeSitterParserService private readonly _treeSitterService: ITreeSitterParserService) { super(languageIdCodec, textModel, languageId); this._initialize(); diff --git a/src/vs/workbench/contrib/codeEditor/browser/inspectEditorTokens/inspectEditorTokens.ts b/src/vs/workbench/contrib/codeEditor/browser/inspectEditorTokens/inspectEditorTokens.ts index 2b0f345b3c587..4da76c3114426 100644 --- a/src/vs/workbench/contrib/codeEditor/browser/inspectEditorTokens/inspectEditorTokens.ts +++ b/src/vs/workbench/contrib/codeEditor/browser/inspectEditorTokens/inspectEditorTokens.ts @@ -405,7 +405,7 @@ class InspectEditorTokensWidget extends Disposable implements IContentWidget { const tbody = dom.append(table, $('tbody')); dom.append(tbody, $('tr', undefined, - $('td.tiw-metadata-key', undefined, 'tree-sitter token' as string), + $('td.tiw-metadata-key', undefined, `tree-sitter token ${treeSitterTokenInfo.id}` as string), $('td.tiw-metadata-value', undefined, `${treeSitterTokenInfo.text}`) )); const scopes = new Array(); diff --git a/src/vs/workbench/services/treeSitter/browser/treeSitterTokenizationFeature.ts b/src/vs/workbench/services/treeSitter/browser/treeSitterTokenizationFeature.ts index 0fa967c6b26f5..24c4a1698c5c2 100644 --- a/src/vs/workbench/services/treeSitter/browser/treeSitterTokenizationFeature.ts +++ b/src/vs/workbench/services/treeSitter/browser/treeSitterTokenizationFeature.ts @@ -140,12 +140,12 @@ class TreeSitterTokenizationSupport extends Disposable implements ITreeSitterTok captureAtPosition(lineNumber: number, column: number, textModel: ITextModel): Parser.QueryCapture[] { const tree = this._getTree(textModel); - const captures = this._captureAtRange(lineNumber, new ColumnRange(column, column), tree?.tree); + const captures = this._captureAtRange(lineNumber, new ColumnRange(column, column + 1), tree?.tree); return captures; } captureAtPositionTree(lineNumber: number, column: number, tree: Parser.Tree): Parser.QueryCapture[] { - const captures = this._captureAtRange(lineNumber, new ColumnRange(column, column), tree); + const captures = this._captureAtRange(lineNumber, new ColumnRange(column, column + 1), tree); return captures; } @@ -156,7 +156,7 @@ class TreeSitterTokenizationSupport extends Disposable implements ITreeSitterTok return []; } // Tree sitter row is 0 based, column is 0 based - return query.captures(tree.rootNode, { startPosition: { row: lineNumber - 1, column: columnRange.startColumn - 1 }, endPosition: { row: lineNumber - 1, column: columnRange.endColumnExclusive } }); + return query.captures(tree.rootNode, { startPosition: { row: lineNumber - 1, column: columnRange.startColumn - 1 }, endPosition: { row: lineNumber - 1, column: columnRange.endColumnExclusive - 1 } }); } /** @@ -179,8 +179,16 @@ class TreeSitterTokenizationSupport extends Disposable implements ITreeSitterTok const lineLength = textModel.getLineMaxColumn(lineNumber); const tree = this._getTree(textModel); const captures = this._captureAtRange(lineNumber, new ColumnRange(1, lineLength), tree?.tree); + const encodedLanguageId = this._languageIdCodec.encodeLanguageId(this._languageId); if (captures.length === 0) { + if (tree) { + stopwatch.stop(); + const result = new Uint32Array(2); + result[0] = lineLength; + result[1] = findMetadata(this._colorThemeData, [], encodedLanguageId); + return { result, captureTime: stopwatch.elapsed(), metadataTime: 0 }; + } return undefined; } @@ -193,7 +201,6 @@ class TreeSitterTokenizationSupport extends Disposable implements ITreeSitterTok endOffsetsAndScopes.push({ endOffset: 0, scopes: [] }); }; - const encodedLanguageId = this._languageIdCodec.encodeLanguageId(this._languageId); for (let captureIndex = 0; captureIndex < captures.length; captureIndex++) { const capture = captures[captureIndex]; @@ -225,23 +232,36 @@ class TreeSitterTokenizationSupport extends Disposable implements ITreeSitterTok }; if (previousTokenEnd >= lineRelativeOffset) { - const previousTokenStartOffset = ((tokenIndex >= 2) ? endOffsetsAndScopes[tokenIndex - 2].endOffset : 0); const originalPreviousTokenEndOffset = endOffsetsAndScopes[tokenIndex - 1].endOffset; + const previousTokenStartOffset = ((tokenIndex >= 2) ? endOffsetsAndScopes[tokenIndex - 2].endOffset : 0); + const loopOriginalPreviousTokenEndOffset = endOffsetsAndScopes[tokenIndex - 1].endOffset; + const previousPreviousTokenEndOffset = (tokenIndex >= 2) ? endOffsetsAndScopes[tokenIndex - 2].endOffset : 0; + // Check that the current token doesn't just replace the last token - if ((previousTokenStartOffset + currentTokenLength) === originalPreviousTokenEndOffset) { + if ((previousTokenStartOffset + currentTokenLength) === loopOriginalPreviousTokenEndOffset) { // Current token and previous token span the exact same characters, replace the last scope endOffsetsAndScopes[tokenIndex - 1].scopes[endOffsetsAndScopes[tokenIndex - 1].scopes.length - 1] = capture.name; - } else { - // The current token is within the previous token. Adjust the end of the previous token. - endOffsetsAndScopes[tokenIndex - 1].endOffset = intermediateTokenOffset; + } else if (previousPreviousTokenEndOffset <= intermediateTokenOffset) { + let originalPreviousTokenScopes; + // The current token is within the previous token. Adjust the end of the previous token + if (previousPreviousTokenEndOffset !== intermediateTokenOffset) { + endOffsetsAndScopes[tokenIndex - 1] = { endOffset: intermediateTokenOffset, scopes: endOffsetsAndScopes[tokenIndex - 1].scopes }; + addCurrentTokenToArray(); + originalPreviousTokenScopes = endOffsetsAndScopes[tokenIndex - 2].scopes; + } else { + originalPreviousTokenScopes = endOffsetsAndScopes[tokenIndex - 1].scopes; + endOffsetsAndScopes[tokenIndex - 1] = { endOffset: lineRelativeOffset, scopes: [capture.name] }; + } - addCurrentTokenToArray(); // Add the rest of the previous token after the current token - increaseSizeOfTokensByOneToken(); - endOffsetsAndScopes[tokenIndex].endOffset = originalPreviousTokenEndOffset; - endOffsetsAndScopes[tokenIndex].scopes = endOffsetsAndScopes[tokenIndex - 2].scopes; - tokenIndex++; + if (originalPreviousTokenEndOffset !== lineRelativeOffset) { + increaseSizeOfTokensByOneToken(); + endOffsetsAndScopes[tokenIndex] = { endOffset: originalPreviousTokenEndOffset, scopes: originalPreviousTokenScopes }; + tokenIndex++; + } else { + endOffsetsAndScopes[tokenIndex - 1].scopes.unshift(...originalPreviousTokenScopes); + } } } else { // Just add the token to the array @@ -250,9 +270,9 @@ class TreeSitterTokenizationSupport extends Disposable implements ITreeSitterTok } // Account for uncaptured characters at the end of the line - if (captures[captures.length - 1].node.endPosition.column + 1 < lineLength) { + if (endOffsetsAndScopes[tokenIndex - 1].endOffset < lineLength - 1) { increaseSizeOfTokensByOneToken(); - endOffsetsAndScopes[tokenIndex].endOffset = lineLength - 1; + endOffsetsAndScopes[tokenIndex] = { endOffset: lineLength - 1, scopes: endOffsetsAndScopes[tokenIndex].scopes }; tokenIndex++; } const captureTime = stopwatch.elapsed();