Skip to content

Commit

Permalink
Tree sitter improvements (#237392)
Browse files (browse the repository at this point in the history)
* Tree sitter improvements

* Fix test
  • Loading branch information
alexr00 authored Jan 7, 2025
1 parent 7b7c63c commit 3aaf01d
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 23 deletions.
6 changes: 3 additions & 3 deletions src/vs/editor/common/model/tokenizationTextModelPart.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -23,7 +23,6 @@ import { TextModelPart } from './textModelPart.js';
import { DefaultBackgroundTokenizer, TokenizerWithStateStoreAndTextModel, TrackingTokenizationStateStore } from './textModelTokens.js';
import { AbstractTokens, AttachedViewHandler, AttachedViews } from './tokens.js';
import { TreeSitterTokens } from './treeSitterTokens.js';
import { ITreeSitterParserService } from '../services/treeSitterParserService.js';
import { IModelContentChangedEvent, IModelLanguageChangedEvent, IModelLanguageConfigurationChangedEvent, IModelTokensChangedEvent } from '../textModelEvents.js';
import { BackgroundTokenizationState, ITokenizationTextModelPart } from '../tokenizationTextModelPart.js';
import { ContiguousMultilineTokens } from '../tokens/contiguousMultilineTokens.js';
Expand All @@ -32,6 +31,7 @@ import { ContiguousTokensStore } from '../tokens/contiguousTokensStore.js';
import { LineTokens } from '../tokens/lineTokens.js';
import { SparseMultilineTokens } from '../tokens/sparseMultilineTokens.js';
import { SparseTokensStore } from '../tokens/sparseTokensStore.js';
import { IInstantiationService } from '../../../platform/instantiation/common/instantiation.js';

export class TokenizationTextModelPart extends TextModelPart implements ITokenizationTextModelPart {
private readonly _semanticTokens: SparseTokensStore = new SparseTokensStore(this._languageService.languageIdCodec);
Expand All @@ -55,7 +55,7 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz
private readonly _attachedViews: AttachedViews,
@ILanguageService private readonly _languageService: ILanguageService,
@ILanguageConfigurationService private readonly _languageConfigurationService: ILanguageConfigurationService,
@ITreeSitterParserService private readonly _treeSitterService: ITreeSitterParserService,
@IInstantiationService private readonly _instantiationService: IInstantiationService
) {
super();

Expand All @@ -73,7 +73,7 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz
}

private createTreeSitterTokens(): AbstractTokens {
return this._register(new TreeSitterTokens(this._treeSitterService, this._languageService.languageIdCodec, this._textModel, () => this._languageId));
return this._register(this._instantiationService.createInstance(TreeSitterTokens, this._languageService.languageIdCodec, this._textModel, () => this._languageId));
}

private createTokens(useTreeSitter: boolean): void {
Expand Down
6 changes: 3 additions & 3 deletions src/vs/editor/common/model/treeSitterTokens.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -17,10 +17,10 @@ export class TreeSitterTokens extends AbstractTokens {
private _lastLanguageId: string | undefined;
private readonly _tokensChangedListener: MutableDisposable<IDisposable> = this._register(new MutableDisposable());

constructor(private readonly _treeSitterService: ITreeSitterParserService,
languageIdCodec: ILanguageIdCodec,
constructor(languageIdCodec: ILanguageIdCodec,
textModel: TextModel,
languageId: () => string) {
languageId: () => string,
@ITreeSitterParserService private readonly _treeSitterService: ITreeSitterParserService) {
super(languageIdCodec, textModel, languageId);

this._initialize();
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -405,7 +405,7 @@ class InspectEditorTokensWidget extends Disposable implements IContentWidget {
const tbody = dom.append(table, $('tbody'));

dom.append(tbody, $('tr', undefined,
$('td.tiw-metadata-key', undefined, 'tree-sitter token' as string),
$('td.tiw-metadata-key', undefined, `tree-sitter token ${treeSitterTokenInfo.id}` as string),
$('td.tiw-metadata-value', undefined, `${treeSitterTokenInfo.text}`)
));
const scopes = new Array<HTMLElement | string>();
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -140,12 +140,12 @@ class TreeSitterTokenizationSupport extends Disposable implements ITreeSitterTok

captureAtPosition(lineNumber: number, column: number, textModel: ITextModel): Parser.QueryCapture[] {
const tree = this._getTree(textModel);
const captures = this._captureAtRange(lineNumber, new ColumnRange(column, column), tree?.tree);
const captures = this._captureAtRange(lineNumber, new ColumnRange(column, column + 1), tree?.tree);
return captures;
}

captureAtPositionTree(lineNumber: number, column: number, tree: Parser.Tree): Parser.QueryCapture[] {
const captures = this._captureAtRange(lineNumber, new ColumnRange(column, column), tree);
const captures = this._captureAtRange(lineNumber, new ColumnRange(column, column + 1), tree);
return captures;
}

Expand All @@ -156,7 +156,7 @@ class TreeSitterTokenizationSupport extends Disposable implements ITreeSitterTok
return [];
}
// Tree sitter row is 0 based, column is 0 based
return query.captures(tree.rootNode, { startPosition: { row: lineNumber - 1, column: columnRange.startColumn - 1 }, endPosition: { row: lineNumber - 1, column: columnRange.endColumnExclusive } });
return query.captures(tree.rootNode, { startPosition: { row: lineNumber - 1, column: columnRange.startColumn - 1 }, endPosition: { row: lineNumber - 1, column: columnRange.endColumnExclusive - 1 } });
}

/**
Expand All @@ -179,8 +179,16 @@ class TreeSitterTokenizationSupport extends Disposable implements ITreeSitterTok
const lineLength = textModel.getLineMaxColumn(lineNumber);
const tree = this._getTree(textModel);
const captures = this._captureAtRange(lineNumber, new ColumnRange(1, lineLength), tree?.tree);
const encodedLanguageId = this._languageIdCodec.encodeLanguageId(this._languageId);

if (captures.length === 0) {
if (tree) {
stopwatch.stop();
const result = new Uint32Array(2);
result[0] = lineLength;
result[1] = findMetadata(this._colorThemeData, [], encodedLanguageId);
return { result, captureTime: stopwatch.elapsed(), metadataTime: 0 };
}
return undefined;
}

Expand All @@ -193,7 +201,6 @@ class TreeSitterTokenizationSupport extends Disposable implements ITreeSitterTok
endOffsetsAndScopes.push({ endOffset: 0, scopes: [] });
};

const encodedLanguageId = this._languageIdCodec.encodeLanguageId(this._languageId);

for (let captureIndex = 0; captureIndex < captures.length; captureIndex++) {
const capture = captures[captureIndex];
Expand Down Expand Up @@ -225,23 +232,36 @@ class TreeSitterTokenizationSupport extends Disposable implements ITreeSitterTok
};

if (previousTokenEnd >= lineRelativeOffset) {
const previousTokenStartOffset = ((tokenIndex >= 2) ? endOffsetsAndScopes[tokenIndex - 2].endOffset : 0);
const originalPreviousTokenEndOffset = endOffsetsAndScopes[tokenIndex - 1].endOffset;

const previousTokenStartOffset = ((tokenIndex >= 2) ? endOffsetsAndScopes[tokenIndex - 2].endOffset : 0);
const loopOriginalPreviousTokenEndOffset = endOffsetsAndScopes[tokenIndex - 1].endOffset;
const previousPreviousTokenEndOffset = (tokenIndex >= 2) ? endOffsetsAndScopes[tokenIndex - 2].endOffset : 0;

// Check that the current token doesn't just replace the last token
if ((previousTokenStartOffset + currentTokenLength) === originalPreviousTokenEndOffset) {
if ((previousTokenStartOffset + currentTokenLength) === loopOriginalPreviousTokenEndOffset) {
// Current token and previous token span the exact same characters, replace the last scope
endOffsetsAndScopes[tokenIndex - 1].scopes[endOffsetsAndScopes[tokenIndex - 1].scopes.length - 1] = capture.name;
} else {
// The current token is within the previous token. Adjust the end of the previous token.
endOffsetsAndScopes[tokenIndex - 1].endOffset = intermediateTokenOffset;
} else if (previousPreviousTokenEndOffset <= intermediateTokenOffset) {
let originalPreviousTokenScopes;
// The current token is within the previous token. Adjust the end of the previous token
if (previousPreviousTokenEndOffset !== intermediateTokenOffset) {
endOffsetsAndScopes[tokenIndex - 1] = { endOffset: intermediateTokenOffset, scopes: endOffsetsAndScopes[tokenIndex - 1].scopes };
addCurrentTokenToArray();
originalPreviousTokenScopes = endOffsetsAndScopes[tokenIndex - 2].scopes;
} else {
originalPreviousTokenScopes = endOffsetsAndScopes[tokenIndex - 1].scopes;
endOffsetsAndScopes[tokenIndex - 1] = { endOffset: lineRelativeOffset, scopes: [capture.name] };
}

addCurrentTokenToArray();
// Add the rest of the previous token after the current token
increaseSizeOfTokensByOneToken();
endOffsetsAndScopes[tokenIndex].endOffset = originalPreviousTokenEndOffset;
endOffsetsAndScopes[tokenIndex].scopes = endOffsetsAndScopes[tokenIndex - 2].scopes;
tokenIndex++;
if (originalPreviousTokenEndOffset !== lineRelativeOffset) {
increaseSizeOfTokensByOneToken();
endOffsetsAndScopes[tokenIndex] = { endOffset: originalPreviousTokenEndOffset, scopes: originalPreviousTokenScopes };
tokenIndex++;
} else {
endOffsetsAndScopes[tokenIndex - 1].scopes.unshift(...originalPreviousTokenScopes);
}
}
} else {
// Just add the token to the array
Expand All @@ -250,9 +270,9 @@ class TreeSitterTokenizationSupport extends Disposable implements ITreeSitterTok
}

// Account for uncaptured characters at the end of the line
if (captures[captures.length - 1].node.endPosition.column + 1 < lineLength) {
if (endOffsetsAndScopes[tokenIndex - 1].endOffset < lineLength - 1) {
increaseSizeOfTokensByOneToken();
endOffsetsAndScopes[tokenIndex].endOffset = lineLength - 1;
endOffsetsAndScopes[tokenIndex] = { endOffset: lineLength - 1, scopes: endOffsetsAndScopes[tokenIndex].scopes };
tokenIndex++;
}
const captureTime = stopwatch.elapsed();
Expand Down

0 comments on commit 3aaf01d

Please sign in to comment.