Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tree sitter improvements #237392

Merged
merged 2 commits into from
Jan 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/vs/editor/common/model/tokenizationTextModelPart.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import { TextModelPart } from './textModelPart.js';
import { DefaultBackgroundTokenizer, TokenizerWithStateStoreAndTextModel, TrackingTokenizationStateStore } from './textModelTokens.js';
import { AbstractTokens, AttachedViewHandler, AttachedViews } from './tokens.js';
import { TreeSitterTokens } from './treeSitterTokens.js';
import { ITreeSitterParserService } from '../services/treeSitterParserService.js';
import { IModelContentChangedEvent, IModelLanguageChangedEvent, IModelLanguageConfigurationChangedEvent, IModelTokensChangedEvent } from '../textModelEvents.js';
import { BackgroundTokenizationState, ITokenizationTextModelPart } from '../tokenizationTextModelPart.js';
import { ContiguousMultilineTokens } from '../tokens/contiguousMultilineTokens.js';
Expand All @@ -32,6 +31,7 @@ import { ContiguousTokensStore } from '../tokens/contiguousTokensStore.js';
import { LineTokens } from '../tokens/lineTokens.js';
import { SparseMultilineTokens } from '../tokens/sparseMultilineTokens.js';
import { SparseTokensStore } from '../tokens/sparseTokensStore.js';
import { IInstantiationService } from '../../../platform/instantiation/common/instantiation.js';

export class TokenizationTextModelPart extends TextModelPart implements ITokenizationTextModelPart {
private readonly _semanticTokens: SparseTokensStore = new SparseTokensStore(this._languageService.languageIdCodec);
Expand All @@ -55,7 +55,7 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz
private readonly _attachedViews: AttachedViews,
@ILanguageService private readonly _languageService: ILanguageService,
@ILanguageConfigurationService private readonly _languageConfigurationService: ILanguageConfigurationService,
@ITreeSitterParserService private readonly _treeSitterService: ITreeSitterParserService,
@IInstantiationService private readonly _instantiationService: IInstantiationService
) {
super();

Expand All @@ -73,7 +73,7 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz
}

/**
 * Creates the `TreeSitterTokens` instance for this model part and passes it through `this._register`.
 *
 * NOTE(review): this span comes from a rendered diff with the +/- markers stripped. The two
 * `return` statements below look like the removed and the added version of the same line;
 * read literally, only the first one would ever execute.
 */
private createTreeSitterTokens(): AbstractTokens {
// Presumably the removed line: constructs TreeSitterTokens directly and hands it `_treeSitterService` as the first argument.
return this._register(new TreeSitterTokens(this._treeSitterService, this._languageService.languageIdCodec, this._textModel, () => this._languageId));
// Presumably the added line: asks `_instantiationService.createInstance` to build TreeSitterTokens from the remaining three arguments.
return this._register(this._instantiationService.createInstance(TreeSitterTokens, this._languageService.languageIdCodec, this._textModel, () => this._languageId));
}

private createTokens(useTreeSitter: boolean): void {
Expand Down
6 changes: 3 additions & 3 deletions src/vs/editor/common/model/treeSitterTokens.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ export class TreeSitterTokens extends AbstractTokens {
private _lastLanguageId: string | undefined;
private readonly _tokensChangedListener: MutableDisposable<IDisposable> = this._register(new MutableDisposable());

constructor(private readonly _treeSitterService: ITreeSitterParserService,
languageIdCodec: ILanguageIdCodec,
constructor(languageIdCodec: ILanguageIdCodec,
textModel: TextModel,
languageId: () => string) {
languageId: () => string,
@ITreeSitterParserService private readonly _treeSitterService: ITreeSitterParserService) {
super(languageIdCodec, textModel, languageId);

this._initialize();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ class InspectEditorTokensWidget extends Disposable implements IContentWidget {
const tbody = dom.append(table, $('tbody'));

dom.append(tbody, $('tr', undefined,
$('td.tiw-metadata-key', undefined, 'tree-sitter token' as string),
$('td.tiw-metadata-key', undefined, `tree-sitter token ${treeSitterTokenInfo.id}` as string),
$('td.tiw-metadata-value', undefined, `${treeSitterTokenInfo.text}`)
));
const scopes = new Array<HTMLElement | string>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,12 +140,12 @@ class TreeSitterTokenizationSupport extends Disposable implements ITreeSitterTok

/**
 * Returns the query captures found at a single position of `textModel`.
 * Looks up the tree with `_getTree` and delegates to `_captureAtRange`.
 *
 * NOTE(review): this span comes from a rendered diff with the +/- markers stripped. The two
 * `const captures` declarations look like the removed and the added version of the same line;
 * they cannot both exist in the real file.
 */
captureAtPosition(lineNumber: number, column: number, textModel: ITextModel): Parser.QueryCapture[] {
const tree = this._getTree(textModel);
// Presumably the removed line: the range starts and ends at `column`.
const captures = this._captureAtRange(lineNumber, new ColumnRange(column, column), tree?.tree);
// Presumably the added line: the range's second argument becomes `column + 1`.
const captures = this._captureAtRange(lineNumber, new ColumnRange(column, column + 1), tree?.tree);
return captures;
}

/**
 * Returns the query captures found at a single position of an already available `tree`.
 * Delegates to `_captureAtRange` without looking the tree up from a text model.
 *
 * NOTE(review): this span comes from a rendered diff with the +/- markers stripped. The two
 * `const captures` declarations look like the removed and the added version of the same line;
 * they cannot both exist in the real file.
 */
captureAtPositionTree(lineNumber: number, column: number, tree: Parser.Tree): Parser.QueryCapture[] {
// Presumably the removed line: the range starts and ends at `column`.
const captures = this._captureAtRange(lineNumber, new ColumnRange(column, column), tree);
// Presumably the added line: the range's second argument becomes `column + 1`.
const captures = this._captureAtRange(lineNumber, new ColumnRange(column, column + 1), tree);
return captures;
}

Expand All @@ -156,7 +156,7 @@ class TreeSitterTokenizationSupport extends Disposable implements ITreeSitterTok
return [];
}
// Tree sitter row is 0 based, column is 0 based
return query.captures(tree.rootNode, { startPosition: { row: lineNumber - 1, column: columnRange.startColumn - 1 }, endPosition: { row: lineNumber - 1, column: columnRange.endColumnExclusive } });
return query.captures(tree.rootNode, { startPosition: { row: lineNumber - 1, column: columnRange.startColumn - 1 }, endPosition: { row: lineNumber - 1, column: columnRange.endColumnExclusive - 1 } });
}

/**
Expand All @@ -179,8 +179,16 @@ class TreeSitterTokenizationSupport extends Disposable implements ITreeSitterTok
const lineLength = textModel.getLineMaxColumn(lineNumber);
const tree = this._getTree(textModel);
const captures = this._captureAtRange(lineNumber, new ColumnRange(1, lineLength), tree?.tree);
const encodedLanguageId = this._languageIdCodec.encodeLanguageId(this._languageId);

if (captures.length === 0) {
if (tree) {
stopwatch.stop();
const result = new Uint32Array(2);
result[0] = lineLength;
result[1] = findMetadata(this._colorThemeData, [], encodedLanguageId);
return { result, captureTime: stopwatch.elapsed(), metadataTime: 0 };
}
return undefined;
}

Expand All @@ -193,7 +201,6 @@ class TreeSitterTokenizationSupport extends Disposable implements ITreeSitterTok
endOffsetsAndScopes.push({ endOffset: 0, scopes: [] });
};

const encodedLanguageId = this._languageIdCodec.encodeLanguageId(this._languageId);

for (let captureIndex = 0; captureIndex < captures.length; captureIndex++) {
const capture = captures[captureIndex];
Expand Down Expand Up @@ -225,23 +232,36 @@ class TreeSitterTokenizationSupport extends Disposable implements ITreeSitterTok
};

if (previousTokenEnd >= lineRelativeOffset) {
const previousTokenStartOffset = ((tokenIndex >= 2) ? endOffsetsAndScopes[tokenIndex - 2].endOffset : 0);
const originalPreviousTokenEndOffset = endOffsetsAndScopes[tokenIndex - 1].endOffset;

const previousTokenStartOffset = ((tokenIndex >= 2) ? endOffsetsAndScopes[tokenIndex - 2].endOffset : 0);
const loopOriginalPreviousTokenEndOffset = endOffsetsAndScopes[tokenIndex - 1].endOffset;
const previousPreviousTokenEndOffset = (tokenIndex >= 2) ? endOffsetsAndScopes[tokenIndex - 2].endOffset : 0;

// Check that the current token doesn't just replace the last token
if ((previousTokenStartOffset + currentTokenLength) === originalPreviousTokenEndOffset) {
if ((previousTokenStartOffset + currentTokenLength) === loopOriginalPreviousTokenEndOffset) {
// Current token and previous token span the exact same characters, replace the last scope
endOffsetsAndScopes[tokenIndex - 1].scopes[endOffsetsAndScopes[tokenIndex - 1].scopes.length - 1] = capture.name;
} else {
// The current token is within the previous token. Adjust the end of the previous token.
endOffsetsAndScopes[tokenIndex - 1].endOffset = intermediateTokenOffset;
} else if (previousPreviousTokenEndOffset <= intermediateTokenOffset) {
let originalPreviousTokenScopes;
// The current token is within the previous token. Adjust the end of the previous token
if (previousPreviousTokenEndOffset !== intermediateTokenOffset) {
endOffsetsAndScopes[tokenIndex - 1] = { endOffset: intermediateTokenOffset, scopes: endOffsetsAndScopes[tokenIndex - 1].scopes };
addCurrentTokenToArray();
originalPreviousTokenScopes = endOffsetsAndScopes[tokenIndex - 2].scopes;
} else {
originalPreviousTokenScopes = endOffsetsAndScopes[tokenIndex - 1].scopes;
endOffsetsAndScopes[tokenIndex - 1] = { endOffset: lineRelativeOffset, scopes: [capture.name] };
}

addCurrentTokenToArray();
// Add the rest of the previous token after the current token
increaseSizeOfTokensByOneToken();
endOffsetsAndScopes[tokenIndex].endOffset = originalPreviousTokenEndOffset;
endOffsetsAndScopes[tokenIndex].scopes = endOffsetsAndScopes[tokenIndex - 2].scopes;
tokenIndex++;
if (originalPreviousTokenEndOffset !== lineRelativeOffset) {
increaseSizeOfTokensByOneToken();
endOffsetsAndScopes[tokenIndex] = { endOffset: originalPreviousTokenEndOffset, scopes: originalPreviousTokenScopes };
tokenIndex++;
} else {
endOffsetsAndScopes[tokenIndex - 1].scopes.unshift(...originalPreviousTokenScopes);
}
}
} else {
// Just add the token to the array
Expand All @@ -250,9 +270,9 @@ class TreeSitterTokenizationSupport extends Disposable implements ITreeSitterTok
}

// Account for uncaptured characters at the end of the line
if (captures[captures.length - 1].node.endPosition.column + 1 < lineLength) {
if (endOffsetsAndScopes[tokenIndex - 1].endOffset < lineLength - 1) {
increaseSizeOfTokensByOneToken();
endOffsetsAndScopes[tokenIndex].endOffset = lineLength - 1;
endOffsetsAndScopes[tokenIndex] = { endOffset: lineLength - 1, scopes: endOffsetsAndScopes[tokenIndex].scopes };
tokenIndex++;
}
const captureTime = stopwatch.elapsed();
Expand Down
Loading