From df297c7128b70f6925a1539f5bafa31e83dab712 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 8 Dec 2025 11:18:41 +0000 Subject: [PATCH 1/4] Initial plan From d2ebb419f072e2de71253003dddd349cc052202e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 8 Dec 2025 11:30:39 +0000 Subject: [PATCH 2/4] Add scope comments to IToken interface and track through the stack Co-authored-by: alexr00 <38270282+alexr00@users.noreply.github.com> --- src/grammar/grammar.ts | 22 +++++++++++++------- src/grammar/tokenizeString.ts | 18 ++++++++++------ src/main.ts | 1 + src/rawGrammar.ts | 1 + src/rule.ts | 39 ++++++++++++++++++++++------------- src/theme.ts | 28 ++++++++++++++++++------- 6 files changed, 75 insertions(+), 34 deletions(-) diff --git a/src/grammar/grammar.ts b/src/grammar/grammar.ts index bc324b6..b200fb6 100644 --- a/src/grammar/grammar.ts +++ b/src/grammar/grammar.ts @@ -433,12 +433,12 @@ export class AttributedScopeStack { } public static createRoot(scopeName: ScopeName, tokenAttributes: EncodedTokenAttributes): AttributedScopeStack { - return new AttributedScopeStack(null, new ScopeStack(null, scopeName), tokenAttributes, null); + return new AttributedScopeStack(null, new ScopeStack(null, scopeName, null), tokenAttributes, null); } public static createRootAndLookUpScopeName(scopeName: ScopeName, tokenAttributes: EncodedTokenAttributes, grammar: Grammar): AttributedScopeStack { const rawRootMetadata = grammar.getMetadataForScope(scopeName); - const scopePath = new ScopeStack(null, scopeName); + const scopePath = new ScopeStack(null, scopeName, null); const rootStyle = grammar.themeProvider.themeMatch(scopePath); const resolvedTokenAttributes = AttributedScopeStack.mergeAttributes( @@ -531,7 +531,7 @@ export class AttributedScopeStack { ); } - public pushAttributed(scopePath: ScopePath | null, grammar: Grammar): AttributedScopeStack { + public pushAttributed(scopePath: ScopePath | null, grammar: Grammar, scopeComment?: string | null): AttributedScopeStack { if (scopePath === null) { return this; } @@ -539,13 +539,14 @@ export class AttributedScopeStack { if (scopePath.indexOf(' ') === -1) { // This is the common case and much faster - return AttributedScopeStack._pushAttributed(this, scopePath, grammar); + return AttributedScopeStack._pushAttributed(this, scopePath, grammar, scopeComment); } const scopes = scopePath.split(/ /g); let result: AttributedScopeStack = this; for (const scope of scopes) { - result = AttributedScopeStack._pushAttributed(result, scope, grammar); + // For multi-scope pushes, only the first scope gets the comment + result = AttributedScopeStack._pushAttributed(result, scope, grammar, result === this ? scopeComment : null); } return result; @@ -555,10 +556,11 @@ export class AttributedScopeStack { target: AttributedScopeStack, scopeName: ScopeName, grammar: Grammar, + scopeComment?: string | null ): AttributedScopeStack { const rawMetadata = grammar.getMetadataForScope(scopeName); - const newPath = target.scopePath.push(scopeName); + const newPath = target.scopePath.push(scopeName, scopeComment || null); const scopeThemeMatchResult = grammar.themeProvider.themeMatch(newPath); const metadata = AttributedScopeStack.mergeAttributes( @@ -573,6 +575,10 @@ export class AttributedScopeStack { return this.scopePath.getSegments(); } + public getScopeComments(): (string | null)[] { + return this.scopePath.getComments(); + } + public getExtensionIfDefined(base: AttributedScopeStack | null): AttributedScopeStackFrame[] | undefined { const result: AttributedScopeStackFrame[] = []; let self: AttributedScopeStack | null = this; @@ -1063,6 +1069,7 @@ export class LineTokens { } const scopes = scopesList?.getScopeNames() ?? []; + const scopeComments = scopesList?.getScopeComments() ?? []; if (DebugFlags.InDebugMode) { console.log(' token: |' + this._lineText!.substring(this._lastTokenEndIndex, endIndex).replace(/\n$/, '\\n') + '|'); @@ -1075,7 +1082,8 @@ export class LineTokens { startIndex: this._lastTokenEndIndex, endIndex: endIndex, // value: lineText.substring(lastTokenEndIndex, endIndex), - scopes: scopes + scopes: scopes, + scopeComments: scopeComments }); this._lastTokenEndIndex = endIndex; diff --git a/src/grammar/tokenizeString.ts b/src/grammar/tokenizeString.ts index 6a7f26a..09efd28 100644 --- a/src/grammar/tokenizeString.ts +++ b/src/grammar/tokenizeString.ts @@ -170,9 +170,11 @@ class TokenizeStringResult { const beforePush = stack; // push it on the stack rule const scopeName = _rule.getName(lineText.content, captureIndices); + const scopeComment = _rule.getComment(); const nameScopesList = stack.contentNameScopesList!.pushAttributed( scopeName, - grammar + grammar, + scopeComment ); stack = stack.push( matchedRuleId, @@ -214,7 +216,8 @@ class TokenizeStringResult { ); const contentNameScopesList = nameScopesList.pushAttributed( contentName, - grammar + grammar, + null // contentName doesn't have a separate comment ); stack = stack.withContentNameScopesList(contentNameScopesList); @@ -263,7 +266,8 @@ class TokenizeStringResult { ); const contentNameScopesList = nameScopesList.pushAttributed( contentName, - grammar + grammar, + null // contentName doesn't have a separate comment ); stack = stack.withContentNameScopesList(contentNameScopesList); @@ -626,9 +630,10 @@ function handleCaptures(grammar: Grammar, lineText: OnigString, isFirstLine: boo if (captureRule.retokenizeCapturedWithRuleId) { // the capture requires additional matching const scopeName = captureRule.getName(lineTextContent, captureIndices); - const nameScopesList = stack.contentNameScopesList!.pushAttributed(scopeName, grammar); + const scopeComment = captureRule.getComment(); + const nameScopesList = stack.contentNameScopesList!.pushAttributed(scopeName, grammar, scopeComment); const contentName = captureRule.getContentName(lineTextContent, captureIndices); - const contentNameScopesList = nameScopesList.pushAttributed(contentName, grammar); + const contentNameScopesList = nameScopesList.pushAttributed(contentName, grammar, null); const stackClone = stack.push(captureRule.retokenizeCapturedWithRuleId, captureIndex.start, -1, false, null, nameScopesList, contentNameScopesList); const onigSubStr = grammar.createOnigString(lineTextContent.substring(0, captureIndex.end)); @@ -641,7 +646,8 @@ function handleCaptures(grammar: Grammar, lineText: OnigString, isFirstLine: boo if (captureRuleScopeName !== null) { // push const base = localStack.length > 0 ? localStack[localStack.length - 1].scopes : stack.contentNameScopesList; - const captureRuleScopesList = base!.pushAttributed(captureRuleScopeName, grammar); + const captureRuleScopeComment = captureRule.getComment(); + const captureRuleScopesList = base!.pushAttributed(captureRuleScopeName, grammar, captureRuleScopeComment); localStack.push(new LocalStackElement(captureRuleScopesList, captureIndex.end)); } } diff --git a/src/main.ts b/src/main.ts index 6065917..099c9b0 100644 --- a/src/main.ts +++ b/src/main.ts @@ -285,6 +285,7 @@ export interface IToken { startIndex: number; readonly endIndex: number; readonly scopes: string[]; + readonly scopeComments: (string | null)[]; } /** diff --git a/src/rawGrammar.ts b/src/rawGrammar.ts index d0e0d6e..ac8a065 100644 --- a/src/rawGrammar.ts +++ b/src/rawGrammar.ts @@ -43,6 +43,7 @@ export interface IRawRule extends ILocatable { readonly name?: ScopeName; readonly contentName?: ScopeName; + readonly comment?: string; readonly match?: RegExpString; readonly captures?: IRawCaptures; diff --git a/src/rule.ts b/src/rule.ts index 6f30da6..1e4d19e 100644 --- a/src/rule.ts +++ b/src/rule.ts @@ -51,13 +51,16 @@ export abstract class Rule { private readonly _contentNameIsCapturing: boolean; private readonly _contentName: string | null; - constructor($location: ILocation | undefined, id: RuleId, name: string | null | undefined, contentName: string | null | undefined) { + private readonly _comment: string | null; + + constructor($location: ILocation | undefined, id: RuleId, name: string | null | undefined, contentName: string | null | undefined, comment: string | null | undefined) { this.$location = $location; this.id = id; this._name = name || null; this._nameIsCapturing = RegexSource.hasCaptures(this._name); this._contentName = contentName || null; this._contentNameIsCapturing = RegexSource.hasCaptures(this._contentName); + this._comment = comment || null; } public abstract dispose(): void; @@ -81,6 +84,10 @@ export abstract class Rule { return RegexSource.replaceCaptures(this._contentName, lineText, captureIndices); } + public getComment(): string | null { + return this._comment; + } + public abstract collectPatterns(grammar: IRuleRegistry, out: RegExpSourceList): void; public abstract compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string | null): CompiledRule; @@ -97,8 +104,8 @@ export class CaptureRule extends Rule { public readonly retokenizeCapturedWithRuleId: RuleId | 0; - constructor($location: ILocation | undefined, id: RuleId, name: string | null | undefined, contentName: string | null | undefined, retokenizeCapturedWithRuleId: RuleId | 0) { - super($location, id, name, contentName); + constructor($location: ILocation | undefined, id: RuleId, name: string | null | undefined, contentName: string | null | undefined, comment: string | null | undefined, retokenizeCapturedWithRuleId: RuleId | 0) { + super($location, id, name, contentName, comment); this.retokenizeCapturedWithRuleId = retokenizeCapturedWithRuleId; } @@ -124,8 +131,8 @@ export class MatchRule extends Rule { public readonly captures: (CaptureRule | null)[]; private _cachedCompiledPatterns: RegExpSourceList | null; - constructor($location: ILocation | undefined, id: RuleId, name: string | undefined, match: string, captures: (CaptureRule | null)[]) { - super($location, id, name, null); + constructor($location: ILocation | undefined, id: RuleId, name: string | undefined, comment: string | undefined, match: string, captures: (CaptureRule | null)[]) { + super($location, id, name, null, comment); this._match = new RegExpSource(match, this.id); this.captures = captures; this._cachedCompiledPatterns = null; @@ -168,8 +175,8 @@ export class IncludeOnlyRule extends Rule { public readonly patterns: RuleId[]; private _cachedCompiledPatterns: RegExpSourceList | null; - constructor($location: ILocation | undefined, id: RuleId, name: string | null | undefined, contentName: string | null | undefined, patterns: ICompilePatternsResult) { - super($location, id, name, contentName); + constructor($location: ILocation | undefined, id: RuleId, name: string | null | undefined, contentName: string | null | undefined, comment: string | null | undefined, patterns: ICompilePatternsResult) { + super($location, id, name, contentName, comment); this.patterns = patterns.patterns; this.hasMissingPatterns = patterns.hasMissingPatterns; this._cachedCompiledPatterns = null; @@ -217,8 +224,8 @@ export class BeginEndRule extends Rule { public readonly patterns: RuleId[]; private _cachedCompiledPatterns: RegExpSourceList | null; - constructor($location: ILocation | undefined, id: RuleId, name: string | null | undefined, contentName: string | null | undefined, begin: string, beginCaptures: (CaptureRule | null)[], end: string | undefined, endCaptures: (CaptureRule | null)[], applyEndPatternLast: boolean | undefined, patterns: ICompilePatternsResult) { - super($location, id, name, contentName); + constructor($location: ILocation | undefined, id: RuleId, name: string | null | undefined, contentName: string | null | undefined, comment: string | null | undefined, begin: string, beginCaptures: (CaptureRule | null)[], end: string | undefined, endCaptures: (CaptureRule | null)[], applyEndPatternLast: boolean | undefined, patterns: ICompilePatternsResult) { + super($location, id, name, contentName, comment); this._begin = new RegExpSource(begin, this.id); this.beginCaptures = beginCaptures; this._end = new RegExpSource(end ? end : '\uFFFF', -1); @@ -298,8 +305,8 @@ export class BeginWhileRule extends Rule { private _cachedCompiledPatterns: RegExpSourceList | null; private _cachedCompiledWhilePatterns: RegExpSourceList | null; - constructor($location: ILocation | undefined, id: RuleId, name: string | null | undefined, contentName: string | null | undefined, begin: string, beginCaptures: (CaptureRule | null)[], _while: string, whileCaptures: (CaptureRule | null)[], patterns: ICompilePatternsResult) { - super($location, id, name, contentName); + constructor($location: ILocation | undefined, id: RuleId, name: string | null | undefined, contentName: string | null | undefined, comment: string | null | undefined, begin: string, beginCaptures: (CaptureRule | null)[], _while: string, whileCaptures: (CaptureRule | null)[], patterns: ICompilePatternsResult) { + super($location, id, name, contentName, comment); this._begin = new RegExpSource(begin, this.id); this.beginCaptures = beginCaptures; this.whileCaptures = whileCaptures; @@ -380,9 +387,9 @@ export class BeginWhileRule extends Rule { export class RuleFactory { - public static createCaptureRule(helper: IRuleFactoryHelper, $location: ILocation | undefined, name: string | null | undefined, contentName: string | null | undefined, retokenizeCapturedWithRuleId: RuleId | 0): CaptureRule { + public static createCaptureRule(helper: IRuleFactoryHelper, $location: ILocation | undefined, name: string | null | undefined, contentName: string | null | undefined, comment: string | null | undefined, retokenizeCapturedWithRuleId: RuleId | 0): CaptureRule { return helper.registerRule((id) => { - return new CaptureRule($location, id, name, contentName, retokenizeCapturedWithRuleId); + return new CaptureRule($location, id, name, contentName, comment, retokenizeCapturedWithRuleId); }); } @@ -396,6 +403,7 @@ export class RuleFactory { desc.$vscodeTextmateLocation, desc.id, desc.name, + desc.comment, desc.match, RuleFactory._compileCaptures(desc.captures, helper, repository) ); @@ -414,6 +422,7 @@ export class RuleFactory { desc.id, desc.name, desc.contentName, + desc.comment, RuleFactory._compilePatterns(patterns, helper, repository) ); } @@ -424,6 +433,7 @@ export class RuleFactory { desc.id, desc.name, desc.contentName, + desc.comment, desc.begin, RuleFactory._compileCaptures(desc.beginCaptures || desc.captures, helper, repository), desc.while, RuleFactory._compileCaptures(desc.whileCaptures || desc.captures, helper, repository), RuleFactory._compilePatterns(desc.patterns, helper, repository) @@ -435,6 +445,7 @@ export class RuleFactory { desc.id, desc.name, desc.contentName, + desc.comment, desc.begin, RuleFactory._compileCaptures(desc.beginCaptures || desc.captures, helper, repository), desc.end, RuleFactory._compileCaptures(desc.endCaptures || desc.captures, helper, repository), desc.applyEndPatternLast, @@ -477,7 +488,7 @@ export class RuleFactory { if (captures[captureId].patterns) { retokenizeCapturedWithRuleId = RuleFactory.getCompiledRuleId(captures[captureId], helper, repository); } - r[numericCaptureId] = RuleFactory.createCaptureRule(helper, captures[captureId].$vscodeTextmateLocation, captures[captureId].name, captures[captureId].contentName, retokenizeCapturedWithRuleId); + r[numericCaptureId] = RuleFactory.createCaptureRule(helper, captures[captureId].$vscodeTextmateLocation, captures[captureId].name, captures[captureId].contentName, captures[captureId].comment, retokenizeCapturedWithRuleId); } } diff --git a/src/theme.ts b/src/theme.ts index 2b22a99..46ac327 100644 --- a/src/theme.ts +++ b/src/theme.ts @@ -105,9 +105,11 @@ export type ScopePattern = string; } export class ScopeStack { - static push(path: ScopeStack | null, scopeNames: ScopeName[]): ScopeStack | null { - for (const name of scopeNames) { - path = new ScopeStack(path, name); + static push(path: ScopeStack | null, scopeNames: ScopeName[], scopeComments?: (string | null)[]): ScopeStack | null { + for (let i = 0; i < scopeNames.length; i++) { + const name = scopeNames[i]; + const comment = scopeComments && scopeComments[i] !== undefined ? scopeComments[i] : null; + path = new ScopeStack(path, name, comment); } return path; } @@ -117,18 +119,19 @@ export class ScopeStack { public static from(...segments: ScopeName[]): ScopeStack | null { let result: ScopeStack | null = null; for (let i = 0; i < segments.length; i++) { - result = new ScopeStack(result, segments[i]); + result = new ScopeStack(result, segments[i], null); } return result; } constructor( public readonly parent: ScopeStack | null, - public readonly scopeName: ScopeName + public readonly scopeName: ScopeName, + public readonly scopeComment: string | null ) {} - public push(scopeName: ScopeName): ScopeStack { - return new ScopeStack(this, scopeName); + public push(scopeName: ScopeName, scopeComment?: string | null): ScopeStack { + return new ScopeStack(this, scopeName, scopeComment || null); } public getSegments(): ScopeName[] { @@ -142,6 +145,17 @@ export class ScopeStack { return result; } + public getComments(): (string | null)[] { + let item: ScopeStack | null = this; + const result: (string | null)[] = []; + while (item) { + result.push(item.scopeComment); + item = item.parent; + } + result.reverse(); + return result; + } + public toString() { return this.getSegments().join(' '); } From 4ff5cea236cadbabfb016dee5f1ab42aa37cd92f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 8 Dec 2025 11:34:24 +0000 Subject: [PATCH 3/4] Add tests for scope comments functionality Co-authored-by: alexr00 <38270282+alexr00@users.noreply.github.com> --- src/tests/all.test.ts | 1 + src/tests/scopeComments.test.ts | 64 +++++++++++++++++++++ test-cases/scope-comments/test.grammar.json | 19 ++++++ 3 files changed, 84 insertions(+) create mode 100644 src/tests/scopeComments.test.ts create mode 100644 test-cases/scope-comments/test.grammar.json diff --git a/src/tests/all.test.ts b/src/tests/all.test.ts index 97bad18..c11931d 100644 --- a/src/tests/all.test.ts +++ b/src/tests/all.test.ts @@ -5,5 +5,6 @@ import './grammar.test'; import './json.test'; import './matcher.test'; +import './scopeComments.test'; import './themes.test'; import './tokenization.test'; diff --git a/src/tests/scopeComments.test.ts b/src/tests/scopeComments.test.ts new file mode 100644 index 0000000..154a34e --- /dev/null +++ b/src/tests/scopeComments.test.ts @@ -0,0 +1,64 @@ +/*--------------------------------------------------------- + * Copyright (C) Microsoft Corporation. All rights reserved. + *--------------------------------------------------------*/ + +import * as path from 'path'; +import * as assert from 'assert'; +import { Registry, IGrammar, RegistryOptions, parseRawGrammar } from '../main'; +import { getOniguruma } from './onigLibs'; +import * as fs from 'fs'; + +const REPO_ROOT = path.join(__dirname, '../../'); + +suite('Scope Comments', () => { + test('should expose scope comments in tokens', async () => { + const grammarPath = path.join(REPO_ROOT, 'test-cases/scope-comments/test.grammar.json'); + const grammarContent = fs.readFileSync(grammarPath).toString(); + const rawGrammar = parseRawGrammar(grammarContent, grammarPath); + + const options: RegistryOptions = { + onigLib: getOniguruma(), + loadGrammar: () => Promise.resolve(rawGrammar) + }; + + const registry = new Registry(options); + const grammar: IGrammar | null = await registry.loadGrammar('source.test'); + + assert.ok(grammar, 'Grammar should be loaded'); + + // Test line with comment + const result1 = grammar.tokenizeLine('hello world test', null); + + // Filter out whitespace tokens for easier testing + const nonWhitespaceTokens = result1.tokens.filter(t => { + const text = 'hello world test'.substring(t.startIndex, t.endIndex); + return text.trim().length > 0; + }); + + assert.strictEqual(nonWhitespaceTokens.length, 3, 'Should have 3 non-whitespace tokens'); + + // First token: "hello" - should have comment + assert.strictEqual(nonWhitespaceTokens[0].scopes.length, 2); + assert.strictEqual(nonWhitespaceTokens[0].scopes[0], 'source.test'); + assert.strictEqual(nonWhitespaceTokens[0].scopes[1], 'keyword.test'); + assert.strictEqual(nonWhitespaceTokens[0].scopeComments.length, 2); + assert.strictEqual(nonWhitespaceTokens[0].scopeComments[0], null); // root scope has no comment + assert.strictEqual(nonWhitespaceTokens[0].scopeComments[1], 'Matches the hello keyword'); + + // Second token: "world" - should have comment + assert.strictEqual(nonWhitespaceTokens[1].scopes.length, 2); + assert.strictEqual(nonWhitespaceTokens[1].scopes[0], 'source.test'); + assert.strictEqual(nonWhitespaceTokens[1].scopes[1], 'string.test'); + assert.strictEqual(nonWhitespaceTokens[1].scopeComments.length, 2); + assert.strictEqual(nonWhitespaceTokens[1].scopeComments[0], null); // root scope has no comment + assert.strictEqual(nonWhitespaceTokens[1].scopeComments[1], 'Matches the world keyword'); + + // Third token: "test" - should have no comment (null) + assert.strictEqual(nonWhitespaceTokens[2].scopes.length, 2); + assert.strictEqual(nonWhitespaceTokens[2].scopes[0], 'source.test'); + assert.strictEqual(nonWhitespaceTokens[2].scopes[1], 'variable.test'); + assert.strictEqual(nonWhitespaceTokens[2].scopeComments.length, 2); + assert.strictEqual(nonWhitespaceTokens[2].scopeComments[0], null); // root scope has no comment + assert.strictEqual(nonWhitespaceTokens[2].scopeComments[1], null); // this scope has no comment + }); +}); diff --git a/test-cases/scope-comments/test.grammar.json b/test-cases/scope-comments/test.grammar.json new file mode 100644 index 0000000..706efea --- /dev/null +++ b/test-cases/scope-comments/test.grammar.json @@ -0,0 +1,19 @@ +{ + "scopeName": "source.test", + "patterns": [ + { + "match": "\\b(hello)\\b", + "name": "keyword.test", + "comment": "Matches the hello keyword" + }, + { + "match": "\\b(world)\\b", + "name": "string.test", + "comment": "Matches the world keyword" + }, + { + "match": "\\b(test)\\b", + "name": "variable.test" + } + ] +} From 7ffbe964b77df71d60cabbd10b5d6e971e83b51a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 8 Dec 2025 11:36:05 +0000 Subject: [PATCH 4/4] Fix array bounds check in ScopeStack.push --- src/theme.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/theme.ts b/src/theme.ts index 46ac327..a58632e 100644 --- a/src/theme.ts +++ b/src/theme.ts @@ -108,7 +108,7 @@ export class ScopeStack { static push(path: ScopeStack | null, scopeNames: ScopeName[], scopeComments?: (string | null)[]): ScopeStack | null { for (let i = 0; i < scopeNames.length; i++) { const name = scopeNames[i]; - const comment = scopeComments && scopeComments[i] !== undefined ? scopeComments[i] : null; + const comment = scopeComments && i < scopeComments.length ? scopeComments[i] : null; path = new ScopeStack(path, name, comment); } return path;