From c3ad4d3127bfa273012f77ac39ea9ca11aac85dd Mon Sep 17 00:00:00 2001
From: Dennise Bartlett <bartlett.dc.1@gmail.com>
Date: Wed, 24 Dec 2025 09:28:12 -0800
Subject: [PATCH 1/3] feat: Add evally test runner and implementation for
 vscode-evals

---
 .gitignore                                    |   4 +-
 apps/vscode-evals/.env.local.sample           |   1 +
 apps/vscode-evals/.vscode-test.mjs            |  16 +
 apps/vscode-evals/eslint.config.mjs           |   4 +
 apps/vscode-evals/package.json                |  26 +
 apps/vscode-evals/src/runTest.ts              |  33 +
 .../src/suite/applyDiff.matrix.test.ts        | 984 ++++++++++++++++++
 apps/vscode-evals/src/suite/index.ts          |  52 +
 apps/vscode-evals/src/suite/utils.ts          |  40 +
 apps/vscode-evals/tsconfig.esm.json           |   8 +
 apps/vscode-evals/tsconfig.json               |  19 +
 knip.json                                     |   3 +-
 packages/evally/examples/sampleMatrix.test.ts |  49 +
 packages/evally/package.json                  |  28 +
 packages/evally/src/cli/standaloneRunner.ts   |  66 ++
 packages/evally/src/index.ts                  |   2 +
 .../evally/src/runner/TestMatrixRunner.ts     | 408 ++++++++
 packages/evally/src/runner/types.ts           |  42 +
 packages/evally/tsconfig.json                 |  19 +
 pnpm-lock.yaml                                | 264 ++++-
 20 files changed, 2051 insertions(+), 17 deletions(-)
 create mode 100644 apps/vscode-evals/.env.local.sample
 create mode 100644 apps/vscode-evals/.vscode-test.mjs
 create mode 100644 apps/vscode-evals/eslint.config.mjs
 create mode 100644 apps/vscode-evals/package.json
 create mode 100644 apps/vscode-evals/src/runTest.ts
 create mode 100644 apps/vscode-evals/src/suite/applyDiff.matrix.test.ts
 create mode 100644 apps/vscode-evals/src/suite/index.ts
 create mode 100644 apps/vscode-evals/src/suite/utils.ts
 create mode 100644 apps/vscode-evals/tsconfig.esm.json
 create mode 100644 apps/vscode-evals/tsconfig.json
 create mode 100644 packages/evally/examples/sampleMatrix.test.ts
 create mode 100644 packages/evally/package.json
 create mode 100644 packages/evally/src/cli/standaloneRunner.ts
 create mode 100644 packages/evally/src/index.ts
 create mode 100644 packages/evally/src/runner/TestMatrixRunner.ts
 create mode 100644 packages/evally/src/runner/types.ts
 create mode 100644 packages/evally/tsconfig.json

diff --git a/.gitignore b/.gitignore
index 54cf66cee7a..74d3c4384e9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,4 +51,6 @@ logs
 qdrant_storage/
 
 # Architect plans
-plans/
\ No newline at end of file
+plans/
+
+.results
\ No newline at end of file
diff --git a/apps/vscode-evals/.env.local.sample b/apps/vscode-evals/.env.local.sample
new file mode 100644
index 00000000000..40c9da1bb44
--- /dev/null
+++ b/apps/vscode-evals/.env.local.sample
@@ -0,0 +1 @@
+OPENROUTER_API_KEY=sk-or-v1-...
diff --git a/apps/vscode-evals/.vscode-test.mjs b/apps/vscode-evals/.vscode-test.mjs
new file mode 100644
index 00000000000..c83f12c4bb6
--- /dev/null
+++ b/apps/vscode-evals/.vscode-test.mjs
@@ -0,0 +1,16 @@
+/**
+ * See: https://code.visualstudio.com/api/working-with-extensions/testing-extension
+ */
+
+import { defineConfig } from "@vscode/test-cli"
+
+export default defineConfig({
+	label: "integrationTest",
+	files: "out/suite/**/*.test.js", // glob over .js test files under out/, not the .ts sources
+	workspaceFolder: ".",
+	mocha: {
+		ui: "tdd", // Mocha's TDD interface
+		timeout: 60000, // Mocha timeouts are expressed in milliseconds
+	},
+	launchArgs: ["--enable-proposed-api=RooVeterinaryInc.roo-cline", "--disable-extensions"],
+})
diff --git a/apps/vscode-evals/eslint.config.mjs b/apps/vscode-evals/eslint.config.mjs
new file mode 100644
index 00000000000..694bf736642
--- /dev/null
+++ b/apps/vscode-evals/eslint.config.mjs
@@ -0,0 +1,4 @@
+import { config } from "@roo-code/config-eslint/base"
+
+/** @type {import("eslint").Linter.Config} */
+export default [...config]
diff --git a/apps/vscode-evals/package.json b/apps/vscode-evals/package.json
new file mode 100644
index 00000000000..0f788d5fe4e
--- /dev/null
+++ b/apps/vscode-evals/package.json
@@ -0,0 +1,26 @@
+{
+	"name": "@roo-code/vscode-evals",
+	"private": true,
+	"scripts": {
+		"lint": "eslint src --ext=ts --max-warnings=0",
+		"check-types": "tsc -p tsconfig.esm.json --noEmit",
+		"format": "prettier --write src",
+		"test:ci": "pnpm -w bundle && pnpm --filter @roo-code/vscode-webview build && pnpm test:run",
+		"test:run": "rimraf out && tsc -p tsconfig.json && npx dotenvx run -f .env.local -- node ./out/runTest.js",
+		"clean": "rimraf out .turbo"
+	},
+	"devDependencies": {
+		"@roo-code/evally": "workspace:^",
+		"@roo-code/config-eslint": "workspace:^",
+		"@roo-code/config-typescript": "workspace:^",
+		"@roo-code/types": "workspace:^",
+		"@types/vscode": "^1.95.0",
+		"@vscode/test-cli": "^0.0.11",
+		"@vscode/test-electron": "^2.4.0",
+		"rimraf": "^6.0.1",
+		"typescript": "5.8.3"
+	},
+	"dependencies": {
+		"vscode": "^1.1.37"
+	}
+}
diff --git a/apps/vscode-evals/src/runTest.ts b/apps/vscode-evals/src/runTest.ts
new file mode 100644
index 00000000000..82394f24abe
--- /dev/null
+++ b/apps/vscode-evals/src/runTest.ts
@@ -0,0 +1,57 @@
+import * as path from "path"
+import * as os from "os"
+import * as fs from "fs/promises"

+import { runTests } from "@vscode/test-electron"

+/**
+ * Launches VS Code through @vscode/test-electron and runs the eval suite inside it.
+ *
+ * A throwaway workspace folder is created under the OS temp directory and passed to
+ * VS Code as its only launch argument. An optional `--grep <value>` / `--file <value>`
+ * CLI pair (falling back to the TEST_GREP / TEST_FILE environment variables) is
+ * handed to the test run through `extensionTestsEnv`.
+ *
+ * On any failure the error is logged and the process exits with code 1.
+ */
+async function main() {
+	// Declared outside the try so the finally block can remove it on failure too.
+	let testWorkspace: string | undefined
+	let failed = false
+	try {
+		const extensionDevelopmentPath = path.resolve(__dirname, "../../../src")
+		const extensionTestsPath = path.resolve(__dirname, "./suite/index")
+		testWorkspace = await fs.mkdtemp(path.join(os.tmpdir(), "roo-evals-workspace-"))
+		// The flag's value is the argument whose predecessor is the flag itself.
+		const testGrep = process.argv.find((arg, i) => process.argv[i - 1] === "--grep") || process.env.TEST_GREP
+		const testFile = process.argv.find((arg, i) => process.argv[i - 1] === "--file") || process.env.TEST_FILE
+		const extensionTestsEnv = {
+			...process.env,
+			...(testGrep && { TEST_GREP: testGrep }),
+			...(testFile && { TEST_FILE: testFile }),
+		}
+		await runTests({
+			extensionDevelopmentPath,
+			extensionTestsPath,
+			launchArgs: [testWorkspace],
+			extensionTestsEnv,
+			version: process.env.VSCODE_VERSION || "1.101.2",
+		})
+	} catch (error) {
+		console.error("Failed to run vscode evals", error)
+		failed = true
+	} finally {
+		// Always remove the temp workspace, including when runTests() rejects.
+		if (testWorkspace) {
+			await fs.rm(testWorkspace, { recursive: true, force: true }).catch((cleanupError: unknown) => {
+				console.warn("Failed to remove temporary eval workspace", cleanupError)
+			})
+		}
+	}
+	if (failed) {
+		// Exit only after cleanup has run; process.exit() inside the catch would skip the finally block.
+		process.exit(1)
+	}
+}

+main()
diff --git a/apps/vscode-evals/src/suite/applyDiff.matrix.test.ts b/apps/vscode-evals/src/suite/applyDiff.matrix.test.ts
new file mode 100644
index 00000000000..810f6ade8d1
--- /dev/null
+++ b/apps/vscode-evals/src/suite/applyDiff.matrix.test.ts
@@ -0,0 +1,984 @@
+import { afterAll, afterEach, beforeAll, beforeEach, defineMatrix, describe, it } from "@roo-code/evally"
+import { RooCodeEventName, type RooCodeAPI, type ClineMessage } from "@roo-code/types"
+import { strict as assert } from "assert"
+import { waitFor, sleep } from "./utils"
+
+import * as fs from "fs/promises"
+import * as path from "path"
+interface NativeProtocolVerification {
+	hasNativeApiProtocol: boolean // set once an api_req_started message reports apiProtocol "anthropic" or "openai"
+	apiProtocol: string | null // last apiProtocol value parsed from an api_req_started message; null until one is seen
+	responseIsNotXML: boolean // starts true; cleared when a "say: text" message contains XML tool tags
+	toolWasExecuted: boolean // set when the message handler detects a tool use
+	executedToolName: string | null // name of the detected tool; null if none was detected
+}
+
+/** Produces a fresh verification record for one test run. */
+function createVerificationState(): NativeProtocolVerification {
+	// Nothing has been observed yet: no protocol reported and no XML tool tags seen.
+	const protocolDefaults = { hasNativeApiProtocol: false, apiProtocol: null, responseIsNotXML: true }
+	// Likewise no tool execution has been detected.
+	const toolDefaults = { toolWasExecuted: false, executedToolName: null }
+	// Spread order keeps the keys in the same order as the interface declaration.
+	return { ...protocolDefaults, ...toolDefaults }
+}
+
+/** Asserts, in order: an API request was seen, it used a native protocol, no XML tool tags appeared, a tool ran. */
+function assertNativeProtocolUsed(verification: NativeProtocolVerification, testName: string): void {
+	const { apiProtocol, hasNativeApiProtocol, responseIsNotXML, toolWasExecuted, executedToolName } = verification
+	const tag = `[${testName}]`
+	assert.ok(
+		apiProtocol !== null,
+		`${tag} apiProtocol should be set in api_req_started message. This indicates an API request was made.`,
+	)
+	assert.strictEqual(
+		hasNativeApiProtocol,
+		true,
+		`${tag} Native API protocol should be used. Expected apiProtocol to be "anthropic" or "openai", but got: ${apiProtocol}`,
+	)
+	assert.strictEqual(
+		responseIsNotXML,
+		true,
+		`${tag} Response should NOT contain XML tool tags. Found XML tags which indicates XML protocol was used instead of native.`,
+	)
+	assert.strictEqual(
+		toolWasExecuted,
+		true,
+		`${tag} Tool should have been executed. Executed tool: ${executedToolName || "none"}`,
+	)
+}
+
+function createNativeVerificationHandler(
+	verification: NativeProtocolVerification,
+	messages: ClineMessage[],
+	options: {
+		onError?: (error: string) => void // called with the text of every "say: error" message
+		onApplyDiffExecuted?: () => void // called on each apply_diff detection; may fire more than once per task
+		debugLogging?: boolean // defaults to true
+	} = {},
+): (event: { message: ClineMessage }) => void {
+	const { onError, onApplyDiffExecuted, debugLogging = true } = options
+	// Returns a message listener that appends every message to `messages` and mutates `verification` as evidence arrives.
+	return ({ message }: { message: ClineMessage }) => {
+		messages.push(message)
+		// Optional trace of each message's type / say / ask fields.
+		if (debugLogging) {
+			console.log(`[DEBUG] Message: type=${message.type}, say=${message.say}, ask=${message.ask}`)
+		}
+		// Error messages are always logged (regardless of debugLogging) and forwarded to onError.
+		if (message.type === "say" && message.say === "error") {
+			const errorText = (message.text as string | undefined) || "Unknown error"
+			console.error("[ERROR]:", errorText)
+			onError?.(errorText)
+		}
+		// "ask: tool" messages are parsed as JSON; a truthy `tool` field counts as a tool execution.
+		if (message.type === "ask" && message.ask === "tool") {
+			if (debugLogging && typeof message.text === "string") {
+				console.log("[DEBUG] Tool callback:", message.text.substring(0, 300))
+			}

+			try {
+				const toolData = JSON.parse((message.text as string) || "{}")

+				if (toolData.tool) {
+					verification.toolWasExecuted = true
+					verification.executedToolName = toolData.tool
+					console.log(`[VERIFIED] Tool executed: ${toolData.tool}`)
+				}
+				// Either spelling of the tool name is treated as apply_diff.
+				if (toolData.tool === "appliedDiff" || toolData.tool === "apply_diff") {
+					console.log("[TOOL] apply_diff tool executed")
+					onApplyDiffExecuted?.()
+				}
+			} catch {
+				if (debugLogging && typeof message.text === "string") {
+					console.log("[DEBUG] Tool callback not JSON:", message.text.substring(0, 100))
+				}
+			}
+		}
+		// api_req_started: the text is first scanned as a raw string, then parsed as JSON.
+		if (message.type === "say" && message.say === "api_req_started" && typeof message.text === "string") {
+			const rawText = message.text
+			if (debugLogging) {
+				console.log("[DEBUG] API request started:", rawText.substring(0, 200))
+			}
+			// NOTE(review): plain substring match. The task prompts in this file also contain "apply_diff"; if this text echoes the prompt, the check passes without any tool call — confirm.
+			if (rawText.includes("apply_diff") || rawText.includes("appliedDiff")) {
+				verification.toolWasExecuted = true
+				verification.executedToolName = verification.executedToolName || "apply_diff"
+				console.log("[VERIFIED] Tool executed via raw text check: apply_diff")
+				onApplyDiffExecuted?.()
+			}

+			try {
+				const requestData = JSON.parse(rawText)
+				// Any reported protocol is recorded; only "anthropic" and "openai" count as native.
+				if (requestData.apiProtocol) {
+					verification.apiProtocol = requestData.apiProtocol
+					if (requestData.apiProtocol === "anthropic" || requestData.apiProtocol === "openai") {
+						verification.hasNativeApiProtocol = true
+						console.log(`[VERIFIED] API Protocol: ${requestData.apiProtocol}`)
+					}
+				}
+				// Same substring test on the parsed `request` field; unlike the raw check it overwrites executedToolName.
+				if (
+					requestData.request &&
+					(requestData.request.includes("apply_diff") || requestData.request.includes("appliedDiff"))
+				) {
+					verification.toolWasExecuted = true
+					verification.executedToolName = "apply_diff"
+					console.log("[VERIFIED] Tool executed via parsed request: apply_diff")
+					onApplyDiffExecuted?.()
+				}
+			} catch (e) {
+				console.log("[DEBUG] Failed to parse api_req_started message:", e)
+			}
+		}
+		// XML-style tool tags inside a "say: text" message are taken as a sign of the XML protocol.
+		if (message.type === "say" && message.say === "text" && typeof message.text === "string") {
+			const hasXMLToolTags =
+				message.text.includes("<apply_diff>") ||
+				message.text.includes("</apply_diff>") ||
+				message.text.includes("<write_to_file>") ||
+				message.text.includes("</write_to_file>")

+			if (hasXMLToolTags) {
+				verification.responseIsNotXML = false
+				console.log("[WARNING] Found XML tool tags in response - this indicates XML protocol")
+			}
+		}
+		// Completion results are only logged; they do not change `verification`.
+		if (message.type === "say" && message.say === "completion_result" && typeof message.text === "string") {
+			if (debugLogging) {
+				console.log("[DEBUG] AI completion:", message.text.substring(0, 200))
+			}
+		}
+	}
+}
+
+const evalGroupVariables: { openRouterModelId: string }[] = [{ openRouterModelId: "openai/gpt-5.1" }]
+
+const now = Date.now()
+const testFiles: Record<
+	string,
+	{
+		path: string // empty until beforeAll assigns path.join(workspaceDir, name)
+		name: string // file name only; embeds `now`, so names differ each time this module is loaded
+		content: string // original content written to the file
+	}
+> = {
+	simpleModify: {
+		name: `test-file-simple-native-${now}.txt`,
+		content: "Hello World\nThis is a test file\nWith multiple lines",
+		path: ``,
+	},
+	multipleReplace: {
+		name: `test-func-multiple-native-${now}.js`,
+		content: `function calculate(x, y) {\n  const sum = x + y\n  const product = x * y\n  return { sum: sum, product: product }\n}`,
+		path: ``,
+	},
+	lineNumbers: {
+		name: `test-lines-native-${now}.js`,
+		content: `// Header comment\nfunction oldFunction() {\n  console.log("Old implementation")\n}\n\n// Another function\nfunction keepThis() {\n  console.log("Keep this")\n}\n\n// Footer comment`,
+		path: ``,
+	},
+	errorHandling: {
+		name: `test-error-native-${now}.txt`,
+		content: "Original content",
+		path: ``,
+	},
+	multiSearchReplace: {
+		name: `test-multi-search-native-${now}.js`,
+		content: `function processData(data) {\n  console.log("Processing data")\n  return data.map(item => item * 2)\n}\n\n// Some other code in between\nconst config = {\n  timeout: 5000,\n  retries: 3\n}\n\nfunction validateInput(input) {\n  console.log("Validating input")\n  if (!input) {\n    throw new Error("Invalid input")\n  }\n  return true\n}`,
+		path: ``,
+	},
+}
+
+/** Resolves the directory for test files: globalThis.rooTestWorkspaceDir when it is a non-empty string, else the cwd. */
+function getTestWorkspaceDir(): string {
+	const { rooTestWorkspaceDir } = globalThis as { rooTestWorkspaceDir?: string }
+	const hasOverride = typeof rooTestWorkspaceDir === "string" && rooTestWorkspaceDir.length > 0
+	// Fall back to the process working directory when no usable override is present.
+	return hasOverride ? rooTestWorkspaceDir : process.cwd()
+}
+
+/** Writes `file.content` to `file.name` inside the test workspace directory and returns the path written. */
+async function createTestFile(file: { name: string; content: string }): Promise<string> {
+	const tmpPath = path.join(getTestWorkspaceDir(), file.name)
+	await fs.writeFile(tmpPath, file.content)
+	return tmpPath
+}
+/** Restores a test file to its original content; this is the same write as creating it, so it delegates. */
+async function resetTestFile(file: { name: string; content: string }): Promise<string> {
+	return createTestFile(file)
+}
+/** Best-effort removal of a test file from the test workspace directory; never throws. */
+async function removeTestFile(file: { name: string }): Promise<void> {
+	const tmpPath = path.join(getTestWorkspaceDir(), file.name)
+	// `force: true` makes a missing file a no-op; any other failure is reported instead of silently dropped.
+	await fs.rm(tmpPath, { force: true }).catch((error: unknown) => {
+		console.warn(`[WARN] Failed to remove test file ${tmpPath}:`, error)
+	})
+}
+
+export default defineMatrix({
+	variables: evalGroupVariables,
+	iterations: 10,
+	tests: function () {
+		describe("Apply_diff Tool (Native Tool Calling)", function () {
+			let workspaceDir: string
+			beforeAll(async () => {
+				console.log("beforeAll Executed")
+				workspaceDir = getTestWorkspaceDir()
+				console.log("[INFO] Using workspace directory:", workspaceDir)
+
+				console.log("Creating test files in workspace...")
+				for (const [key, file] of Object.entries(testFiles)) {
+					file.path = path.join(workspaceDir, file.name)
+					await fs.writeFile(file.path, file.content)
+					console.log(`Created ${key} test file at:`, file.path)
+				}
+
+				for (const [key, file] of Object.entries(testFiles)) {
+					const exists = await fs
+						.access(file.path)
+						.then(() => true)
+						.catch(() => false)
+					if (!exists) {
+						throw new Error(`Failed to create ${key} test file at ${file.path}`)
+					}
+				}
+			})
+
+			beforeEach(async () => {
+				console.log("beforeEach Executed - resetting test files to original content")
+				await resetAllTestFiles()
+			})
+			afterEach(async () => {
+				console.log("afterEach Executed")
+			})
+			afterAll(async () => {
+				console.log("afterAll Executed")
+			})
+
+			it("should apply diff to modify file content and events (extension harness integrated)", async function ({
+				variable,
+			}) {
+				const api: RooCodeAPI | undefined = (globalThis as { api?: RooCodeAPI }).api as RooCodeAPI | undefined
+
+				if (!api) {
+					console.warn(
+						"[applyDiff.matrix] globalThis.api is not set; not running inside VSCode extension host. Skipping test.",
+					)
+					return
+				}
+
+				const file = testFiles.simpleModify
+				if (!file) throw new Error("Missing test file definition")
+				const expectedContent = "Hello Universe\nThis is a test file\nWith multiple lines"
+				await createTestFile(file)
+
+				const messages: ClineMessage[] = []
+				let taskStarted = false
+				let taskCompleted = false
+				let errorOccurred: string | null = null
+				let applyDiffExecuted = false
+
+				const verification = createVerificationState()
+
+				let taskId: string = ""
+				const messageHandler = createNativeVerificationHandler(verification, messages, {
+					onError: (error) => {
+						errorOccurred = error
+					},
+					onApplyDiffExecuted: () => {
+						applyDiffExecuted = true
+					},
+					debugLogging: true,
+				})
+				const taskStartedHandler = (id: string) => {
+					if (id === taskId) taskStarted = true
+				}
+				const taskCompletedHandler = (id: string) => {
+					if (id === taskId) taskCompleted = true
+				}
+				api.on(RooCodeEventName.Message, messageHandler)
+				api.on(RooCodeEventName.TaskStarted, taskStartedHandler)
+				api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler)
+
+				let verboseLog = ""
+				function logMsg(msg: string) {
+					verboseLog += msg + "\n"
+				}
+				try {
+					console.log(variable.openRouterModelId)
+					taskId = await api.startNewTask({
+						configuration: {
+							mode: "code",
+							autoApprovalEnabled: true,
+							alwaysAllowWrite: true,
+							alwaysAllowReadOnly: true,
+							alwaysAllowReadOnlyOutsideWorkspace: true,
+							toolProtocol: "native",
+							apiProvider: "openrouter",
+							openRouterModelId: variable.openRouterModelId,
+						},
+						text: `Use apply_diff on the file ${file.name} to change "Hello World" to "Hello Universe". The file already exists with this content:\n${file.content}\nAssume the file exists and you can modify it directly.`,
+					})
+					await waitFor(() => taskStarted, { timeout: 60000 })
+					if (errorOccurred) {
+						logMsg("Task failed early with error: " + errorOccurred)
+						throw createVerboseError("Early error: " + errorOccurred, verboseLog, messages)
+					}
+					await waitFor(() => taskCompleted, { timeout: 60000 })
+					await sleep(2000)
+					const actualContent = await fs.readFile(
+						file.path || path.join(getTestWorkspaceDir(), file.name),
+						"utf-8",
+					)
+					try {
+						assert.strictEqual(
+							actualContent.trim(),
+							expectedContent.trim(),
+							"File was not modified by extension and diff!",
+						)
+					} catch (e) {
+						logMsg("File content did not match expected output.")
+						logMsg("Expected:\n" + expectedContent)
+						logMsg("Actual:\n" + actualContent)
+						throw createVerboseError(e instanceof Error ? e.message : String(e), verboseLog, messages)
+					}
+
+					assertNativeProtocolUsed(verification, "simpleModify")
+
+					if (!applyDiffExecuted) {
+						logMsg("apply_diff tool was not executed!")
+						throw createVerboseError("apply_diff tool was not executed!", verboseLog, messages)
+					}
+				} catch (err) {
+					if (verboseLog || messages.length > 0) {
+						const lines = [
+							"",
+							"========== DEBUG LOG ==========",
+							verboseLog.trim(),
+							"---------- Message History ----------",
+							...messages.map((m) => JSON.stringify(m)),
+							"=====================================",
+						]
+						console.error(lines.filter(Boolean).join("\n"))
+					}
+					throw err
+				} finally {
+					api.off(RooCodeEventName.Message, messageHandler)
+					api.off(RooCodeEventName.TaskStarted, taskStartedHandler)
+					api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler)
+					await removeTestFile(file)
+				}
+
+				function createVerboseError(text: string, log: string, msgArr: ClineMessage[]): Error {
+					let summary = `\n\n========== DEBUG LOG ==========`
+					if (log) summary += `\n${log.trim()}`
+					summary +=
+						`\n---------- Message History ----------\n` + msgArr.map((m) => JSON.stringify(m)).join("\n")
+					summary += `\n=====================================`
+					return new Error(text + summary)
+				}
+			})
+			it("Should apply multiple search/replace blocks in single diff using native tool calling", async function ({
+				variable,
+			}) {
+				const api = (globalThis as { api?: RooCodeAPI }).api as RooCodeAPI
+				const messages: ClineMessage[] = []
+				const testFile = testFiles.multipleReplace
+				if (!testFile) {
+					throw new Error("Missing test file definition: multipleReplace")
+				}
+				const expectedContent =
+					"function compute(a, b) {\n" +
+					"  const total = a + b\n" +
+					"  const result = a * b\n" +
+					"  return { total: total, result: result }\n" +
+					"}"
+				let taskStarted = false
+				let taskCompleted = false
+				let applyDiffExecuted = false
+
+				const verification = createVerificationState()
+
+				const messageHandler = createNativeVerificationHandler(verification, messages, {
+					onApplyDiffExecuted: () => {
+						applyDiffExecuted = true
+					},
+					debugLogging: true,
+				})
+				api.on(RooCodeEventName.Message, messageHandler)
+
+				const taskStartedHandler = (id: string) => {
+					if (id === taskId) {
+						taskStarted = true
+						console.log("Task started:", id)
+					}
+				}
+				api.on(RooCodeEventName.TaskStarted, taskStartedHandler)
+
+				const taskCompletedHandler = (id: string) => {
+					if (id === taskId) {
+						taskCompleted = true
+						console.log("Task completed:", id)
+					}
+				}
+				api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler)
+
+				let taskId: string
+				try {
+					taskId = await api.startNewTask({
+						configuration: {
+							mode: "code",
+							autoApprovalEnabled: true,
+							alwaysAllowWrite: true,
+							alwaysAllowReadOnly: true,
+							alwaysAllowReadOnlyOutsideWorkspace: true,
+							toolProtocol: "native",
+							apiProvider: "openrouter",
+							openRouterModelId: variable.openRouterModelId,
+						},
+						text: `Use apply_diff on the file ${testFile.name} to make ALL of these changes:
+1. Rename function "calculate" to "compute"
+2. Rename parameters "x, y" to "a, b"
+3. Rename variable "sum" to "total" (including in the return statement)
+4. Rename variable "product" to "result" (including in the return statement)
+5. In the return statement, change { sum: sum, product: product } to { total: total, result: result }
+
+The file already exists with this content:
+${testFile.content}
+
+Assume the file exists and you can modify it directly.`,
+					})
+
+					console.log("Task ID:", taskId)
+					console.log("Test filename:", testFile.name)
+
+					await waitFor(() => taskStarted, { timeout: 60_000 })
+
+					await waitFor(() => taskCompleted, { timeout: 60_000 })
+
+					await sleep(2000)
+
+					const actualContent = await fs.readFile(testFile.path, "utf-8")
+					console.log("File content after modification:", actualContent)
+
+					assertNativeProtocolUsed(verification, "multipleReplace")
+
+					assert.strictEqual(applyDiffExecuted, true, "apply_diff tool should have been executed")
+
+					assert.strictEqual(
+						actualContent.trim(),
+						expectedContent.trim(),
+						"All replacements should be applied correctly",
+					)
+
+					console.log(
+						"Test passed! apply_diff tool executed with VERIFIED native protocol and multiple replacements applied successfully",
+					)
+				} finally {
+					api.off(RooCodeEventName.Message, messageHandler)
+					api.off(RooCodeEventName.TaskStarted, taskStartedHandler)
+					api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler)
+				}
+			})
+			it("Should handle apply_diff with line number hints using native tool calling", async function ({
+				variable,
+			}) {
+				const api = (globalThis as { api?: RooCodeAPI }).api as RooCodeAPI
+				const messages: ClineMessage[] = []
+				const testFile = testFiles.lineNumbers
+				if (!testFile) {
+					throw new Error("Missing test file definition: lineNumbers")
+				}
+				const expectedContent =
+					"// Header comment\n" +
+					"function newFunction() {\n" +
+					'  console.log("New implementation")\n' +
+					"}\n" +
+					"\n" +
+					"// Another function\n" +
+					"function keepThis() {\n" +
+					'  console.log("Keep this")\n' +
+					"}\n" +
+					"\n" +
+					"// Footer comment"
+
+				let taskStarted = false
+				let taskCompleted = false
+				let applyDiffExecuted = false
+
+				const verification = createVerificationState()
+
+				const messageHandler = createNativeVerificationHandler(verification, messages, {
+					onApplyDiffExecuted: () => {
+						applyDiffExecuted = true
+					},
+					debugLogging: true,
+				})
+				api.on(RooCodeEventName.Message, messageHandler)
+
+				const taskStartedHandler = (id: string) => {
+					if (id === taskId) {
+						taskStarted = true
+					}
+				}
+				api.on(RooCodeEventName.TaskStarted, taskStartedHandler)
+
+				const taskCompletedHandler = (id: string) => {
+					if (id === taskId) {
+						taskCompleted = true
+					}
+				}
+				api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler)
+
+				let taskId: string
+				try {
+					taskId = await api.startNewTask({
+						configuration: {
+							mode: "code",
+							autoApprovalEnabled: true,
+							alwaysAllowWrite: true,
+							alwaysAllowReadOnly: true,
+							alwaysAllowReadOnlyOutsideWorkspace: true,
+							toolProtocol: "native",
+							apiProvider: "openrouter",
+							openRouterModelId: variable.openRouterModelId,
+						},
+						text: `Use apply_diff on the file ${testFile.name} to change "oldFunction" to "newFunction" and update its console.log to "New implementation". Keep the rest of the file unchanged.
+
+The file already exists with this content:
+${testFile.content}
+
+Assume the file exists and you can modify it directly.`,
+					})
+
+					console.log("Task ID:", taskId)
+					console.log("Test filename:", testFile.name)
+
+					await waitFor(() => taskStarted, { timeout: 60_000 })
+
+					await waitFor(() => taskCompleted, { timeout: 60_000 })
+
+					await sleep(2000)
+
+					const actualContent = await fs.readFile(testFile.path, "utf-8")
+					console.log("File content after modification:", actualContent)
+
+					assertNativeProtocolUsed(verification, "lineNumbers")
+
+					assert.strictEqual(applyDiffExecuted, true, "apply_diff tool should have been executed")
+
+					assert.strictEqual(
+						actualContent.trim(),
+						expectedContent.trim(),
+						"Only specified function should be modified",
+					)
+
+					console.log(
+						"Test passed! apply_diff tool executed with VERIFIED native protocol and targeted modification successful",
+					)
+				} finally {
+					api.off(RooCodeEventName.Message, messageHandler)
+					api.off(RooCodeEventName.TaskStarted, taskStartedHandler)
+					api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler)
+				}
+			})
+			it("Should handle apply_diff errors gracefully using native tool calling", async function ({ variable }) {
+				const api = (globalThis as { api?: RooCodeAPI }).api as RooCodeAPI
+				const messages: ClineMessage[] = []
+				const testFile = testFiles.errorHandling
+				if (!testFile) {
+					throw new Error("Missing test file definition: errorHandling")
+				}
+				let taskStarted = false
+				let taskCompleted = false
+				let errorDetected = false
+				let applyDiffAttempted = false
+				let writeToFileUsed = false
+
+				const messageHandler = ({ message }: { message: ClineMessage }) => {
+					messages.push(message)
+
+					if (message.type === "say" && message.say === "error") {
+						errorDetected = true
+						console.log("Error detected:", message.text)
+					}
+
+					if (message.type === "ask" && message.ask === "tool") {
+						console.log("Tool ASK request:", message.text?.substring(0, 500))
+						try {
+							const toolData = JSON.parse(message.text || "{}")
+							if (toolData.tool === "appliedDiff") {
+								applyDiffAttempted = true
+								console.log("apply_diff tool attempted via ASK!")
+							}
+							if (toolData.tool === "editedExistingFile" || toolData.tool === "newFileCreated") {
+								writeToFileUsed = true
+								console.log("write_to_file tool used!")
+							}
+						} catch (e) {
+							console.error(e)
+						}
+					}
+
+					if (message.type === "say" && message.say === "diff_error") {
+						applyDiffAttempted = true
+						console.log("diff_error detected - apply_diff was attempted")
+					}
+
+					if (message.type === "say" && message.say === "api_req_started" && message.text) {
+						console.log("API request started:", message.text.substring(0, 200))
+					}
+				}
+				api.on(RooCodeEventName.Message, messageHandler)
+
+				const taskStartedHandler = (id: string) => {
+					if (id === taskId) {
+						taskStarted = true
+					}
+				}
+				api.on(RooCodeEventName.TaskStarted, taskStartedHandler)
+
+				const taskCompletedHandler = (id: string) => {
+					if (id === taskId) {
+						taskCompleted = true
+					}
+				}
+				api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler)
+
+				let taskId: string
+				try {
+					taskId = await api.startNewTask({
+						configuration: {
+							mode: "code",
+							autoApprovalEnabled: true,
+							alwaysAllowWrite: true,
+							alwaysAllowReadOnly: true,
+							alwaysAllowReadOnlyOutsideWorkspace: true,
+							reasoningEffort: "none",
+							toolProtocol: "native",
+							apiProvider: "openrouter",
+							openRouterModelId: variable.openRouterModelId,
+						},
+						text: `
+---
+description: Test apply_diff tool error handling with non-existent patterns
+argument-hint: <file-path> [search-pattern]
+---
+
+<task>
+Test the apply_diff tool's error handling by attempting to replace a pattern that does not exist in the target file.
+Target File: ${testFile.name}
+Search pattern: "PATTERN_THAT_DOES_NOT_EXIST_xyz123"
+Replacement: "REPLACEMENT_xyz123"
+</task>
+
+<purpose>
+This command verifies that apply_diff correctly handles and reports errors when:
+- A search pattern is not found in the target file
+- The tool gracefully fails with an informative error message
+- Error handling works as expected for debugging workflows
+</purpose>
+
+<workflow>
+  <step number="1">
+    <action>Execute apply_diff directly</action>
+    <details>
+      Call apply_diff on the specified file with a non-existent search pattern.
+      Do NOT analyze the file first - the goal is to test error handling.
+    </details>
+  </step>
+  
+  <step number="2">
+    <action>Observe the error response</action>
+    <details>
+      The apply_diff tool should report that the pattern was not found.
+      This is the EXPECTED outcome - not a failure of the test.
+    </details>
+  </step>
+  
+  <step number="3">
+    <action>Report results</action>
+    <details>
+      Confirm whether the error handling worked correctly by reporting:
+      - The error message received
+      - Whether the tool behaved as expected
+    </details>
+  </step>
+</workflow>
+
+<requirements>
+  <mandatory>
+    - YOU MUST call the apply_diff tool - this is non-negotiable
+    - Use the EXACT search pattern provided (or default: "PATTERN_THAT_DOES_NOT_EXIST_xyz123")
+    - Do NOT use write_to_file or any other file modification tool
+    - Do NOT analyze the file contents before calling apply_diff
+    - Do NOT refuse to call the tool - error handling verification is the purpose
+  </mandatory>
+  
+  <defaults>
+    <search_pattern>PATTERN_THAT_DOES_NOT_EXIST_xyz123</search_pattern>
+    <replacement>REPLACEMENT_xyz123</replacement>
+  </defaults>
+</requirements>
+
+<apply_diff_template>
+  <instructions>
+    Use this structure for the apply_diff call:
+    - path: The file specified by the user
+    - diff: A SEARCH/REPLACE block with the non-existent pattern
+  </instructions>
+  
+  <example>
+    \`\`\`
+    <<<<<<< SEARCH
+    :start_line:1
+    -------
+    PATTERN_THAT_DOES_NOT_EXIST_xyz123
+    =======
+    REPLACEMENT_xyz123
+    >>>>>>> REPLACE
+    \`\`\`
+  </example>
+</apply_diff_template>
+
+<expected_outcome>
+  <success_criteria>
+    The test succeeds when apply_diff returns an error indicating the pattern was not found.
+    This confirms the tool's error handling is working correctly.
+  </success_criteria>
+  
+  <report_format>
+    After executing, report:
+    - Whether apply_diff was called: YES/NO
+    - Error message received: [actual error]
+    - Error handling status: WORKING/FAILED
+  </report_format>
+</expected_outcome>
+
+<constraints>
+  - Only use the apply_diff tool
+  - Accept that "pattern not found" errors are the expected result
+  - Do not attempt to "fix" the test by finding real patterns
+  - This is a diagnostic/testing command, not a production workflow
+</constraints>`,
+					})
+
+					console.log("Task ID:", taskId)
+					console.log("Test filename:", testFile.name)
+					await waitFor(() => taskStarted, { timeout: 90_000 })
+
+					await waitFor(() => taskCompleted || errorDetected, { timeout: 90_000 })
+
+					await sleep(2000)
+
+					const actualContent = await fs.readFile(testFile.path, "utf-8")
+					console.log("File content after task:", actualContent)
+					console.log("applyDiffAttempted:", applyDiffAttempted)
+					console.log("writeToFileUsed:", writeToFileUsed)
+
+					assert.strictEqual(applyDiffAttempted, true, "apply_diff tool should have been attempted")
+
+					assert.strictEqual(
+						writeToFileUsed,
+						false,
+						"write_to_file should NOT be used when apply_diff fails - the AI should report the error instead",
+					)
+
+					assert.strictEqual(
+						actualContent.trim(),
+						testFile.content.trim(),
+						"File content should remain unchanged when search pattern not found",
+					)
+
+					console.log("Test passed! apply_diff attempted with native protocol and error handled gracefully")
+				} finally {
+					api.off(RooCodeEventName.Message, messageHandler)
+					api.off(RooCodeEventName.TaskStarted, taskStartedHandler)
+					api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler)
+				}
+			})
+			// Matrix test: asks the model to rename two functions in one file through a single
+			// apply_diff call that carries two SEARCH/REPLACE blocks, then compares the file on
+			// disk with the expected text. `variable` supplies the OpenRouter model id to use.
+			it("Should apply multiple search/replace blocks to edit two separate functions using native tool calling", async function ({
+				variable,
+			}) {
+				// The extension API is read from globalThis (the suite entry point stores it there).
+				const api = (globalThis as { api?: RooCodeAPI }).api as RooCodeAPI
+				// Every message received while this test is subscribed is collected here.
+				const messages: ClineMessage[] = []
+				const testFile = testFiles.multiSearchReplace
+				if (!testFile) {
+					throw new Error("Missing test file definition: multiSearchReplace")
+				}
+				// Expected file text once both renames and both log-message changes are applied.
+				const expectedContent =
+					"function transformData(data) {\n" +
+					'  console.log("Transforming data")\n' +
+					"  return data.map(item => item * 2)\n" +
+					"}\n" +
+					"\n" +
+					"// Some other code in between\n" +
+					"const config = {\n" +
+					"  timeout: 5000,\n" +
+					"  retries: 3\n" +
+					"}\n" +
+					"\n" +
+					"function checkInput(input) {\n" +
+					'  console.log("Checking input")\n' +
+					"  if (!input) {\n" +
+					'    throw new Error("Invalid input")\n' +
+					"  }\n" +
+					"  return true\n" +
+					"}"
+				// Flags flipped by the event handlers below and polled via waitFor().
+				let taskStarted = false
+				let taskCompleted = false
+				let errorOccurred: string | null = null
+				let applyDiffExecuted = false
+				let applyDiffCount = 0
+
+				// NOTE(review): createVerificationState is defined outside this excerpt; the fields
+				// written below are the only ones this test is known to touch.
+				const verification = createVerificationState()
+
+				// Inspects every message and records tool usage, errors and the API protocol.
+				const messageHandler = ({ message }: { message: ClineMessage }) => {
+					messages.push(message)
+
+					// Errors are remembered and logged; they do not abort the test by themselves.
+					if (message.type === "say" && message.say === "error") {
+						errorOccurred = message.text || "Unknown error"
+						console.error("Error:", message.text)
+					}
+					// Tool requests carry a JSON payload in message.text; its `tool` field names the tool.
+					if (message.type === "ask" && message.ask === "tool") {
+						console.log("Tool request:", message.text?.substring(0, 200))
+						try {
+							const toolData = JSON.parse(message.text || "{}")
+							if (toolData.tool) {
+								verification.toolWasExecuted = true
+								verification.executedToolName = toolData.tool
+								console.log(`[VERIFIED] Tool executed: ${toolData.tool}`)
+							}
+							// "appliedDiff" is the tool name this test counts as an apply_diff execution.
+							if (toolData.tool === "appliedDiff") {
+								applyDiffExecuted = true
+								applyDiffCount++
+								console.log(`apply_diff tool executed! (count: ${applyDiffCount})`)
+							}
+						} catch (_e) {
+							// Payloads that are not valid JSON are deliberately ignored.
+							void _e
+						}
+					}
+					if (message.type === "say" && (message.say === "completion_result" || message.say === "text")) {
+						console.log("AI response:", message.text?.substring(0, 200))
+						// apply_diff XML tags appearing in plain text mark the response as XML-style.
+						if (message.say === "text" && message.text) {
+							const hasXMLToolTags =
+								message.text.includes("<apply_diff>") || message.text.includes("</apply_diff>")
+							if (hasXMLToolTags) {
+								verification.responseIsNotXML = false
+								console.log("[WARNING] Found XML tool tags in response")
+							}
+						}
+					}
+
+					// api_req_started messages carry JSON; its apiProtocol field is recorded, and only
+					// "anthropic" or "openai" set the native-protocol flag.
+					if (message.type === "say" && message.say === "api_req_started" && message.text) {
+						console.log("API request started:", message.text.substring(0, 200))
+						try {
+							const requestData = JSON.parse(message.text)
+							if (requestData.apiProtocol) {
+								verification.apiProtocol = requestData.apiProtocol
+								if (requestData.apiProtocol === "anthropic" || requestData.apiProtocol === "openai") {
+									verification.hasNativeApiProtocol = true
+									console.log(`[VERIFIED] API Protocol: ${requestData.apiProtocol}`)
+								}
+							}
+						} catch (e) {
+							console.log("Failed to parse api_req_started message:", e)
+						}
+					}
+				}
+				api.on(RooCodeEventName.Message, messageHandler)
+
+				// Start/complete events are matched against this test's task id only.
+				const taskStartedHandler = (id: string) => {
+					if (id === taskId) {
+						taskStarted = true
+						console.log("Task started:", id)
+					}
+				}
+				api.on(RooCodeEventName.TaskStarted, taskStartedHandler)
+
+				const taskCompletedHandler = (id: string) => {
+					if (id === taskId) {
+						taskCompleted = true
+						console.log("Task completed:", id)
+					}
+				}
+				api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler)
+
+				// NOTE(review): taskId is declared after the handlers that compare against it and is
+				// only assigned once startNewTask resolves; an event delivered before that point
+				// would not match - confirm the API never emits that early.
+				let taskId: string
+				try {
+					// Launch the task with auto-approval, the native tool protocol and the model
+					// taken from the matrix variable.
+					taskId = await api.startNewTask({
+						configuration: {
+							mode: "code",
+							autoApprovalEnabled: true,
+							alwaysAllowWrite: true,
+							alwaysAllowReadOnly: true,
+							alwaysAllowReadOnlyOutsideWorkspace: true,
+							toolProtocol: "native",
+							apiProvider: "openrouter",
+							openRouterModelId: variable.openRouterModelId,
+						},
+						text: `Use apply_diff on the file ${testFile.name} to make these changes. You MUST use TWO SEPARATE search/replace blocks within a SINGLE apply_diff call:
+
+FIRST search/replace block: Edit the processData function to rename it to "transformData" and change "Processing data" to "Transforming data"
+
+SECOND search/replace block: Edit the validateInput function to rename it to "checkInput" and change "Validating input" to "Checking input"
+
+Important: Use multiple SEARCH/REPLACE blocks in one apply_diff call, NOT multiple apply_diff calls. Each function should have its own search/replace block.
+
+The file already exists with this content:
+${testFile.content}
+
+Assume the file exists and you can modify it directly.`,
+					})
+
+					console.log("Task ID:", taskId)
+					console.log("Test filename:", testFile.name)
+
+					// Each wait is capped at 60 seconds.
+					await waitFor(() => taskStarted, { timeout: 60_000 })
+
+					// An early error is only logged; the test still waits for completion below.
+					if (errorOccurred) {
+						console.error("Early error detected:", errorOccurred)
+					}
+
+					await waitFor(() => taskCompleted, { timeout: 60_000 })
+
+					// Fixed two-second pause before reading the file back.
+					await sleep(2000)
+
+					const actualContent = await fs.readFile(testFile.path, "utf-8")
+					console.log("File content after modification:", actualContent)
+
+					// NOTE(review): assertNativeProtocolUsed is defined outside this excerpt; presumably
+					// it asserts on the verification flags collected above - confirm.
+					assertNativeProtocolUsed(verification, "multiSearchReplace")
+
+					// The call count is logged but not asserted, so several apply_diff calls still pass.
+					assert.strictEqual(applyDiffExecuted, true, "apply_diff tool should have been executed")
+					console.log(`apply_diff was executed ${applyDiffCount} time(s)`)
+
+					// Comparison ignores leading/trailing whitespace on both sides.
+					assert.strictEqual(
+						actualContent.trim(),
+						expectedContent.trim(),
+						"Both functions should be modified with separate search/replace blocks",
+					)
+
+					console.log(
+						"Test passed! apply_diff tool executed with VERIFIED native protocol and multiple search/replace blocks applied successfully",
+					)
+				} finally {
+					// Always detach the listeners so they do not fire during later tests.
+					api.off(RooCodeEventName.Message, messageHandler)
+					api.off(RooCodeEventName.TaskStarted, taskStartedHandler)
+					api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler)
+				}
+			})
+		})
+	},
+})
+
+/**
+ * Resets every entry of `testFiles`, one after another, awaiting
+ * `resetTestFile` for each entry before moving on to the next.
+ */
+async function resetAllTestFiles() {
+	for (const [, testFile] of Object.entries(testFiles)) {
+		await resetTestFile(testFile)
+	}
+}
diff --git a/apps/vscode-evals/src/suite/index.ts b/apps/vscode-evals/src/suite/index.ts
new file mode 100644
index 00000000000..41d8a34bfd2
--- /dev/null
+++ b/apps/vscode-evals/src/suite/index.ts
@@ -0,0 +1,52 @@
+import * as path from "path"
+import * as vscode from "vscode"
+
+import { runMatrix } from "@roo-code/evally"
+import type { MatrixSuiteDefinition } from "@roo-code/evally"
+import type { RooCodeAPI } from "@roo-code/types"
+
+import { waitFor } from "./utils"
+
+/** `globalThis` widened with the slots the eval suite uses to share state with test files. */
+type TestGlobals = typeof globalThis & {
+	api?: RooCodeAPI
+	rooTestWorkspaceDir?: string
+}
+
+/** Returns `globalThis` viewed through the {@link TestGlobals} type; no copy is made. */
+const getTestGlobals = (): TestGlobals => {
+	const shared = globalThis as TestGlobals
+	return shared
+}
+
+/**
+ * Entry point of the eval suite inside the VS Code extension host.
+ *
+ * Activates the Roo Code extension, points it at OpenRouter using environment
+ * variables, publishes the API (and the first workspace folder, if any) on
+ * `globalThis`, then loads the apply-diff matrix suite and runs it.
+ *
+ * @throws Error when the extension is not installed, when OPENROUTER_API_KEY
+ *   is not set, or when the suite module has no valid default export.
+ */
+export async function run() {
+	const extension = vscode.extensions.getExtension<RooCodeAPI>("RooVeterinaryInc.roo-cline")
+
+	if (!extension) {
+		throw new Error("Extension not found")
+	}
+
+	// Fail fast with a clear message instead of configuring the provider with an
+	// undefined key and letting every test fail later for a non-obvious reason.
+	const openRouterApiKey = process.env.OPENROUTER_API_KEY
+	if (!openRouterApiKey) {
+		throw new Error("OPENROUTER_API_KEY environment variable must be set to run the eval suite")
+	}
+
+	const api = extension.isActive ? extension.exports : await extension.activate()
+
+	await api.setConfiguration({
+		apiProvider: "openrouter" as const,
+		openRouterApiKey,
+		// `||` on purpose: an empty OPENROUTER_MODEL_ID also falls back to the default model.
+		openRouterModelId: process.env.OPENROUTER_MODEL_ID || "openai/gpt-5.1",
+	})
+
+	await vscode.commands.executeCommand("roo-cline.SidebarProvider.focus")
+	await waitFor(() => api.isReady())
+
+	// Test files read the API from globalThis rather than importing it.
+	const globals = getTestGlobals()
+	globals.api = api
+
+	const workspaceDir = vscode.workspace.workspaceFolders?.[0]?.uri.fsPath
+	if (workspaceDir) {
+		globals.rooTestWorkspaceDir = workspaceDir
+	}
+
+	const suiteModule = await import(path.resolve(__dirname, "./applyDiff.matrix.test"))
+	const moduleDefault = (suiteModule as { default?: MatrixSuiteDefinition }).default
+
+	if (!moduleDefault || typeof moduleDefault !== "object" || typeof moduleDefault.tests !== "function") {
+		throw new Error("Skipping applyDiff.matrix.test: No valid matrix suite export")
+	}
+
+	const suiteDef: MatrixSuiteDefinition = moduleDefault
+
+	// The per-iteration results returned by runMatrix are not inspected here.
+	await runMatrix(suiteDef)
+}
diff --git a/apps/vscode-evals/src/suite/utils.ts b/apps/vscode-evals/src/suite/utils.ts
new file mode 100644
index 00000000000..85fe43f0fda
--- /dev/null
+++ b/apps/vscode-evals/src/suite/utils.ts
@@ -0,0 +1,40 @@
+type WaitForOptions = {
+	/** Maximum time to wait, in milliseconds. Defaults to 30_000. */
+	timeout?: number
+	/** Delay between two condition checks, in milliseconds. Defaults to 250. */
+	interval?: number
+}
+
+/**
+ * Polls `condition` until it returns (or resolves to) a truthy value.
+ *
+ * The condition is evaluated immediately and then again every `interval`
+ * milliseconds. Once the promise settles, no further checks are scheduled and
+ * the timeout timer is cleared, so no timers are left behind.
+ *
+ * @param condition Synchronous or asynchronous predicate to poll.
+ * @param options Optional `timeout` and `interval`, both in milliseconds.
+ * @returns A promise that resolves once the condition is satisfied.
+ * @throws Rejects with `Timeout after Ns` when `timeout` elapses first, or
+ *   with the condition's own error if the condition throws or rejects.
+ */
+export const waitFor = (
+	condition: (() => Promise<boolean>) | (() => boolean),
+	{ timeout = 30_000, interval = 250 }: WaitForOptions = {},
+): Promise<void> =>
+	new Promise<void>((resolve, reject) => {
+		let settled = false
+		let pollId: ReturnType<typeof setTimeout> | undefined = undefined
+
+		// Armed before the first check so that an immediately satisfied condition can clear it.
+		const timeoutId = setTimeout(() => {
+			settled = true
+			if (pollId !== undefined) {
+				clearTimeout(pollId)
+			}
+			reject(new Error(`Timeout after ${Math.floor(timeout / 1000)}s`))
+		}, timeout)
+
+		const check = async () => {
+			try {
+				const isSatisfied = await condition()
+
+				// The timeout may have fired while an async condition was still pending.
+				if (settled) {
+					return
+				}
+
+				if (isSatisfied) {
+					settled = true
+					clearTimeout(timeoutId)
+					resolve()
+				} else {
+					pollId = setTimeout(check, interval)
+				}
+			} catch (error) {
+				if (settled) {
+					return
+				}
+
+				// Surface the failure to the caller instead of an unhandled rejection.
+				settled = true
+				clearTimeout(timeoutId)
+				reject(error)
+			}
+		}
+
+		void check()
+	})
+
+/** Returns a promise that resolves (with no value) after `ms` milliseconds, scheduled via setTimeout. */
+export const sleep = (ms: number) =>
+	new Promise((done) => {
+		setTimeout(done, ms)
+	})
diff --git a/apps/vscode-evals/tsconfig.esm.json b/apps/vscode-evals/tsconfig.esm.json
new file mode 100644
index 00000000000..e2f212fab99
--- /dev/null
+++ b/apps/vscode-evals/tsconfig.esm.json
@@ -0,0 +1,8 @@
+{
+	"extends": "@roo-code/config-typescript/base.json",
+	"compilerOptions": {
+		"outDir": "out"
+	},
+	"include": ["src"],
+	"exclude": ["node_modules"]
+}
diff --git a/apps/vscode-evals/tsconfig.json b/apps/vscode-evals/tsconfig.json
new file mode 100644
index 00000000000..a712ea84d78
--- /dev/null
+++ b/apps/vscode-evals/tsconfig.json
@@ -0,0 +1,19 @@
+{
+	"compilerOptions": {
+		"module": "CommonJS",
+		"moduleResolution": "Node",
+		"esModuleInterop": true,
+		"target": "ES2022",
+		"lib": ["ES2022", "ESNext.Disposable", "DOM"],
+		"sourceMap": true,
+		"strict": true,
+		"skipLibCheck": true,
+		"useUnknownInCatchVariables": false,
+		"outDir": "out",
+		"composite": false,
+		"types": ["node"],
+		"baseUrl": "./src"
+	},
+	"include": ["src/**/*"],
+	"exclude": [".vscode-test", "**/node_modules/**", "out"]
+}
diff --git a/knip.json b/knip.json
index e15c62bda1b..e8a3bfd24d1 100644
--- a/knip.json
+++ b/knip.json
@@ -3,6 +3,7 @@
 	"ignore": [
 		"**/__tests__/**",
 		"apps/vscode-e2e/**",
+		"apps/vscode-evals/**",
 		"src/extension/api.ts",
 		"src/activate/**",
 		"src/workers/countTokens.ts",
@@ -19,7 +20,7 @@
 			"entry": ["src/index.tsx", "src/browser-panel.tsx"],
 			"project": ["src/**/*.{ts,tsx}", "../src/shared/*.ts"]
 		},
-		"packages/{build,cloud,evals,ipc,telemetry,types}": {
+		"packages/{build,cloud,evally,evals,ipc,telemetry,types}": {
 			"project": ["src/**/*.ts"]
 		}
 	}
diff --git a/packages/evally/examples/sampleMatrix.test.ts b/packages/evally/examples/sampleMatrix.test.ts
new file mode 100644
index 00000000000..ac86ac26eb7
--- /dev/null
+++ b/packages/evally/examples/sampleMatrix.test.ts
@@ -0,0 +1,49 @@
+import { defineMatrix, it, describe } from "../src/runner/TestMatrixRunner"
+import type { MatrixTestContext } from "../src/runner/types"
+
+// Example matrix: three API descriptors, each run three times through every test below.
+// Each test returns early when its check holds and throws otherwise.
+export default defineMatrix({
+	variables: [
+		{ api: "API_A", url: "https://api-a.test", region: "us-east" },
+		{ api: "API_B", url: "https://api-b.test", region: "eu-west" },
+		{ api: "API_C", url: "https://api-c.test", region: "asia-pac" },
+	],
+	iterations: 3,
+	tests: () => {
+		// Suite 1: basic shape of each descriptor
+		describe("API Health Checks", () => {
+			it("should respond with status 200", async ({ variable, iteration }: MatrixTestContext) => {
+				if (variable.url.startsWith("https://")) return
+				throw new Error("Invalid URL")
+			})
+			it("should have a valid api name", ({ variable }: MatrixTestContext) => {
+				if (variable.api) return
+				throw new Error("Missing api name")
+			})
+			it("should include a valid region", ({ variable }: MatrixTestContext) => {
+				const knownRegions = ["us-east", "eu-west", "asia-pac"]
+				if (knownRegions.includes(variable.region)) return
+				throw new Error("Unexpected region")
+			})
+		})
+		// Suite 2: includes a check that always fails for API_B
+		describe("Authentication", () => {
+			it("should fail for no token", async ({ variable }: MatrixTestContext) => {
+				if (variable.api !== "API_B") return
+				throw new Error("No token failure")
+			})
+			it("should pass with valid token", async ({ variable }: MatrixTestContext) => {
+				if (variable.url.includes("api-")) return
+				throw new Error("No api in URL")
+			})
+			it("should region match policy", ({ variable }: MatrixTestContext) => {
+				// Only API_C may use the asia-pac region.
+				if (variable.region !== "asia-pac" || variable.api === "API_C") return
+				throw new Error("policy fail")
+			})
+		})
+		// Suite 3: format checks on the individual fields
+		describe("Data validation", () => {
+			it("should have url with a dot", ({ variable }: MatrixTestContext) => {
+				if (variable.url.includes(".")) return
+				throw new Error("URL missing dot")
+			})
+			it("api name upper-case only", ({ variable }: MatrixTestContext) => {
+				const upperSnake = /^[A-Z_]+$/
+				if (upperSnake.test(variable.api)) return
+				throw new Error("API name format")
+			})
+			it("region code format", ({ variable }: MatrixTestContext) => {
+				if (variable.region.includes("-")) return
+				throw new Error("Bad region code")
+			})
+		})
+	},
+})
diff --git a/packages/evally/package.json b/packages/evally/package.json
new file mode 100644
index 00000000000..dd76bbf0377
--- /dev/null
+++ b/packages/evally/package.json
@@ -0,0 +1,28 @@
+{
+	"name": "@roo-code/evally",
+	"version": "0.1.0",
+	"private": false,
+	"main": "dist/index.js",
+	"types": "dist/index.d.ts",
+	"exports": {
+		".": {
+			"import": "./dist/index.js",
+			"require": "./dist/index.js",
+			"types": "./dist/index.d.ts"
+		},
+		"./runner/TestMatrixRunner": {
+			"import": "./dist/runner/TestMatrixRunner.js",
+			"require": "./dist/runner/TestMatrixRunner.js",
+				"types": "./dist/runner/TestMatrixRunner.d.ts"
+		},
+		"./runner/types": {
+			"import": "./dist/runner/types.js",
+			"require": "./dist/runner/types.js",
+			"types": "./dist/runner/types.d.ts"
+		}
+	},
+	"scripts": {
+		"build": "tsc --outDir dist --declaration --declarationDir dist",
+		"test:run": "tsx src/cli/standaloneRunner.ts examples/sampleMatrix.test.ts"
+	}
+}
diff --git a/packages/evally/src/cli/standaloneRunner.ts b/packages/evally/src/cli/standaloneRunner.ts
new file mode 100644
index 00000000000..40d9d2188a7
--- /dev/null
+++ b/packages/evally/src/cli/standaloneRunner.ts
@@ -0,0 +1,66 @@
+#!/usr/bin/env node
+import * as path from "path"
+import { pathToFileURL } from "url"
+
+/**
+ * CLI entry point: loads the matrix definition exported as default by the file
+ * named in argv[2], runs it through `runMatrix`, and prints
+ *   1. one PASS/FAIL line per executed test,
+ *   2. pass rates grouped by suite, test and variable set,
+ *   3. a global pass/fail count.
+ *
+ * Exits with status 1 when the argument is missing, the file cannot be loaded
+ * or its default export is not a matrix definition. A completed run exits with
+ * status 0 even when individual tests failed.
+ */
+async function main() {
+	const file = process.argv[2]
+	if (!file) {
+		console.error("Usage: standaloneRunner <test-file>")
+		process.exit(1)
+	}
+	const absPath = path.resolve(process.cwd(), file)
+	let mod
+	try {
+		mod = await import(pathToFileURL(absPath).toString())
+	} catch (err) {
+		console.error(`Failed to load test file: ${err}`)
+		process.exit(1)
+	}
+	if (!mod.default || !mod.default.variables || typeof mod.default.tests !== "function") {
+		console.error("Test file does not export a valid matrix test definition as default.")
+		process.exit(1)
+	}
+	const { runMatrix } = await import("../runner/TestMatrixRunner")
+	const results = await runMatrix(mod.default)
+	console.log("\n--- MATRIX TEST RESULTS ---")
+	for (const r of results) {
+		const vdesc = JSON.stringify(r.variable)
+		if (r.passed) {
+			console.log(`PASS  [${vdesc}] [iteration: ${r.iteration}] ${r.testName}`)
+		} else {
+			console.log(`FAIL  [${vdesc}] [iteration: ${r.iteration}] ${r.testName}  Error: ${r.error}`)
+		}
+	}
+
+	// Counters for one (suite, test, variable set) combination.
+	type VariableCounts = {
+		variable: (typeof results)[number]["variable"]
+		total: number
+		passes: number
+		fails: number
+	}
+	// suite name -> test name -> JSON-serialised variable set -> counters
+	const suiteSummary = new Map<string, Map<string, Map<string, VariableCounts>>>()
+	for (const r of results) {
+		const testMap = suiteSummary.get(r.suite) ?? new Map<string, Map<string, VariableCounts>>()
+		suiteSummary.set(r.suite, testMap)
+		const varMap = testMap.get(r.testName) ?? new Map<string, VariableCounts>()
+		testMap.set(r.testName, varMap)
+		const varKey = JSON.stringify(r.variable)
+		const counts = varMap.get(varKey) ?? { variable: r.variable, total: 0, passes: 0, fails: 0 }
+		varMap.set(varKey, counts)
+		counts.total++
+		if (r.passed) counts.passes++
+		else counts.fails++
+	}
+	console.log("\n--- SUITE/TEST-LEVEL SUMMARY (Pass Rate per variable set, grouped by suite/test) ---")
+	for (const [suite, testMap] of suiteSummary) {
+		console.log(`Suite: ${suite}`)
+		for (const [testName, varMap] of testMap) {
+			console.log(`  Test: ${testName}`)
+			for (const { variable, total, passes, fails } of varMap.values()) {
+				const percent = ((passes / total) * 100).toFixed(1)
+				console.log(
+					`    Vars: ${JSON.stringify(variable)}\n      Pass: ${passes}/${total} (${percent}%)  Fail: ${fails}/${total}`,
+				)
+			}
+		}
+	}
+	const passes = results.filter((r) => r.passed).length
+	const fails = results.length - passes
+	console.log(`\nGlobal summary: ${passes} passed, ${fails} failed, total ${results.length}`)
+	process.exit(0)
+}
+
+// Report anything that escapes main() (for example a throwing lifecycle hook)
+// explicitly instead of leaving a floating promise.
+main().catch((err) => {
+	console.error(`Matrix run failed: ${err instanceof Error ? (err.stack ?? err.message) : String(err)}`)
+	process.exit(1)
+})
diff --git a/packages/evally/src/index.ts b/packages/evally/src/index.ts
new file mode 100644
index 00000000000..bb2beef8217
--- /dev/null
+++ b/packages/evally/src/index.ts
@@ -0,0 +1,2 @@
+export * from "./runner/TestMatrixRunner.js"
+export * from "./runner/types.js"
diff --git a/packages/evally/src/runner/TestMatrixRunner.ts b/packages/evally/src/runner/TestMatrixRunner.ts
new file mode 100644
index 00000000000..5a832f725de
--- /dev/null
+++ b/packages/evally/src/runner/TestMatrixRunner.ts
@@ -0,0 +1,408 @@
+import type {
+	MatrixRunOptions,
+	MatrixTestDescription,
+	MatrixTestFn,
+	MatrixSuiteDefinition,
+	MatrixTestResult,
+} from "./types"
+import * as path from "path"
+import * as fs from "fs/promises"
+
+// Terminal escape sequences used to style the runner's console report.
+const COLORS = {
+	reset: "\x1b[0m",
+	bold: "\x1b[1m",
+	dim: "\x1b[2m",
+	gray: "\x1b[90m",
+	green: "\x1b[32m",
+	red: "\x1b[31m",
+	yellow: "\x1b[33m",
+	cyan: "\x1b[36m",
+} as const
+
+/** Prefixes `text` with the escape sequence `code` and appends the reset sequence. */
+function color(text: string, code: string): string {
+	return [code, text, COLORS.reset].join("")
+}
+
+/**
+ * Renders a millisecond duration for the report footer:
+ * whole minutes (rounded) from one minute up, seconds with one decimal from
+ * one second up, and raw milliseconds below that.
+ */
+function formatDuration(ms: number): string {
+	const MINUTE = 60_000
+	const SECOND = 1_000
+	return ms >= MINUTE
+		? `${Math.round(ms / MINUTE)}m`
+		: ms >= SECOND
+			? `${(ms / SECOND).toFixed(1)}s`
+			: `${ms}ms`
+}
+// Tests collected by it(); each entry records the suite that was active when it was declared.
+const testRegistry: MatrixTestDescription[] = []
+// Name of the suite whose describe() callback is currently executing, or null outside of one.
+let currentSuite: string | null = null
+
+/**
+ * Declares a suite: runs `fn` synchronously with `suiteName` as the active
+ * suite so that it() calls made inside are registered under that name.
+ *
+ * The previously active suite is restored afterwards, even when `fn` throws,
+ * so a failing suite body cannot leak its name into later declarations.
+ *
+ * @param suiteName Name recorded on every test declared inside `fn`.
+ * @param fn Callback that declares the suite's tests.
+ */
+export function describe(suiteName: string, fn: () => void) {
+	const prevSuite = currentSuite
+	currentSuite = suiteName
+	try {
+		fn()
+	} finally {
+		currentSuite = prevSuite
+	}
+}
+
+/**
+ * Registers a test under the suite that is currently being described.
+ *
+ * @param name Test name; must be unique within its suite.
+ * @param fn Test body, later invoked with the matrix context.
+ * @throws Error when called outside describe() or when the suite already has a test with this name.
+ */
+export function it(name: string, fn: MatrixTestFn) {
+	const activeSuite = currentSuite
+	if (!activeSuite) {
+		throw new Error("Cannot declare test outside a describe(suiteName, ...) block.")
+	}
+
+	for (const registered of testRegistry) {
+		const isDuplicate = registered.suite === activeSuite && registered.name === name
+		if (isDuplicate) {
+			throw new Error(
+				`Duplicate test name found in suite '${activeSuite}': ${name}. Test names must be unique within a suite.`,
+			)
+		}
+	}
+
+	testRegistry.push({ suite: activeSuite, name, fn })
+}
+
+export const suite = describe
+export const test = it
+
+// Signature shared by all lifecycle hooks: no arguments, synchronous or asynchronous.
+type SetupFn = () => Promise<void> | void
+// Module-level hook lists. runMatrix() awaits the entries in registration order
+// and the lists are emptied again by clearGlobalHooks().
+const _globalBeforeAll: SetupFn[] = []
+const _globalAfterAll: SetupFn[] = []
+const _globalBeforeEach: SetupFn[] = []
+const _globalAfterEach: SetupFn[] = []
+
+/** Registers a hook that runMatrix() runs once, before the first test execution. */
+export function beforeAll(fn: SetupFn) {
+	_globalBeforeAll.push(fn)
+}
+/** Alias of {@link beforeAll}. */
+export const suiteSetup = beforeAll
+
+/** Registers a hook that runMatrix() runs once, after the last test execution. */
+export function afterAll(fn: SetupFn) {
+	_globalAfterAll.push(fn)
+}
+/** Alias of {@link afterAll}. */
+export const suiteTeardown = afterAll
+
+/** Registers a hook that runMatrix() runs before every individual test execution. */
+export function beforeEach(fn: SetupFn) {
+	_globalBeforeEach.push(fn)
+}
+/** Alias of {@link beforeEach}. */
+export const setup = beforeEach
+
+/** Registers a hook that runMatrix() runs after every individual test execution, passed or failed. */
+export function teardown(fn: SetupFn) {
+	_globalAfterEach.push(fn)
+}
+/** Alias of {@link teardown}. */
+export const afterEach = teardown
+
+/** Empties all four hook lists in place so hooks do not carry over to another run. */
+function clearGlobalHooks() {
+	const hookLists = [_globalBeforeAll, _globalAfterAll, _globalBeforeEach, _globalAfterEach]
+	for (const hooks of hookLists) {
+		hooks.length = 0
+	}
+}
+/**
+ * Registers the optional lifecycle callbacks of `def` as global hooks and
+ * returns `def` unchanged. Registration happens when this function is called,
+ * not when the matrix is run.
+ *
+ * @param def Matrix suite definition.
+ * @returns The same `def` object.
+ */
+export function defineMatrix(def: MatrixSuiteDefinition): MatrixSuiteDefinition {
+	// Each optional callback paired with the registrar that stores it.
+	const registrations: Array<[SetupFn | undefined, (fn: SetupFn) => void]> = [
+		[def.suiteSetup, suiteSetup],
+		[def.setup, setup],
+		[def.suiteTeardown, suiteTeardown],
+		[def.teardown, teardown],
+	]
+
+	for (const [hook, register] of registrations) {
+		if (typeof hook === "function") {
+			register(hook)
+		}
+	}
+
+	return def
+}
+
+/**
+ * Runs every registered test once per (variable, iteration) pair.
+ *
+ * Sequence:
+ *  1. Unless a non-silent verbosity is requested, the global console methods
+ *     are replaced with no-ops for the duration of the run (they are always
+ *     restored afterwards).
+ *  2. The registry is reset and `def.tests()` is called to collect tests.
+ *  3. beforeAll hooks run, then for each variable, iteration and test:
+ *     beforeEach hooks, the test itself, afterEach hooks. A throwing test is
+ *     recorded as a failed result; a throwing hook aborts the run.
+ *  4. afterAll hooks run.
+ *  5. Unless `opts.report === false`, a report is printed and a JSON summary
+ *     (plus one log file per failed execution) is written under
+ *     `<cwd>/.results`. Failing to write those files is logged, not thrown.
+ *
+ * Registered global hooks are cleared when the run ends, whether it completed
+ * or was aborted by a throwing hook.
+ *
+ * @param def Suite definition providing variables, iteration count and tests.
+ * @param opts Optional reporting and verbosity settings.
+ * @returns One result per executed (variable, iteration, test) combination.
+ */
+export async function runMatrix(def: MatrixSuiteDefinition, opts?: MatrixRunOptions): Promise<MatrixTestResult[]> {
+	const startTime = Date.now()
+	const verbosity: MatrixRunOptions["verbosity"] = opts?.verbosity ?? "silent"
+
+	const originalConsole = {
+		log: console.log,
+		info: console.info,
+		warn: console.warn,
+		error: console.error,
+		debug: console.debug,
+	} as const
+
+	// Captured before any muting so the runner's own report is always printed.
+	const runnerConsole = {
+		log: originalConsole.log,
+		error: originalConsole.error,
+	} as const
+
+	if (verbosity === "silent") {
+		console.log = () => {}
+		console.info = () => {}
+		console.warn = () => {}
+		console.error = () => {}
+		console.debug = () => {}
+	}
+	// Start from an empty registry so tests from a previous run are not executed again.
+	testRegistry.length = 0
+	currentSuite = null
+
+	const results: MatrixTestResult[] = []
+
+	try {
+		def.tests()
+
+		for (const hook of _globalBeforeAll) {
+			await Promise.resolve(hook())
+		}
+
+		for (const variable of def.variables) {
+			for (let i = 0; i < def.iterations; ++i) {
+				for (const t of testRegistry) {
+					for (const hook of _globalBeforeEach) {
+						await Promise.resolve(hook())
+					}
+
+					try {
+						await Promise.resolve(t.fn({ variable, iteration: i }))
+						results.push({ suite: t.suite, variable, iteration: i, testName: t.name, passed: true })
+					} catch (e) {
+						results.push({
+							suite: t.suite,
+							variable,
+							iteration: i,
+							testName: t.name,
+							passed: false,
+							error: e,
+						})
+					}
+					for (const hook of _globalAfterEach) {
+						await Promise.resolve(hook())
+					}
+				}
+			}
+		}
+
+		for (const hook of _globalAfterAll) {
+			await Promise.resolve(hook())
+		}
+	} finally {
+		console.log = originalConsole.log
+		console.info = originalConsole.info
+		console.warn = originalConsole.warn
+		console.error = originalConsole.error
+		console.debug = originalConsole.debug
+		// Cleared here rather than at the end of the function so that hooks do not
+		// leak into a later run when a hook throws and aborts this one.
+		clearGlobalHooks()
+	}
+
+	const shouldPrintSuitesAndTests = verbosity !== "silent"
+
+	if (!opts || opts.report !== false) {
+		// Results bucketed by suite name, in first-seen order.
+		const grouped: Record<string, MatrixTestResult[]> = {}
+		for (const r of results) {
+			if (!grouped[r.suite]) grouped[r.suite] = []
+			grouped[r.suite].push(r)
+		}
+
+		if (shouldPrintSuitesAndTests) {
+			for (const suiteName in grouped) {
+				const suiteLabel = color("Suite:", COLORS.bold)
+				runnerConsole.log(`\n${suiteLabel} ${color(suiteName, COLORS.dim)}`)
+
+				const perTestStats: Record<string, { total: number; passed: number; failed: number }> = {}
+
+				for (const r of grouped[suiteName]) {
+					if (!perTestStats[r.testName]) {
+						perTestStats[r.testName] = { total: 0, passed: 0, failed: 0 }
+					}
+					perTestStats[r.testName].total += 1
+					if (r.passed) perTestStats[r.testName].passed += 1
+					else perTestStats[r.testName].failed += 1
+
+					const mark = r.passed ? color("✓", COLORS.green) : color("✗", COLORS.red)
+					const name = r.testName
+					const iterationTag = `[iteration ${r.iteration}]`
+
+					const base = `  ${mark} ${name} ${color(iterationTag, COLORS.gray)}`
+					const errorSuffix =
+						r.passed || !r.error
+							? ""
+							: " -- " +
+								color(r.error instanceof Error ? r.error.message : String(r.error), COLORS.yellow)
+
+					runnerConsole.log(base + errorSuffix)
+				}
+
+				const statsEntries = Object.entries(perTestStats)
+				if (statsEntries.length > 0) {
+					const statsHeader = color("Per-test iteration stats:", COLORS.bold)
+					runnerConsole.log(`  ${statsHeader}`)
+
+					for (const [testName, stats] of statsEntries.sort(([a], [b]) => a.localeCompare(b))) {
+						const { total, passed, failed } = stats
+						const failRate = total > 0 ? failed / total : 0
+						const failPct = (failRate * 100).toFixed(1)
+						// FAILED at >= 80% failures, PASSED with none, FLAKY in between.
+						let statusLabel: string
+						if (failRate >= 0.8) {
+							statusLabel = color("FAILED", COLORS.red)
+						} else if (failed === 0) {
+							statusLabel = color("PASSED", COLORS.green)
+						} else {
+							statusLabel = color("FLAKY", COLORS.yellow)
+						}
+
+						runnerConsole.log(
+							`    - ${testName}: ${passed}/${total} iterations passed, ${failed} failed (${failPct}% failure) [${statusLabel}]`,
+						)
+					}
+				}
+			}
+		}
+
+		const passedTotal = results.filter((r) => r.passed).length
+		const failedTotal = results.length - passedTotal
+		const matrixLabel = color("Matrix:", COLORS.bold)
+		const matrixSummary =
+			failedTotal === 0
+				? color(`${passedTotal}/${results.length} passing`, COLORS.green)
+				: color(`${passedTotal}/${results.length} passing`, COLORS.red)
+		runnerConsole.log(`\n${matrixLabel} ${matrixSummary}\n`)
+		const duration = Date.now() - startTime
+		const durationText = formatDuration(duration)
+		// The runner has no notion of skipped tests, so this is always reported as zero.
+		const pendingTotal = 0
+		runnerConsole.log(color(`${passedTotal} passing (${durationText})`, COLORS.green))
+
+		runnerConsole.log("  " + color(`${pendingTotal} pending`, COLORS.cyan))
+
+		const failingColor = failedTotal > 0 ? COLORS.red : COLORS.gray
+		runnerConsole.log("  " + color(`${failedTotal} failing`, failingColor))
+
+		try {
+			const resultsDir = path.join(process.cwd(), ".results")
+			// ':' and '.' are replaced so the timestamp can be used in file names.
+			const timestamp = new Date(startTime).toISOString().replace(/[:.]/g, "-")
+			const logsDir = path.join(resultsDir, `matrix-logs-${timestamp}`)
+			const filePath = path.join(resultsDir, `matrix-results-${timestamp}.json`)
+
+			// Collapses anything outside [a-zA-Z0-9-_] to single underscores and caps the length at 80.
+			const sanitizeForFilename = (value: string): string =>
+				value
+					.replace(/[^a-zA-Z0-9-_]+/g, "_")
+					.replace(/_+/g, "_")
+					.replace(/^_+|_+$/g, "")
+					.slice(0, 80) || "unnamed"
+
+			type AggregatedTestJson = {
+				testName: string
+				pass_count: number
+				fail_count: number
+				passed: boolean
+				errors: Array<{ message: string; log: string | null }>
+			}
+
+			const aggregatedSuites: Array<{ suite: string; tests: AggregatedTestJson[] }> = []
+			const failedIterationLogs: Array<{ filePath: string; content: string }> = []
+			for (const [suiteName, suiteResults] of Object.entries(grouped)) {
+				const perTest: Record<string, MatrixTestResult[]> = {}
+				for (const r of suiteResults) {
+					if (!perTest[r.testName]) perTest[r.testName] = []
+					perTest[r.testName].push(r)
+				}
+
+				const tests: AggregatedTestJson[] = []
+
+				for (const [testName, iterations] of Object.entries(perTest).sort(([a], [b]) => a.localeCompare(b))) {
+					const totalIterations = iterations.length
+					const passedIterations = iterations.filter((r) => r.passed).length
+					const failedIterations = totalIterations - passedIterations
+					const passRate = totalIterations > 0 ? passedIterations / totalIterations : 0
+
+					// In the JSON summary a test counts as passed at a pass rate of 80% or more.
+					const aggregatedPassed = passRate >= 0.8
+
+					const errors: Array<{ message: string; log: string | null }> = []
+
+					for (const r of iterations) {
+						if (r.passed) continue
+
+						const errorMessage =
+							r.error instanceof Error
+								? r.error.message
+								: r.error != null
+									? String(r.error)
+									: "Unknown error"
+
+						// Results hold the very object taken from def.variables, so its position
+						// identifies the variable set. Including it keeps the same test and
+						// iteration failing under two variables from overwriting one log file.
+						const variableIndex = def.variables.indexOf(r.variable)
+
+						const logFileName =
+							[
+								"matrix",
+								sanitizeForFilename(suiteName),
+								sanitizeForFilename(testName),
+								`var-${variableIndex}`,
+								`iter-${r.iteration}`,
+							].join("__") + ".log"
+
+						const logAbsolutePath = path.join(logsDir, logFileName)
+						const logRelativePath = path.relative(process.cwd(), logAbsolutePath)
+
+						const logLines = [
+							`suite: ${suiteName}`,
+							`test: ${testName}`,
+							`iteration: ${r.iteration}`,
+							`runStartTime: ${new Date(startTime).toISOString()}`,
+							"",
+							"variable:",
+							JSON.stringify(r.variable, null, 2),
+							"",
+							"error:",
+							r.error instanceof Error ? (r.error.stack ?? r.error.message) : errorMessage,
+							"",
+						].join("\n")
+
+						failedIterationLogs.push({ filePath: logAbsolutePath, content: logLines })
+
+						errors.push({
+							message: errorMessage,
+							log: logRelativePath,
+						})
+					}
+
+					tests.push({
+						testName,
+						pass_count: passedIterations,
+						fail_count: failedIterations,
+						passed: aggregatedPassed,
+						errors,
+					})
+				}
+
+				aggregatedSuites.push({ suite: suiteName, tests })
+			}
+
+			const jsonPayload = {
+				summary: {
+					totalIterations: results.length,
+					passedIterations: passedTotal,
+					failedIterations: failedTotal,
+					durationMs: duration,
+					startTime: new Date(startTime).toISOString(),
+				},
+				results: aggregatedSuites,
+			}
+			await fs.mkdir(resultsDir, { recursive: true })
+			// The log directory is only created when at least one execution failed.
+			if (failedIterationLogs.length > 0) {
+				await fs.mkdir(logsDir, { recursive: true })
+				for (const entry of failedIterationLogs) {
+					await fs.writeFile(entry.filePath, entry.content, "utf8")
+				}
+			}
+			await fs.writeFile(filePath, JSON.stringify(jsonPayload, null, 2), "utf8")
+		} catch (err) {
+			// Persisting results is best-effort; the in-memory results are still returned.
+			runnerConsole.error("Failed to write matrix JSON to results folder:", err)
+		}
+	}
+	return results
+}
diff --git a/packages/evally/src/runner/types.ts b/packages/evally/src/runner/types.ts
new file mode 100644
index 00000000000..a33d3429f94
--- /dev/null
+++ b/packages/evally/src/runner/types.ts
@@ -0,0 +1,42 @@
+export interface MatrixVariable { // One matrix entry: an open-ended bag of named values.
+	[key: string]: any // any string key, any value type; nothing is checked at the type level
+}
+
+export interface MatrixTestContext { // Argument object passed to a MatrixTestFn.
+	variable: MatrixVariable // matrix entry for this invocation
+	iteration: number // iteration number for this invocation; NOTE(review): base (0 or 1) is chosen by the runner, confirm there
+}
+
+export type MatrixTestFn = (ctx: MatrixTestContext) => Promise<void> | void // Test body: takes the context, may be sync or async, returns no value.
+
+export interface MatrixTestDescription { // One test: a suite name, a test name, and the body to run.
+	suite: string // suite name; presumably the suite the test was registered under, confirm in the runner
+	name: string // test name
+	fn: MatrixTestFn // test body, called with a MatrixTestContext
+}
+
+export interface MatrixSuiteDefinition { // Configuration object describing one matrix suite.
+	variables: MatrixVariable[] // matrix entries for the suite
+	iterations: number // iteration count; presumably repeats per test and variable, confirm in the runner
+	tests: () => void // synchronous callback with no arguments; presumably registers the suite's tests, confirm in the runner
+	suiteSetup?: () => Promise<void> | void // optional hook, sync or async; NOTE(review): presumably once before the suite, confirm
+	suiteTeardown?: () => Promise<void> | void // optional hook, sync or async; NOTE(review): presumably once after the suite, confirm
+	setup?: () => Promise<void> | void // optional hook, sync or async; NOTE(review): presumably before each test run, confirm
+	teardown?: () => Promise<void> | void // optional hook, sync or async; NOTE(review): presumably after each test run, confirm
+}
+
+export interface MatrixTestResult { // Outcome record for a test run; the runner's report counts these records as iterations.
+	suite: string // suite name the result belongs to
+	variable: MatrixVariable // matrix entry used; the runner's failure log serializes it with JSON.stringify
+	iteration: number // iteration number of the run
+	testName: string // name of the test that ran
+	passed: boolean // true when the run passed
+	error?: any // optional failure value; typed any, and the runner checks instanceof Error before reading stack/message
+}
+
+export type MatrixVerbosity = "silent" | "summary" | "verbose" // The three verbosity levels accepted in MatrixRunOptions.
+
+export interface MatrixRunOptions { // Optional settings for a matrix run; both fields may be omitted.
+	report?: boolean // NOTE(review): presumably enables writing the JSON results report; default not visible here, confirm in the runner
+	verbosity?: MatrixVerbosity // console output level; default is decided by the runner
+}
diff --git a/packages/evally/tsconfig.json b/packages/evally/tsconfig.json
new file mode 100644
index 00000000000..94c1303e520
--- /dev/null
+++ b/packages/evally/tsconfig.json
@@ -0,0 +1,19 @@
+{
+	"compilerOptions": {
+		"target": "ESNext",
+		"module": "ESNext",
+		"declaration": true,
+		"declarationDir": "dist",
+		"outDir": "dist",
+		"strict": true,
+		"esModuleInterop": true,
+		"moduleResolution": "node",
+		"resolveJsonModule": true,
+		"baseUrl": "./src",
+		"rootDir": "./src",
+		"forceConsistentCasingInFileNames": true,
+		"skipLibCheck": true
+	},
+	"include": ["src/**/*.ts"],
+	"exclude": ["node_modules", "dist"]
+}
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 5589df1b424..72979fc891e 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -117,6 +117,37 @@ importers:
         specifier: 5.8.3
         version: 5.8.3
 
+  apps/vscode-evals:
+    dependencies:
+      vscode:
+        specifier: ^1.1.37
+        version: 1.1.37
+    devDependencies:
+      '@roo-code/config-eslint':
+        specifier: workspace:^
+        version: link:../../packages/config-eslint
+      '@roo-code/config-typescript':
+        specifier: workspace:^
+        version: link:../../packages/config-typescript
+      '@roo-code/evally':
+        specifier: workspace:^
+        version: link:../../packages/evally
+      '@roo-code/types':
+        specifier: workspace:^
+        version: link:../../packages/types
+      '@types/vscode':
+        specifier: ^1.95.0
+        version: 1.103.0
+      '@vscode/test-cli':
+        specifier: ^0.0.11
+        version: 0.0.11
+      '@vscode/test-electron':
+        specifier: ^2.4.0
+        version: 2.5.2
+      typescript:
+        specifier: 5.8.3
+        version: 5.8.3
+
   apps/vscode-nightly:
     devDependencies:
       '@roo-code/build':
@@ -505,6 +536,8 @@ importers:
         specifier: ^3.2.3
         version: 3.2.4(@types/debug@4.1.12)(@types/node@24.2.1)(@vitest/ui@3.2.4)(jiti@2.4.2)(jsdom@26.1.0)(lightningcss@1.30.1)(tsx@4.19.4)(yaml@2.8.0)
 
+  packages/evally: {}
+
   packages/evals:
     dependencies:
       '@roo-code/ipc':
@@ -3947,6 +3980,10 @@ packages:
     peerDependencies:
       '@testing-library/dom': '>=7.21.4'
 
+  '@tootallnate/once@1.1.2':
+    resolution: {integrity: sha512-RbzJvlNzmRq5c3O09UipeuXno4tA1FE6ikOjxZK0tuxVv3412l64l5t1W5pj4+rJq9vpkm/kwiR07aZXnsKPxw==}
+    engines: {node: '>= 6'}
+
   '@tootallnate/quickjs-emscripten@0.23.0':
     resolution: {integrity: sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==}
 
@@ -4460,6 +4497,14 @@ packages:
     engines: {node: '>=0.4.0'}
     hasBin: true
 
+  agent-base@4.3.0:
+    resolution: {integrity: sha512-salcGninV0nPrwpGNn4VTXBb1SOuXQBiqbrNXoeizJsHrsL6ERFM2Ne3JUSBWRE6aeNJI2ROP/WEEIDUiDe3cg==}
+    engines: {node: '>= 4.0.0'}
+
+  agent-base@6.0.2:
+    resolution: {integrity: sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==}
+    engines: {node: '>= 6.0.0'}
+
   agent-base@7.1.3:
     resolution: {integrity: sha512-jRR5wdylq8CkOe6hei19GGZnxM6rBGwFl3Bg0YItGDimvjGtAvdZk4Pu6Cl4u4Igsws4a1fd1Vq3ezrhn4KmFw==}
     engines: {node: '>= 14'}
@@ -5031,6 +5076,9 @@ packages:
     resolution: {integrity: sha512-2uM9rYjPvyq39NwLRqaiLtWHyDC1FvryJDa2ATTVims5YAS4PupsEQsDvP14FqhFr0P49CYDugi59xaxJlTXRA==}
     engines: {node: '>=20'}
 
+  commander@2.15.1:
+    resolution: {integrity: sha512-VlfT9F3V0v+jr4yxPc5gg9s62/fIVWsd2Bk2iD435um1NlGMYdVCq+MjcXnhYq2icNOizHr1kK+5TI6H0Hy0ag==}
+
   commander@4.1.1:
     resolution: {integrity: sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA==}
     engines: {node: '>= 6'}
@@ -5366,6 +5414,14 @@ packages:
     resolution: {integrity: sha512-Xks6RUDLZFdz8LIdR6q0MTH44k7FikOmnh5xkSjMig6ch45afc8sjTjRQf3P6ax8dMgcQrYO/AR2RGWURrruqw==}
     engines: {node: '>=18'}
 
+  debug@3.1.0:
+    resolution: {integrity: sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==}
+    peerDependencies:
+      supports-color: '*'
+    peerDependenciesMeta:
+      supports-color:
+        optional: true
+
   debug@3.2.7:
     resolution: {integrity: sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==}
     peerDependencies:
@@ -5518,6 +5574,10 @@ packages:
     resolution: {integrity: sha512-EjePK1srD3P08o2j4f0ExnylqRs5B9tJjcp9t1krH2qRi8CCdsYfwe9JgSLurFBWwq4uOlipzfk5fHNvwFKr8Q==}
     engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0}
 
+  diff@3.5.0:
+    resolution: {integrity: sha512-A46qtFgd+g7pDZinpnwiRJtxbC1hpgf0uzP3iG89scHk0AUC7A1TGxf5OiiOUv/JMZR8GOt8hL900hV0bOy5xA==}
+    engines: {node: '>=0.3.1'}
+
   diff@5.2.0:
     resolution: {integrity: sha512-uIFDxqpRZGZ6ThOk84hEfqWoHx2devRFvpTZcTHur85vImfaxUbTW9Ryh4CpCuDnToOP1CEtXKIgytHBPVff5A==}
     engines: {node: '>=0.3.1'}
@@ -5815,6 +5875,12 @@ packages:
     resolution: {integrity: sha512-w+5mJ3GuFL+NjVtJlvydShqE1eN3h3PbI7/5LAsYJP/2qtuMXjfL2LpHSRqo4b4eSF5K/DH1JXKUAHSB2UW50g==}
     engines: {node: '>= 0.4'}
 
+  es6-promise@4.2.8:
+    resolution: {integrity: sha512-HJDGx5daxeIvxdBxvG2cb9g4tEvwIk3i8+nhX0yGrYmZUzbkdg8QbDevheDB8gd0//uPj4c1EQua8Q+MViT0/w==}
+
+  es6-promisify@5.0.0:
+    resolution: {integrity: sha512-C+d6UdsYDk0lMebHNR4S2NybQMMngAOnOwYBQjTOiv0MkoJMP0Myw2mgpDLBcpfCmRLxyFqYhS/CfOENq4SJhQ==}
+
   esbuild-register@3.6.0:
     resolution: {integrity: sha512-H2/S7Pm8a9CL1uhp9OvjwrBh5Pvx0H8qVOxNu8Wed9Y7qv56MPtq+GGM8RJpq6glYJn9Wspr8uw7l55uyinNeg==}
     peerDependencies:
@@ -6452,6 +6518,10 @@ packages:
     resolution: {integrity: sha512-5v6yZd4JK3eMI3FqqCouswVqwugaA9r4dNZB1wwcmrD02QkV5H0y7XBQW8QwQqEaZY1pM9aqORSORhJRdNK44Q==}
     engines: {node: '>=6.0'}
 
+  growl@1.10.5:
+    resolution: {integrity: sha512-qBr4OuELkhPenW6goKVXiv47US3clb3/IbuWF9KNKEijAy9oeHxU9IgzjvJhHkUzhaj7rOUD7+YGWqUjLp5oSA==}
+    engines: {node: '>=4.x'}
+
   gtoken@7.1.0:
     resolution: {integrity: sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==}
     engines: {node: '>=14.0.0'}
@@ -6542,6 +6612,10 @@ packages:
   hastscript@9.0.1:
     resolution: {integrity: sha512-g7df9rMFX/SPi34tyGCyUBREQoKkapwdY/T04Qn9TDWfHhAYt4/I0gMVirzK5wEzeUqIjEB+LXC/ypb7Aqno5w==}
 
+  he@1.1.1:
+    resolution: {integrity: sha512-z/GDPjlRMNOa2XJiB4em8wJpuuBfrFOlYKTZxtpkdr1uPdibHI8rYA3MY0KDObpVyaes0e/aunid/t88ZI2EKA==}
+    hasBin: true
+
   he@1.2.0:
     resolution: {integrity: sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==}
     hasBin: true
@@ -6584,10 +6658,26 @@ packages:
     resolution: {integrity: sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ==}
     engines: {node: '>= 0.8'}
 
+  http-proxy-agent@2.1.0:
+    resolution: {integrity: sha512-qwHbBLV7WviBl0rQsOzH6o5lwyOIvwp/BdFnvVxXORldu5TmjFfjzBcWUWS5kWAZhmv+JtiDhSuQCp4sBfbIgg==}
+    engines: {node: '>= 4.5.0'}
+
+  http-proxy-agent@4.0.1:
+    resolution: {integrity: sha512-k0zdNgqWTGA6aeIRVpvfVob4fL52dTfaehylg0Y4UvSySvOq/Y+BOyPrgpUrA7HylqvU8vIZGsRuXmspskV0Tg==}
+    engines: {node: '>= 6'}
+
   http-proxy-agent@7.0.2:
     resolution: {integrity: sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==}
     engines: {node: '>= 14'}
 
+  https-proxy-agent@2.2.4:
+    resolution: {integrity: sha512-OmvfoQ53WLjtA9HeYP9RNrWMJzzAz1JGaSFr1nijg0PVR1JaD/xbJq1mdEIIlxGpXp9eSe/O2LgU9DJmTPd0Eg==}
+    engines: {node: '>= 4.5.0'}
+
+  https-proxy-agent@5.0.1:
+    resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==}
+    engines: {node: '>= 6'}
+
   https-proxy-agent@7.0.6:
     resolution: {integrity: sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==}
     engines: {node: '>= 14'}
@@ -7757,6 +7847,9 @@ packages:
     resolution: {integrity: sha512-enIvLvRAFZYXJzkCYG5RKmPfrFArdLv+R+lbQ53BmIMLIry74bjKzX6iHAm8WYamJkhSSEabrWN5D97XnKObjQ==}
     engines: {node: 20 || >=22}
 
+  minimatch@3.0.4:
+    resolution: {integrity: sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==}
+
   minimatch@3.1.2:
     resolution: {integrity: sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==}
 
@@ -7768,6 +7861,9 @@ packages:
     resolution: {integrity: sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==}
     engines: {node: '>=16 || 14 >=14.17'}
 
+  minimist@0.0.8:
+    resolution: {integrity: sha512-miQKw5Hv4NS1Psg2517mV4e4dYNaO3++hjAvLOAzKqZ61rH8NS1SK+vbfBWZ5PY/Me/bEWhUwqMghEW5Fb9T7Q==}
+
   minimist@1.2.8:
     resolution: {integrity: sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==}
 
@@ -7785,6 +7881,11 @@ packages:
   mkdirp-classic@0.5.3:
     resolution: {integrity: sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==}
 
+  mkdirp@0.5.1:
+    resolution: {integrity: sha512-SknJC52obPfGQPnjIkXbmA6+5H15E+fR+E4iR2oQ3zzCLbd7/ONua69R/Gw7AgkTLsRG+r5fzksYwWe1AgTyWA==}
+    deprecated: Legacy versions of mkdirp are no longer supported. Please update to mkdirp 1.x. (Note that the API surface has changed to use Promises in 1.x.)
+    hasBin: true
+
   mkdirp@0.5.6:
     resolution: {integrity: sha512-FP+p8RB8OWpF3YZBCrP5gtADmtXApB5AMLn+vdyA+PyxCjrCs00mjyUozssO33cwDeT3wNGdLxJ5M//YqtHAJw==}
     hasBin: true
@@ -7807,6 +7908,11 @@ packages:
     engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0}
     hasBin: true
 
+  mocha@5.2.0:
+    resolution: {integrity: sha512-2IUgKDhc3J7Uug+FxMXuqIyYzH7gJjXECKe/w43IGgQHTSj3InJi+yAA7T24L9bQMRKiUEHxEX37G5JpVUGLcQ==}
+    engines: {node: '>= 4.0.0'}
+    hasBin: true
+
   monaco-vscode-textmate-theme-converter@0.1.7:
     resolution: {integrity: sha512-ZMsq1RPWwOD3pvXD0n+9ddnhfzZoiUMwNIWPNUqYqEiQeH2HjyZ9KYOdt/pqe0kkN8WnYWLrxT9C/SrtIsAu2Q==}
     hasBin: true
@@ -7827,6 +7933,9 @@ packages:
     resolution: {integrity: sha512-Y3wQdFg2Va6etvQ5I82yUhGdsKrcYox6p7FfL1LbK2J4V01F9TGlepTIhnK24t7koZibmg82KGglhA1XK5IsLQ==}
     engines: {node: '>=10'}
 
+  ms@2.0.0:
+    resolution: {integrity: sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==}
+
   ms@2.1.3:
     resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==}
 
@@ -9393,6 +9502,10 @@ packages:
     engines: {node: '>=16 || 14 >=14.17'}
     hasBin: true
 
+  supports-color@5.4.0:
+    resolution: {integrity: sha512-zjaXglF5nnWpsq470jSv6P9DwPvgLkuapYmfDm3JWOm0vkNTVF2tI4UrN2r6jH1qM/uc/WtxYY1hYoA2dOKj5w==}
+    engines: {node: '>=4'}
+
   supports-color@5.5.0:
     resolution: {integrity: sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==}
     engines: {node: '>=4'}
@@ -10086,9 +10199,20 @@ packages:
   vscode-material-icons@0.1.1:
     resolution: {integrity: sha512-GsoEEF8Tbb0yUFQ6N6FPvh11kFkL9F95x0FkKlbbfRQN9eFms67h+L3t6b9cUv58dSn2gu8kEhNfoESVCrz4ag==}
 
+  vscode-test@0.4.3:
+    resolution: {integrity: sha512-EkMGqBSefZH2MgW65nY05rdRSko15uvzq4VAPM5jVmwYuFQKE7eikKXNJDRxL+OITXHB6pI+a3XqqD32Y3KC5w==}
+    engines: {node: '>=8.9.3'}
+    deprecated: This package has been renamed to @vscode/test-electron, please update to the new name
+
   vscode-uri@3.0.8:
     resolution: {integrity: sha512-AyFQ0EVmsOZOlAnxoFOGOq1SQDWAB7C6aqMGS23svWAllfOaxbuFvcT8D1i8z3Gyn8fraVeZNNmN6e9bxxXkKw==}
 
+  vscode@1.1.37:
+    resolution: {integrity: sha512-vJNj6IlN7IJPdMavlQa1KoFB3Ihn06q1AiN3ZFI/HfzPNzbKZWPPuiU+XkpNOfGU5k15m4r80nxNPlM7wcc0wg==}
+    engines: {node: '>=8.9.3'}
+    deprecated: 'This package is deprecated in favor of @types/vscode and vscode-test. For more information please read: https://code.visualstudio.com/updates/v1_36#_splitting-vscode-package-into-typesvscode-and-vscodetest'
+    hasBin: true
+
   vscrui@0.2.2:
     resolution: {integrity: sha512-buw2OipqUl7GCBq1mxcAjUwoUsslGzVhdaxDPmEx27xzc3QAJJZHtT30QbakgZVJ1Jb3E6kcsguUIFEGxrgkyQ==}
     peerDependencies:
@@ -11180,7 +11304,7 @@ snapshots:
       '@babel/parser': 7.27.2
       '@babel/template': 7.27.2
       '@babel/types': 7.27.1
-      debug: 4.4.1(supports-color@8.1.1)
+      debug: 4.4.3
       globals: 11.12.0
     transitivePeerDependencies:
       - supports-color
@@ -13658,6 +13782,8 @@ snapshots:
     dependencies:
       '@testing-library/dom': 10.4.0
 
+  '@tootallnate/once@1.1.2': {}
+
   '@tootallnate/quickjs-emscripten@0.23.0': {}
 
   '@tybys/wasm-util@0.9.0':
@@ -14043,7 +14169,7 @@ snapshots:
     dependencies:
       '@typescript-eslint/typescript-estree': 8.32.1(typescript@5.8.3)
       '@typescript-eslint/utils': 8.32.1(eslint@9.27.0(jiti@2.4.2))(typescript@5.8.3)
-      debug: 4.4.1(supports-color@8.1.1)
+      debug: 4.4.3
       eslint: 9.27.0(jiti@2.4.2)
       ts-api-utils: 2.1.0(typescript@5.8.3)
       typescript: 5.8.3
@@ -14056,7 +14182,7 @@ snapshots:
     dependencies:
       '@typescript-eslint/types': 8.32.1
       '@typescript-eslint/visitor-keys': 8.32.1
-      debug: 4.4.1(supports-color@8.1.1)
+      debug: 4.4.3
       fast-glob: 3.3.3
       is-glob: 4.0.3
       minimatch: 9.0.5
@@ -14306,6 +14432,16 @@ snapshots:
 
   acorn@8.15.0: {}
 
+  agent-base@4.3.0:
+    dependencies:
+      es6-promisify: 5.0.0
+
+  agent-base@6.0.2:
+    dependencies:
+      debug: 4.4.3
+    transitivePeerDependencies:
+      - supports-color
+
   agent-base@7.1.3: {}
 
   agentkeepalive@4.6.0:
@@ -14607,7 +14743,7 @@ snapshots:
     dependencies:
       bytes: 3.1.2
       content-type: 1.0.5
-      debug: 4.4.1(supports-color@8.1.1)
+      debug: 4.4.3
       http-errors: 2.0.0
       iconv-lite: 0.6.3
       on-finished: 2.4.1
@@ -14953,6 +15089,8 @@ snapshots:
 
   commander@14.0.0: {}
 
+  commander@2.15.1: {}
+
   commander@4.1.1: {}
 
   commander@6.2.1: {}
@@ -15312,6 +15450,12 @@ snapshots:
 
   debounce@2.2.0: {}
 
+  debug@3.1.0(supports-color@5.4.0):
+    dependencies:
+      ms: 2.0.0
+    optionalDependencies:
+      supports-color: 5.4.0
+
   debug@3.2.7:
     dependencies:
       ms: 2.1.3
@@ -15420,6 +15564,8 @@ snapshots:
 
   diff-sequences@29.6.3: {}
 
+  diff@3.5.0: {}
+
   diff@5.2.0: {}
 
   dijkstrajs@1.0.3: {}
@@ -15698,6 +15844,12 @@ snapshots:
       is-date-object: 1.1.0
       is-symbol: 1.1.1
 
+  es6-promise@4.2.8: {}
+
+  es6-promisify@5.0.0:
+    dependencies:
+      es6-promise: 4.2.8
+
   esbuild-register@3.6.0(esbuild@0.25.9):
     dependencies:
       debug: 4.4.1(supports-color@8.1.1)
@@ -16086,7 +16238,7 @@ snapshots:
 
   extract-zip@2.0.1:
     dependencies:
-      debug: 4.4.1(supports-color@8.1.1)
+      debug: 4.4.3
       get-stream: 5.2.0
       yauzl: 2.10.0
     optionalDependencies:
@@ -16187,7 +16339,7 @@ snapshots:
 
   finalhandler@2.1.0:
     dependencies:
-      debug: 4.4.1(supports-color@8.1.1)
+      debug: 4.4.3
       encodeurl: 2.0.0
       escape-html: 1.0.3
       on-finished: 2.4.1
@@ -16435,7 +16587,7 @@ snapshots:
     dependencies:
       basic-ftp: 5.0.5
       data-uri-to-buffer: 6.0.2
-      debug: 4.4.1(supports-color@8.1.1)
+      debug: 4.4.3
     transitivePeerDependencies:
       - supports-color
 
@@ -16524,6 +16676,8 @@ snapshots:
       section-matter: 1.0.0
       strip-bom-string: 1.0.0
 
+  growl@1.10.5: {}
+
   gtoken@7.1.0:
     dependencies:
       gaxios: 6.7.1
@@ -16699,6 +16853,8 @@ snapshots:
       property-information: 7.1.0
       space-separated-tokens: 2.0.2
 
+  he@1.1.1: {}
+
   he@1.2.0: {}
 
   hex-rgb@4.3.0: {}
@@ -16740,6 +16896,21 @@ snapshots:
       statuses: 2.0.1
       toidentifier: 1.0.1
 
+  http-proxy-agent@2.1.0:
+    dependencies:
+      agent-base: 4.3.0
+      debug: 3.1.0(supports-color@5.4.0)
+    transitivePeerDependencies:
+      - supports-color
+
+  http-proxy-agent@4.0.1:
+    dependencies:
+      '@tootallnate/once': 1.1.2
+      agent-base: 6.0.2
+      debug: 4.4.3
+    transitivePeerDependencies:
+      - supports-color
+
   http-proxy-agent@7.0.2:
     dependencies:
       agent-base: 7.1.3
@@ -16747,6 +16918,20 @@ snapshots:
     transitivePeerDependencies:
       - supports-color
 
+  https-proxy-agent@2.2.4:
+    dependencies:
+      agent-base: 4.3.0
+      debug: 3.2.7
+    transitivePeerDependencies:
+      - supports-color
+
+  https-proxy-agent@5.0.1:
+    dependencies:
+      agent-base: 6.0.2
+      debug: 4.4.3
+    transitivePeerDependencies:
+      - supports-color
+
   https-proxy-agent@7.0.6:
     dependencies:
       agent-base: 7.1.3
@@ -17229,7 +17414,7 @@ snapshots:
       lodash.isstring: 4.0.1
       lodash.once: 4.1.1
       ms: 2.1.3
-      semver: 7.7.2
+      semver: 7.7.3
 
   jsx-ast-utils@3.3.5:
     dependencies:
@@ -18069,7 +18254,7 @@ snapshots:
 
   micromark@2.11.4:
     dependencies:
-      debug: 4.4.1(supports-color@8.1.1)
+      debug: 4.4.3
       parse-entities: 2.0.0
     transitivePeerDependencies:
       - supports-color
@@ -18134,6 +18319,10 @@ snapshots:
     dependencies:
       '@isaacs/brace-expansion': 5.0.0
 
+  minimatch@3.0.4:
+    dependencies:
+      brace-expansion: 2.0.2
+
   minimatch@3.1.2:
     dependencies:
       brace-expansion: 2.0.2
@@ -18146,6 +18335,8 @@ snapshots:
     dependencies:
       brace-expansion: 2.0.2
 
+  minimist@0.0.8: {}
+
   minimist@1.2.8: {}
 
   minipass@7.1.2: {}
@@ -18159,6 +18350,10 @@ snapshots:
   mkdirp-classic@0.5.3:
     optional: true
 
+  mkdirp@0.5.1:
+    dependencies:
+      minimist: 0.0.8
+
   mkdirp@0.5.6:
     dependencies:
       minimist: 1.2.8
@@ -18197,6 +18392,20 @@ snapshots:
       yargs-parser: 21.1.1
       yargs-unparser: 2.0.0
 
+  mocha@5.2.0:
+    dependencies:
+      browser-stdout: 1.3.1
+      commander: 2.15.1
+      debug: 3.1.0(supports-color@5.4.0)
+      diff: 3.5.0
+      escape-string-regexp: 1.0.5
+      glob: 11.1.0
+      growl: 1.10.5
+      he: 1.1.1
+      minimatch: 3.0.4
+      mkdirp: 0.5.1
+      supports-color: 5.4.0
+
   monaco-vscode-textmate-theme-converter@0.1.7(tslib@2.8.1):
     dependencies:
       commander: 8.3.0
@@ -18213,6 +18422,8 @@ snapshots:
 
   mrmime@2.0.1: {}
 
+  ms@2.0.0: {}
+
   ms@2.1.3: {}
 
   mute-stream@0.0.8: {}
@@ -18295,7 +18506,7 @@ snapshots:
 
   node-abi@3.75.0:
     dependencies:
-      semver: 7.7.2
+      semver: 7.7.3
     optional: true
 
   node-addon-api@4.3.0:
@@ -18578,7 +18789,7 @@ snapshots:
     dependencies:
       '@tootallnate/quickjs-emscripten': 0.23.0
       agent-base: 7.1.3
-      debug: 4.4.1(supports-color@8.1.1)
+      debug: 4.4.3
       get-uri: 6.0.4
       http-proxy-agent: 7.0.2
       https-proxy-agent: 7.0.6
@@ -18896,7 +19107,7 @@ snapshots:
   proxy-agent@6.5.0:
     dependencies:
       agent-base: 7.1.3
-      debug: 4.4.1(supports-color@8.1.1)
+      debug: 4.4.3
       http-proxy-agent: 7.0.2
       https-proxy-agent: 7.0.6
       lru-cache: 7.18.3
@@ -19445,7 +19656,7 @@ snapshots:
 
   router@2.2.0:
     dependencies:
-      debug: 4.4.1(supports-color@8.1.1)
+      debug: 4.4.3
       depd: 2.0.0
       is-promise: 4.0.0
       parseurl: 1.3.3
@@ -19551,7 +19762,7 @@ snapshots:
 
   send@1.2.0:
     dependencies:
-      debug: 4.4.1(supports-color@8.1.1)
+      debug: 4.4.3
       encodeurl: 2.0.0
       escape-html: 1.0.3
       etag: 1.8.1
@@ -19768,7 +19979,7 @@ snapshots:
   socks-proxy-agent@8.0.5:
     dependencies:
       agent-base: 7.1.3
-      debug: 4.4.1(supports-color@8.1.1)
+      debug: 4.4.3
       socks: 2.8.4
     transitivePeerDependencies:
       - supports-color
@@ -20044,6 +20255,10 @@ snapshots:
       pirates: 4.0.7
       ts-interface-checker: 0.1.13
 
+  supports-color@5.4.0:
+    dependencies:
+      has-flag: 3.0.0
+
   supports-color@5.5.0:
     dependencies:
       has-flag: 3.0.0
@@ -20971,8 +21186,27 @@ snapshots:
 
   vscode-material-icons@0.1.1: {}
 
+  vscode-test@0.4.3:
+    dependencies:
+      http-proxy-agent: 2.1.0
+      https-proxy-agent: 2.2.4
+    transitivePeerDependencies:
+      - supports-color
+
   vscode-uri@3.0.8: {}
 
+  vscode@1.1.37:
+    dependencies:
+      glob: 11.1.0
+      http-proxy-agent: 4.0.1
+      https-proxy-agent: 5.0.1
+      mocha: 5.2.0
+      semver: 5.7.2
+      source-map-support: 0.5.21
+      vscode-test: 0.4.3
+    transitivePeerDependencies:
+      - supports-color
+
   vscrui@0.2.2(@types/react@18.3.23)(react@18.3.1):
     dependencies:
       '@types/react': 18.3.23

From c4ca6f745188f8525faa2c1a827e6ae86a0044d5 Mon Sep 17 00:00:00 2001
From: Dennise Bartlett <bartlett.dc.1@gmail.com>
Date: Wed, 24 Dec 2025 10:26:13 -0800
Subject: [PATCH 2/3] fix: Fix lock-file

---
 pnpm-lock.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 72979fc891e..b7f803fb0e4 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -144,6 +144,9 @@ importers:
       '@vscode/test-electron':
         specifier: ^2.4.0
         version: 2.5.2
+      rimraf:
+        specifier: ^6.0.1
+        version: 6.0.1
       typescript:
         specifier: 5.8.3
         version: 5.8.3

From 032d4a5300e855a9f3c45e1726c532a03d623525 Mon Sep 17 00:00:00 2001
From: Dennise Bartlett <bartlett.dc.1@gmail.com>
Date: Wed, 24 Dec 2025 11:13:44 -0800
Subject: [PATCH 3/3] chore: Add precheck-types script to build before checking
 types

---
 package.json | 1 +
 1 file changed, 1 insertion(+)

diff --git a/package.json b/package.json
index 0f3c3b7ba04..b00ecf3f81a 100644
--- a/package.json
+++ b/package.json
@@ -10,6 +10,7 @@
 		"install": "node scripts/bootstrap.mjs",
 		"install:all": "node scripts/bootstrap.mjs",
 		"lint": "turbo lint --log-order grouped --output-logs new-only",
+		"precheck-types": "pnpm build",
 		"check-types": "turbo check-types --log-order grouped --output-logs new-only",
 		"test": "turbo test --log-order grouped --output-logs new-only",
 		"format": "turbo format --log-order grouped --output-logs new-only",