diff --git a/.gitignore b/.gitignore
index 54cf66cee7..74d3c4384e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,4 +51,6 @@ logs
qdrant_storage/
# Architect plans
-plans/
\ No newline at end of file
+plans/
+
+.results
\ No newline at end of file
diff --git a/apps/vscode-evals/.env.local.sample b/apps/vscode-evals/.env.local.sample
new file mode 100644
index 0000000000..40c9da1bb4
--- /dev/null
+++ b/apps/vscode-evals/.env.local.sample
@@ -0,0 +1 @@
+OPENROUTER_API_KEY=sk-or-v1-...
diff --git a/apps/vscode-evals/.vscode-test.mjs b/apps/vscode-evals/.vscode-test.mjs
new file mode 100644
index 0000000000..c83f12c4bb
--- /dev/null
+++ b/apps/vscode-evals/.vscode-test.mjs
@@ -0,0 +1,16 @@
+/**
+ * See: https://code.visualstudio.com/api/working-with-extensions/testing-extension
+ */
+
+import { defineConfig } from "@vscode/test-cli"
+
+export default defineConfig({
+ label: "integrationTest",
+ files: "out/suite/**/*.test.js",
+ workspaceFolder: ".",
+ mocha: {
+ ui: "tdd",
+ timeout: 60000,
+ },
+ launchArgs: ["--enable-proposed-api=RooVeterinaryInc.roo-cline", "--disable-extensions"],
+})
diff --git a/apps/vscode-evals/eslint.config.mjs b/apps/vscode-evals/eslint.config.mjs
new file mode 100644
index 0000000000..694bf73664
--- /dev/null
+++ b/apps/vscode-evals/eslint.config.mjs
@@ -0,0 +1,4 @@
+import { config } from "@roo-code/config-eslint/base"
+
+/** @type {import("eslint").Linter.Config} */
+export default [...config]
diff --git a/apps/vscode-evals/package.json b/apps/vscode-evals/package.json
new file mode 100644
index 0000000000..0f788d5fe4
--- /dev/null
+++ b/apps/vscode-evals/package.json
@@ -0,0 +1,26 @@
+{
+ "name": "@roo-code/vscode-evals",
+ "private": true,
+ "scripts": {
+ "lint": "eslint src --ext=ts --max-warnings=0",
+ "check-types": "tsc -p tsconfig.esm.json --noEmit",
+ "format": "prettier --write src",
+ "test:ci": "pnpm -w bundle && pnpm --filter @roo-code/vscode-webview build && pnpm test:run",
+ "test:run": "rimraf out && tsc -p tsconfig.json && npx dotenvx run -f .env.local -- node ./out/runTest.js",
+ "clean": "rimraf out .turbo"
+ },
+ "devDependencies": {
+ "@roo-code/evally": "workspace:^",
+ "@roo-code/config-eslint": "workspace:^",
+ "@roo-code/config-typescript": "workspace:^",
+ "@roo-code/types": "workspace:^",
+ "@types/vscode": "^1.95.0",
+ "@vscode/test-cli": "^0.0.11",
+ "@vscode/test-electron": "^2.4.0",
+ "rimraf": "^6.0.1",
+ "typescript": "5.8.3"
+ },
+ "dependencies": {
+ "vscode": "^1.1.37"
+ }
+}
diff --git a/apps/vscode-evals/src/runTest.ts b/apps/vscode-evals/src/runTest.ts
new file mode 100644
index 0000000000..82394f24ab
--- /dev/null
+++ b/apps/vscode-evals/src/runTest.ts
@@ -0,0 +1,33 @@
+import * as path from "path"
+import * as os from "os"
+import * as fs from "fs/promises"
+
+import { runTests } from "@vscode/test-electron"
+
+/**
+ * Launches VS Code through `@vscode/test-electron` with the extension under development
+ * and runs the eval suite against a throwaway workspace directory.
+ *
+ * `--grep <value>` / `--file <value>` on the command line (falling back to the TEST_GREP /
+ * TEST_FILE environment variables) are forwarded to the extension host via its environment.
+ *
+ * The temporary workspace is removed whether or not the run succeeds. Any failure while
+ * setting up or running the tests is logged and ends the process with exit code 1.
+ */
+async function main() {
+	let testWorkspace: string | undefined
+	let failed = false
+
+	try {
+		const extensionDevelopmentPath = path.resolve(__dirname, "../../../src")
+		const extensionTestsPath = path.resolve(__dirname, "./suite/index")
+		testWorkspace = await fs.mkdtemp(path.join(os.tmpdir(), "roo-evals-workspace-"))
+
+		// A flag's value is the argument that directly follows it on the command line.
+		const testGrep = process.argv.find((arg, i) => process.argv[i - 1] === "--grep") || process.env.TEST_GREP
+		const testFile = process.argv.find((arg, i) => process.argv[i - 1] === "--file") || process.env.TEST_FILE
+		const extensionTestsEnv = {
+			...process.env,
+			...(testGrep && { TEST_GREP: testGrep }),
+			...(testFile && { TEST_FILE: testFile }),
+		}
+
+		await runTests({
+			extensionDevelopmentPath,
+			extensionTestsPath,
+			launchArgs: [testWorkspace],
+			extensionTestsEnv,
+			version: process.env.VSCODE_VERSION || "1.101.2",
+		})
+	} catch (error) {
+		console.error("Failed to run vscode evals", error)
+		failed = true
+	} finally {
+		// Clean up on every path so failed runs do not pile up directories under os.tmpdir().
+		if (testWorkspace) {
+			try {
+				await fs.rm(testWorkspace, { recursive: true, force: true })
+			} catch (cleanupError) {
+				console.warn("Failed to remove temporary eval workspace", testWorkspace, cleanupError)
+			}
+		}
+	}
+
+	// Exit only after cleanup has finished; process.exit() inside the catch would skip it.
+	if (failed) {
+		process.exit(1)
+	}
+}
+
+main()
diff --git a/apps/vscode-evals/src/suite/applyDiff.matrix.test.ts b/apps/vscode-evals/src/suite/applyDiff.matrix.test.ts
new file mode 100644
index 0000000000..810f6ade8d
--- /dev/null
+++ b/apps/vscode-evals/src/suite/applyDiff.matrix.test.ts
@@ -0,0 +1,984 @@
+import { afterAll, afterEach, beforeAll, beforeEach, defineMatrix, describe, it } from "@roo-code/evally"
+import { RooCodeEventName, type RooCodeAPI, type ClineMessage } from "@roo-code/types"
+import { strict as assert } from "assert"
+import { waitFor, sleep } from "./utils"
+
+import * as fs from "fs/promises"
+import * as path from "path"
+/**
+ * Evidence gathered from the message stream of one task, used to decide whether the
+ * tool call went through the native protocol rather than XML text.
+ */
+interface NativeProtocolVerification {
+ // Set to true once an api_req_started payload reports apiProtocol "anthropic" or "openai".
+ hasNativeApiProtocol: boolean
+ // Last apiProtocol value seen in an api_req_started payload; null until one is seen.
+ apiProtocol: string | null
+ // Starts true; set to false when a text message contains XML tool tags.
+ responseIsNotXML: boolean
+ // Set to true once a tool request is observed.
+ toolWasExecuted: boolean
+ // Name of the observed tool; null until one is observed.
+ executedToolName: string | null
+}
+
+/**
+ * Builds a fresh verification record with nothing observed yet: no protocol, no tool,
+ * and the response presumed XML-free until a message proves otherwise.
+ */
+function createVerificationState(): NativeProtocolVerification {
+	const pristine: NativeProtocolVerification = {
+		apiProtocol: null,
+		hasNativeApiProtocol: false,
+		executedToolName: null,
+		toolWasExecuted: false,
+		responseIsNotXML: true,
+	}
+
+	return pristine
+}
+
+/**
+ * Fails (via `assert`) unless the collected evidence shows a native-protocol tool call.
+ *
+ * Checks run in a fixed order and stop at the first failure: a protocol was reported,
+ * it was a native one, no XML tool tags were seen, and a tool was executed.
+ *
+ * @param verification Evidence gathered while the task ran.
+ * @param testName Label prefixed to every failure message.
+ */
+function assertNativeProtocolUsed(verification: NativeProtocolVerification, testName: string): void {
+	const { apiProtocol, hasNativeApiProtocol, responseIsNotXML, toolWasExecuted, executedToolName } = verification
+
+	assert.ok(
+		apiProtocol !== null,
+		`[${testName}] apiProtocol should be set in api_req_started message. This indicates an API request was made.`,
+	)
+
+	// Each remaining flag must be exactly `true`; pair it with the message shown when it is not.
+	const requiredFlags: Array<[boolean, string]> = [
+		[
+			hasNativeApiProtocol,
+			`[${testName}] Native API protocol should be used. Expected apiProtocol to be "anthropic" or "openai", but got: ${apiProtocol}`,
+		],
+		[
+			responseIsNotXML,
+			`[${testName}] Response should NOT contain XML tool tags. Found XML tags which indicates XML protocol was used instead of native.`,
+		],
+		[
+			toolWasExecuted,
+			`[${testName}] Tool should have been executed. Executed tool: ${executedToolName || "none"}`,
+		],
+	]
+
+	for (const [flag, failureMessage] of requiredFlags) {
+		assert.strictEqual(flag, true, failureMessage)
+	}
+}
+
+/**
+ * Creates a message listener that records every message in `messages` and updates
+ * `verification` as evidence about the tool protocol arrives.
+ *
+ * What it reacts to:
+ * - `say: "error"`: logs the text and forwards it to `onError`.
+ * - `ask: "tool"`: parses the JSON payload; any `tool` name marks a tool as executed,
+ *   and "appliedDiff" / "apply_diff" additionally triggers `onApplyDiffExecuted`.
+ * - `say: "api_req_started"`: records `apiProtocol` ("anthropic" / "openai" count as native)
+ *   and looks for apply_diff mentions in the raw text and in the parsed `request`.
+ * - `say: "text"`: clears `responseIsNotXML` when XML tool tags appear in the text.
+ *
+ * `onApplyDiffExecuted` may be invoked more than once for the same task.
+ *
+ * @param verification Record mutated in place as evidence is observed.
+ * @param messages Array every incoming message is appended to.
+ * @param options Optional callbacks, plus `debugLogging` (default true) for verbose console output.
+ * @returns The listener to register for message events.
+ */
+function createNativeVerificationHandler(
+	verification: NativeProtocolVerification,
+	messages: ClineMessage[],
+	options: {
+		onError?: (error: string) => void
+		onApplyDiffExecuted?: () => void
+		debugLogging?: boolean
+	} = {},
+): (event: { message: ClineMessage }) => void {
+	const { onError, onApplyDiffExecuted, debugLogging = true } = options
+
+	// Tags that appear in plain text only when a tool call is written out as XML.
+	// These must be non-empty: "".includes("") is true for every string.
+	const xmlToolTags = ["<apply_diff>", "</apply_diff>", "<write_to_file>", "</write_to_file>"]
+
+	return ({ message }: { message: ClineMessage }) => {
+		messages.push(message)
+
+		if (debugLogging) {
+			console.log(`[DEBUG] Message: type=${message.type}, say=${message.say}, ask=${message.ask}`)
+		}
+
+		if (message.type === "say" && message.say === "error") {
+			const errorText = (message.text as string | undefined) || "Unknown error"
+			console.error("[ERROR]:", errorText)
+			onError?.(errorText)
+		}
+
+		if (message.type === "ask" && message.ask === "tool") {
+			if (debugLogging && typeof message.text === "string") {
+				console.log("[DEBUG] Tool callback:", message.text.substring(0, 300))
+			}
+
+			try {
+				const toolData = JSON.parse((message.text as string) || "{}")
+
+				if (toolData.tool) {
+					verification.toolWasExecuted = true
+					verification.executedToolName = toolData.tool
+					console.log(`[VERIFIED] Tool executed: ${toolData.tool}`)
+				}
+
+				if (toolData.tool === "appliedDiff" || toolData.tool === "apply_diff") {
+					console.log("[TOOL] apply_diff tool executed")
+					onApplyDiffExecuted?.()
+				}
+			} catch {
+				// A non-JSON payload is not an error for the test; it just carries no tool info.
+				if (debugLogging && typeof message.text === "string") {
+					console.log("[DEBUG] Tool callback not JSON:", message.text.substring(0, 100))
+				}
+			}
+		}
+
+		if (message.type === "say" && message.say === "api_req_started" && typeof message.text === "string") {
+			const rawText = message.text
+			if (debugLogging) {
+				console.log("[DEBUG] API request started:", rawText.substring(0, 200))
+			}
+
+			// NOTE(review): if this payload echoes the task prompt, "apply_diff" would match
+			// even when the tool never ran. Confirm what api_req_started actually contains.
+			if (rawText.includes("apply_diff") || rawText.includes("appliedDiff")) {
+				verification.toolWasExecuted = true
+				verification.executedToolName = verification.executedToolName || "apply_diff"
+				console.log("[VERIFIED] Tool executed via raw text check: apply_diff")
+				onApplyDiffExecuted?.()
+			}
+
+			try {
+				const requestData = JSON.parse(rawText)
+
+				if (requestData.apiProtocol) {
+					verification.apiProtocol = requestData.apiProtocol
+					if (requestData.apiProtocol === "anthropic" || requestData.apiProtocol === "openai") {
+						verification.hasNativeApiProtocol = true
+						console.log(`[VERIFIED] API Protocol: ${requestData.apiProtocol}`)
+					}
+				}
+
+				// Only strings have .includes with these semantics; anything else is skipped
+				// instead of throwing and being misreported as a JSON parse failure.
+				const request: unknown = requestData.request
+				if (typeof request === "string" && (request.includes("apply_diff") || request.includes("appliedDiff"))) {
+					verification.toolWasExecuted = true
+					verification.executedToolName = "apply_diff"
+					console.log("[VERIFIED] Tool executed via parsed request: apply_diff")
+					onApplyDiffExecuted?.()
+				}
+			} catch (e) {
+				console.log("[DEBUG] Failed to parse api_req_started message:", e)
+			}
+		}
+
+		if (message.type === "say" && message.say === "text" && typeof message.text === "string") {
+			const text = message.text
+			const hasXMLToolTags = xmlToolTags.some((tag) => text.includes(tag))
+
+			if (hasXMLToolTags) {
+				verification.responseIsNotXML = false
+				console.log("[WARNING] Found XML tool tags in response - this indicates XML protocol")
+			}
+		}
+
+		if (message.type === "say" && message.say === "completion_result" && typeof message.text === "string") {
+			if (debugLogging) {
+				console.log("[DEBUG] AI completion:", message.text.substring(0, 200))
+			}
+		}
+	}
+}
+
+/** One row of the eval matrix: the values a single run of the suite is parameterised with. */
+type EvalVariables = { openRouterModelId: string }
+
+// Matrix rows handed to defineMatrix; every test receives one of them as `variable`.
+const evalGroupVariables: EvalVariables[] = [{ openRouterModelId: "openai/gpt-5.1" }]
+
+// Captured once at module load and embedded in every fixture file name.
+const now = Date.now()
+
+/** A fixture file: its name, its pristine content, and the path assigned to it later. */
+type TestFileFixture = {
+	path: string
+	name: string
+	content: string
+}
+
+// Fixtures keyed by scenario. `path` starts empty and is filled in by the suite's beforeAll hook.
+const testFiles: Record<string, TestFileFixture> = {
+	simpleModify: {
+		path: "",
+		name: `test-file-simple-native-${now}.txt`,
+		content: "Hello World\nThis is a test file\nWith multiple lines",
+	},
+	multipleReplace: {
+		path: "",
+		name: `test-func-multiple-native-${now}.js`,
+		content: `function calculate(x, y) {\n const sum = x + y\n const product = x * y\n return { sum: sum, product: product }\n}`,
+	},
+	lineNumbers: {
+		path: "",
+		name: `test-lines-native-${now}.js`,
+		content: `// Header comment\nfunction oldFunction() {\n console.log("Old implementation")\n}\n\n// Another function\nfunction keepThis() {\n console.log("Keep this")\n}\n\n// Footer comment`,
+	},
+	errorHandling: {
+		path: "",
+		name: `test-error-native-${now}.txt`,
+		content: "Original content",
+	},
+	multiSearchReplace: {
+		path: "",
+		name: `test-multi-search-native-${now}.js`,
+		content: `function processData(data) {\n console.log("Processing data")\n return data.map(item => item * 2)\n}\n\n// Some other code in between\nconst config = {\n timeout: 5000,\n retries: 3\n}\n\nfunction validateInput(input) {\n console.log("Validating input")\n if (!input) {\n throw new Error("Invalid input")\n }\n return true\n}`,
+	},
+}
+
+/**
+ * Resolves the directory fixture files live in: the non-empty string stored on
+ * `globalThis.rooTestWorkspaceDir` when there is one, otherwise the current working directory.
+ */
+function getTestWorkspaceDir(): string {
+	const { rooTestWorkspaceDir } = globalThis as { rooTestWorkspaceDir?: string }
+
+	return typeof rooTestWorkspaceDir === "string" && rooTestWorkspaceDir.length > 0
+		? rooTestWorkspaceDir
+		: process.cwd()
+}
+
+/**
+ * Writes `file.content` to `file.name` inside the test workspace directory,
+ * creating the file or overwriting an existing one.
+ *
+ * @returns The path that was written.
+ */
+async function createTestFile(file: { name: string; content: string }): Promise<string> {
+	const tmpPath = path.join(getTestWorkspaceDir(), file.name)
+	await fs.writeFile(tmpPath, file.content)
+	return tmpPath
+}
+/**
+ * Restores a fixture to its pristine state by rewriting `file.content` to `file.name`
+ * inside the test workspace directory.
+ *
+ * @returns The path that was written.
+ */
+async function resetTestFile(file: { name: string; content: string }): Promise<string> {
+	const tmpPath = path.join(getTestWorkspaceDir(), file.name)
+	await fs.writeFile(tmpPath, file.content)
+	return tmpPath
+}
+/**
+ * Best-effort removal of `file.name` from the test workspace directory.
+ *
+ * Never throws: a file that is already gone is ignored silently, and any other
+ * failure is reported as a warning so it does not pass unnoticed.
+ */
+async function removeTestFile(file: { name: string }): Promise<void> {
+	const tmpPath = path.join(getTestWorkspaceDir(), file.name)
+	try {
+		await fs.unlink(tmpPath)
+	} catch (error) {
+		const code = (error as NodeJS.ErrnoException | undefined)?.code
+		// ENOENT simply means there was nothing to delete.
+		if (code !== "ENOENT") {
+			console.warn(`Could not remove test file ${tmpPath}:`, error)
+		}
+	}
+}
+
+export default defineMatrix({
+ variables: evalGroupVariables,
+ iterations: 10,
+ tests: function () {
+ describe("Apply_diff Tool (Native Tool Calling)", function () {
+			let workspaceDir: string
+
+			// Write every fixture into the workspace and record the path each one was written to.
+			beforeAll(async () => {
+				console.log("beforeAll Executed")
+				workspaceDir = getTestWorkspaceDir()
+				console.log("[INFO] Using workspace directory:", workspaceDir)
+
+				console.log("Creating test files in workspace...")
+				for (const [key, file] of Object.entries(testFiles)) {
+					file.path = path.join(workspaceDir, file.name)
+					await fs.writeFile(file.path, file.content)
+					console.log(`Created ${key} test file at:`, file.path)
+				}
+
+				// Confirm each fixture is actually reachable before any test relies on it.
+				for (const [key, file] of Object.entries(testFiles)) {
+					const exists = await fs
+						.access(file.path)
+						.then(() => true)
+						.catch(() => false)
+					if (!exists) {
+						throw new Error(`Failed to create ${key} test file at ${file.path}`)
+					}
+				}
+			})
+
+			// Each test starts from pristine fixture content, whatever the previous test did.
+			beforeEach(async () => {
+				console.log("beforeEach Executed - resetting test files to original content")
+				await resetAllTestFiles()
+			})
+			afterEach(async () => {
+				console.log("afterEach Executed")
+			})
+
+			// Remove the fixtures so runs do not leave timestamped files behind. This matters
+			// most when the workspace falls back to process.cwd().
+			afterAll(async () => {
+				console.log("afterAll Executed")
+				for (const file of Object.values(testFiles)) {
+					await removeTestFile(file)
+				}
+			})
+
+			/**
+			 * Single replacement: asks the model to turn "Hello World" into "Hello Universe" with
+			 * apply_diff, then checks the file on disk and the native-protocol evidence.
+			 * Returns early without failing when `globalThis.api` is not set.
+			 */
+			it("should apply diff to modify file content and events (extension harness integrated)", async function ({
+				variable,
+			}) {
+				const api = (globalThis as { api?: RooCodeAPI }).api
+
+				if (!api) {
+					console.warn(
+						"[applyDiff.matrix] globalThis.api is not set; not running inside VSCode extension host. Skipping test.",
+					)
+					return
+				}
+
+				const file = testFiles.simpleModify
+				if (!file) throw new Error("Missing test file definition")
+				const expectedContent = "Hello Universe\nThis is a test file\nWith multiple lines"
+				await createTestFile(file)
+
+				const messages: ClineMessage[] = []
+				let errorOccurred: string | null = null
+				let applyDiffExecuted = false
+
+				// Lifecycle ids are collected and matched later, so an event emitted before
+				// startNewTask() has resolved with the task id is not lost.
+				const startedTaskIds = new Set<string>()
+				const completedTaskIds = new Set<string>()
+
+				const verification = createVerificationState()
+
+				const messageHandler = createNativeVerificationHandler(verification, messages, {
+					onError: (error) => {
+						errorOccurred = error
+					},
+					onApplyDiffExecuted: () => {
+						applyDiffExecuted = true
+					},
+					debugLogging: true,
+				})
+				const taskStartedHandler = (id: string) => {
+					startedTaskIds.add(id)
+				}
+				const taskCompletedHandler = (id: string) => {
+					completedTaskIds.add(id)
+				}
+				api.on(RooCodeEventName.Message, messageHandler)
+				api.on(RooCodeEventName.TaskStarted, taskStartedHandler)
+				api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler)
+
+				// Notes accumulated during the run; attached to the error if the test fails.
+				let verboseLog = ""
+				function logMsg(msg: string) {
+					verboseLog += msg + "\n"
+				}
+				try {
+					console.log(variable.openRouterModelId)
+					const taskId = await api.startNewTask({
+						configuration: {
+							mode: "code",
+							autoApprovalEnabled: true,
+							alwaysAllowWrite: true,
+							alwaysAllowReadOnly: true,
+							alwaysAllowReadOnlyOutsideWorkspace: true,
+							toolProtocol: "native",
+							apiProvider: "openrouter",
+							openRouterModelId: variable.openRouterModelId,
+						},
+						text: `Use apply_diff on the file ${file.name} to change "Hello World" to "Hello Universe". The file already exists with this content:\n${file.content}\nAssume the file exists and you can modify it directly.`,
+					})
+					await waitFor(() => startedTaskIds.has(taskId), { timeout: 60000 })
+					if (errorOccurred) {
+						logMsg("Task failed early with error: " + errorOccurred)
+						throw createVerboseError("Early error: " + errorOccurred, verboseLog, messages)
+					}
+					await waitFor(() => completedTaskIds.has(taskId), { timeout: 60000 })
+					await sleep(2000)
+					const actualContent = await fs.readFile(
+						file.path || path.join(getTestWorkspaceDir(), file.name),
+						"utf-8",
+					)
+					try {
+						assert.strictEqual(
+							actualContent.trim(),
+							expectedContent.trim(),
+							"File was not modified by extension and diff!",
+						)
+					} catch (e) {
+						logMsg("File content did not match expected output.")
+						logMsg("Expected:\n" + expectedContent)
+						logMsg("Actual:\n" + actualContent)
+						throw createVerboseError(e instanceof Error ? e.message : String(e), verboseLog, messages)
+					}
+
+					assertNativeProtocolUsed(verification, "simpleModify")
+
+					if (!applyDiffExecuted) {
+						logMsg("apply_diff tool was not executed!")
+						throw createVerboseError("apply_diff tool was not executed!", verboseLog, messages)
+					}
+				} catch (err) {
+					// Dump the collected context to the console as well, then let the failure propagate.
+					if (verboseLog || messages.length > 0) {
+						const lines = [
+							"",
+							"========== DEBUG LOG ==========",
+							verboseLog.trim(),
+							"---------- Message History ----------",
+							...messages.map((m) => JSON.stringify(m)),
+							"=====================================",
+						]
+						console.error(lines.filter(Boolean).join("\n"))
+					}
+					throw err
+				} finally {
+					api.off(RooCodeEventName.Message, messageHandler)
+					api.off(RooCodeEventName.TaskStarted, taskStartedHandler)
+					api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler)
+					await removeTestFile(file)
+				}
+
+				// Builds an Error whose message carries the debug log and the full message history.
+				function createVerboseError(text: string, log: string, msgArr: ClineMessage[]): Error {
+					let summary = `\n\n========== DEBUG LOG ==========`
+					if (log) summary += `\n${log.trim()}`
+					summary +=
+						`\n---------- Message History ----------\n` + msgArr.map((m) => JSON.stringify(m)).join("\n")
+					summary += `\n=====================================`
+					return new Error(text + summary)
+				}
+			})
+			/**
+			 * Several renames in one file: the model must apply them all with apply_diff; the test
+			 * then checks the native-protocol evidence and the exact resulting file content.
+			 * Returns early without failing when `globalThis.api` is not set.
+			 */
+			it("Should apply multiple search/replace blocks in single diff using native tool calling", async function ({
+				variable,
+			}) {
+				const api = (globalThis as { api?: RooCodeAPI }).api
+				if (!api) {
+					console.warn(
+						"[applyDiff.matrix] globalThis.api is not set; not running inside VSCode extension host. Skipping test.",
+					)
+					return
+				}
+
+				const messages: ClineMessage[] = []
+				const testFile = testFiles.multipleReplace
+				if (!testFile) {
+					throw new Error("Missing test file definition: multipleReplace")
+				}
+				const expectedContent =
+					"function compute(a, b) {\n" +
+					" const total = a + b\n" +
+					" const result = a * b\n" +
+					" return { total: total, result: result }\n" +
+					"}"
+				let applyDiffExecuted = false
+
+				// Lifecycle ids are collected and matched later, so an event emitted before
+				// startNewTask() has resolved with the task id is not lost.
+				const startedTaskIds = new Set<string>()
+				const completedTaskIds = new Set<string>()
+
+				const verification = createVerificationState()
+
+				const messageHandler = createNativeVerificationHandler(verification, messages, {
+					onApplyDiffExecuted: () => {
+						applyDiffExecuted = true
+					},
+					debugLogging: true,
+				})
+				api.on(RooCodeEventName.Message, messageHandler)
+
+				const taskStartedHandler = (id: string) => {
+					startedTaskIds.add(id)
+				}
+				api.on(RooCodeEventName.TaskStarted, taskStartedHandler)
+
+				const taskCompletedHandler = (id: string) => {
+					completedTaskIds.add(id)
+				}
+				api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler)
+
+				try {
+					const taskId = await api.startNewTask({
+						configuration: {
+							mode: "code",
+							autoApprovalEnabled: true,
+							alwaysAllowWrite: true,
+							alwaysAllowReadOnly: true,
+							alwaysAllowReadOnlyOutsideWorkspace: true,
+							toolProtocol: "native",
+							apiProvider: "openrouter",
+							openRouterModelId: variable.openRouterModelId,
+						},
+						text: `Use apply_diff on the file ${testFile.name} to make ALL of these changes:
+1. Rename function "calculate" to "compute"
+2. Rename parameters "x, y" to "a, b"
+3. Rename variable "sum" to "total" (including in the return statement)
+4. Rename variable "product" to "result" (including in the return statement)
+5. In the return statement, change { sum: sum, product: product } to { total: total, result: result }
+
+The file already exists with this content:
+${testFile.content}
+
+Assume the file exists and you can modify it directly.`,
+					})
+
+					console.log("Task ID:", taskId)
+					console.log("Test filename:", testFile.name)
+
+					await waitFor(() => startedTaskIds.has(taskId), { timeout: 60_000 })
+					console.log("Task started:", taskId)
+
+					await waitFor(() => completedTaskIds.has(taskId), { timeout: 60_000 })
+					console.log("Task completed:", taskId)
+
+					await sleep(2000)
+
+					const actualContent = await fs.readFile(testFile.path, "utf-8")
+					console.log("File content after modification:", actualContent)
+
+					assertNativeProtocolUsed(verification, "multipleReplace")
+
+					assert.strictEqual(applyDiffExecuted, true, "apply_diff tool should have been executed")
+
+					assert.strictEqual(
+						actualContent.trim(),
+						expectedContent.trim(),
+						"All replacements should be applied correctly",
+					)
+
+					console.log(
+						"Test passed! apply_diff tool executed with VERIFIED native protocol and multiple replacements applied successfully",
+					)
+				} finally {
+					// Always detach the listeners so they do not observe the next test's task.
+					api.off(RooCodeEventName.Message, messageHandler)
+					api.off(RooCodeEventName.TaskStarted, taskStartedHandler)
+					api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler)
+				}
+			})
+			/**
+			 * Targeted edit: only `oldFunction` and its log line may change; the rest of the file
+			 * must come back untouched. Also checks the native-protocol evidence.
+			 * Returns early without failing when `globalThis.api` is not set.
+			 */
+			it("Should handle apply_diff with line number hints using native tool calling", async function ({
+				variable,
+			}) {
+				const api = (globalThis as { api?: RooCodeAPI }).api
+				if (!api) {
+					console.warn(
+						"[applyDiff.matrix] globalThis.api is not set; not running inside VSCode extension host. Skipping test.",
+					)
+					return
+				}
+
+				const messages: ClineMessage[] = []
+				const testFile = testFiles.lineNumbers
+				if (!testFile) {
+					throw new Error("Missing test file definition: lineNumbers")
+				}
+				const expectedContent =
+					"// Header comment\n" +
+					"function newFunction() {\n" +
+					' console.log("New implementation")\n' +
+					"}\n" +
+					"\n" +
+					"// Another function\n" +
+					"function keepThis() {\n" +
+					' console.log("Keep this")\n' +
+					"}\n" +
+					"\n" +
+					"// Footer comment"
+
+				let applyDiffExecuted = false
+
+				// Lifecycle ids are collected and matched later, so an event emitted before
+				// startNewTask() has resolved with the task id is not lost.
+				const startedTaskIds = new Set<string>()
+				const completedTaskIds = new Set<string>()
+
+				const verification = createVerificationState()
+
+				const messageHandler = createNativeVerificationHandler(verification, messages, {
+					onApplyDiffExecuted: () => {
+						applyDiffExecuted = true
+					},
+					debugLogging: true,
+				})
+				api.on(RooCodeEventName.Message, messageHandler)
+
+				const taskStartedHandler = (id: string) => {
+					startedTaskIds.add(id)
+				}
+				api.on(RooCodeEventName.TaskStarted, taskStartedHandler)
+
+				const taskCompletedHandler = (id: string) => {
+					completedTaskIds.add(id)
+				}
+				api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler)
+
+				try {
+					const taskId = await api.startNewTask({
+						configuration: {
+							mode: "code",
+							autoApprovalEnabled: true,
+							alwaysAllowWrite: true,
+							alwaysAllowReadOnly: true,
+							alwaysAllowReadOnlyOutsideWorkspace: true,
+							toolProtocol: "native",
+							apiProvider: "openrouter",
+							openRouterModelId: variable.openRouterModelId,
+						},
+						text: `Use apply_diff on the file ${testFile.name} to change "oldFunction" to "newFunction" and update its console.log to "New implementation". Keep the rest of the file unchanged.
+
+The file already exists with this content:
+${testFile.content}
+
+Assume the file exists and you can modify it directly.`,
+					})
+
+					console.log("Task ID:", taskId)
+					console.log("Test filename:", testFile.name)
+
+					await waitFor(() => startedTaskIds.has(taskId), { timeout: 60_000 })
+
+					await waitFor(() => completedTaskIds.has(taskId), { timeout: 60_000 })
+
+					await sleep(2000)
+
+					const actualContent = await fs.readFile(testFile.path, "utf-8")
+					console.log("File content after modification:", actualContent)
+
+					assertNativeProtocolUsed(verification, "lineNumbers")
+
+					assert.strictEqual(applyDiffExecuted, true, "apply_diff tool should have been executed")
+
+					assert.strictEqual(
+						actualContent.trim(),
+						expectedContent.trim(),
+						"Only specified function should be modified",
+					)
+
+					console.log(
+						"Test passed! apply_diff tool executed with VERIFIED native protocol and targeted modification successful",
+					)
+				} finally {
+					// Always detach the listeners so they do not observe the next test's task.
+					api.off(RooCodeEventName.Message, messageHandler)
+					api.off(RooCodeEventName.TaskStarted, taskStartedHandler)
+					api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler)
+				}
+			})
+			/**
+			 * Failure path: the model is told to call apply_diff with a search pattern that is not
+			 * in the file. The test passes when apply_diff was attempted, no write_to_file style
+			 * tool was used, and the file content is unchanged.
+			 * Returns early without failing when `globalThis.api` is not set.
+			 */
+			it("Should handle apply_diff errors gracefully using native tool calling", async function ({ variable }) {
+				const api = (globalThis as { api?: RooCodeAPI }).api
+				if (!api) {
+					console.warn(
+						"[applyDiff.matrix] globalThis.api is not set; not running inside VSCode extension host. Skipping test.",
+					)
+					return
+				}
+
+				const messages: ClineMessage[] = []
+				const testFile = testFiles.errorHandling
+				if (!testFile) {
+					throw new Error("Missing test file definition: errorHandling")
+				}
+				let errorDetected = false
+				let applyDiffAttempted = false
+				let writeToFileUsed = false
+
+				// Lifecycle ids are collected and matched later, so an event emitted before
+				// startNewTask() has resolved with the task id is not lost.
+				const startedTaskIds = new Set<string>()
+				const completedTaskIds = new Set<string>()
+
+				const messageHandler = ({ message }: { message: ClineMessage }) => {
+					messages.push(message)
+
+					if (message.type === "say" && message.say === "error") {
+						errorDetected = true
+						console.log("Error detected:", message.text)
+					}
+
+					if (message.type === "ask" && message.ask === "tool") {
+						console.log("Tool ASK request:", message.text?.substring(0, 500))
+						try {
+							const toolData = JSON.parse(message.text || "{}")
+							if (toolData.tool === "appliedDiff") {
+								applyDiffAttempted = true
+								console.log("apply_diff tool attempted via ASK!")
+							}
+							if (toolData.tool === "editedExistingFile" || toolData.tool === "newFileCreated") {
+								writeToFileUsed = true
+								console.log("write_to_file tool used!")
+							}
+						} catch (e) {
+							console.error(e)
+						}
+					}
+
+					// A diff_error message also counts as evidence that apply_diff was tried.
+					if (message.type === "say" && message.say === "diff_error") {
+						applyDiffAttempted = true
+						console.log("diff_error detected - apply_diff was attempted")
+					}
+
+					if (message.type === "say" && message.say === "api_req_started" && message.text) {
+						console.log("API request started:", message.text.substring(0, 200))
+					}
+				}
+				api.on(RooCodeEventName.Message, messageHandler)
+
+				const taskStartedHandler = (id: string) => {
+					startedTaskIds.add(id)
+				}
+				api.on(RooCodeEventName.TaskStarted, taskStartedHandler)
+
+				const taskCompletedHandler = (id: string) => {
+					completedTaskIds.add(id)
+				}
+				api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler)
+
+				try {
+					const taskId = await api.startNewTask({
+						configuration: {
+							mode: "code",
+							autoApprovalEnabled: true,
+							alwaysAllowWrite: true,
+							alwaysAllowReadOnly: true,
+							alwaysAllowReadOnlyOutsideWorkspace: true,
+							reasoningEffort: "none",
+							toolProtocol: "native",
+							apiProvider: "openrouter",
+							openRouterModelId: variable.openRouterModelId,
+						},
+						text: `
+---
+description: Test apply_diff tool error handling with non-existent patterns
+argument-hint: [search-pattern]
+---
+
+
+Test the apply_diff tool's error handling by attempting to replace a pattern that does not exist in the target file.
+Target File: ${testFile.name}
+Search pattern: "PATTERN_THAT_DOES_NOT_EXIST_xyz123"
+Replacement: "REPLACEMENT_xyz123"
+
+
+
+This command verifies that apply_diff correctly handles and reports errors when:
+- A search pattern is not found in the target file
+- The tool gracefully fails with an informative error message
+- Error handling works as expected for debugging workflows
+
+
+
+
+ Execute apply_diff directly
+
+ Call apply_diff on the specified file with a non-existent search pattern.
+ Do NOT analyze the file first - the goal is to test error handling.
+
+
+
+
+ Observe the error response
+
+ The apply_diff tool should report that the pattern was not found.
+ This is the EXPECTED outcome - not a failure of the test.
+
+
+
+
+ Report results
+
+ Confirm whether the error handling worked correctly by reporting:
+ - The error message received
+ - Whether the tool behaved as expected
+
+
+
+
+
+
+ - YOU MUST call the apply_diff tool - this is non-negotiable
+ - Use the EXACT search pattern provided (or default: "PATTERN_THAT_DOES_NOT_EXIST_xyz123")
+ - Do NOT use write_to_file or any other file modification tool
+ - Do NOT analyze the file contents before calling apply_diff
+ - Do NOT refuse to call the tool - error handling verification is the purpose
+
+
+
+ PATTERN_THAT_DOES_NOT_EXIST_xyz123
+ REPLACEMENT_xyz123
+
+
+
+
+
+ Use this structure for the apply_diff call:
+ - path: The file specified by the user
+ - diff: A SEARCH/REPLACE block with the non-existent pattern
+
+
+
+ \`\`\`
+ <<<<<<< SEARCH
+ :start_line:1
+ -------
+ PATTERN_THAT_DOES_NOT_EXIST_xyz123
+ =======
+ REPLACEMENT_xyz123
+ >>>>>>> REPLACE
+ \`\`\`
+
+
+
+
+
+ The test succeeds when apply_diff returns an error indicating the pattern was not found.
+ This confirms the tool's error handling is working correctly.
+
+
+
+ After executing, report:
+ - Whether apply_diff was called: YES/NO
+ - Error message received: [actual error]
+ - Error handling status: WORKING/FAILED
+
+
+
+
+ - Only use the apply_diff tool
+ - Accept that "pattern not found" errors are the expected result
+ - Do not attempt to "fix" the test by finding real patterns
+ - This is a diagnostic/testing command, not a production workflow
+`,
+					})
+
+					console.log("Task ID:", taskId)
+					console.log("Test filename:", testFile.name)
+					await waitFor(() => startedTaskIds.has(taskId), { timeout: 90_000 })
+
+					// Either outcome ends the wait: the task completes, or an error message is seen.
+					await waitFor(() => completedTaskIds.has(taskId) || errorDetected, { timeout: 90_000 })
+
+					await sleep(2000)
+
+					const actualContent = await fs.readFile(testFile.path, "utf-8")
+					console.log("File content after task:", actualContent)
+					console.log("applyDiffAttempted:", applyDiffAttempted)
+					console.log("writeToFileUsed:", writeToFileUsed)
+
+					assert.strictEqual(applyDiffAttempted, true, "apply_diff tool should have been attempted")
+
+					assert.strictEqual(
+						writeToFileUsed,
+						false,
+						"write_to_file should NOT be used when apply_diff fails - the AI should report the error instead",
+					)
+
+					assert.strictEqual(
+						actualContent.trim(),
+						testFile.content.trim(),
+						"File content should remain unchanged when search pattern not found",
+					)
+
+					console.log("Test passed! apply_diff attempted with native protocol and error handled gracefully")
+				} finally {
+					// Always detach the listeners so they do not observe the next test's task.
+					api.off(RooCodeEventName.Message, messageHandler)
+					api.off(RooCodeEventName.TaskStarted, taskStartedHandler)
+					api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler)
+				}
+			})
+			/**
+			 * Two separate functions edited in one task: the model is asked to use two
+			 * search/replace blocks. The test checks the native-protocol evidence, that apply_diff
+			 * ran (its call count is logged), and the exact resulting file content.
+			 * Returns early without failing when `globalThis.api` is not set.
+			 */
+			it("Should apply multiple search/replace blocks to edit two separate functions using native tool calling", async function ({
+				variable,
+			}) {
+				const api = (globalThis as { api?: RooCodeAPI }).api
+				if (!api) {
+					console.warn(
+						"[applyDiff.matrix] globalThis.api is not set; not running inside VSCode extension host. Skipping test.",
+					)
+					return
+				}
+
+				const messages: ClineMessage[] = []
+				const testFile = testFiles.multiSearchReplace
+				if (!testFile) {
+					throw new Error("Missing test file definition: multiSearchReplace")
+				}
+				const expectedContent =
+					"function transformData(data) {\n" +
+					' console.log("Transforming data")\n' +
+					" return data.map(item => item * 2)\n" +
+					"}\n" +
+					"\n" +
+					"// Some other code in between\n" +
+					"const config = {\n" +
+					" timeout: 5000,\n" +
+					" retries: 3\n" +
+					"}\n" +
+					"\n" +
+					"function checkInput(input) {\n" +
+					' console.log("Checking input")\n' +
+					" if (!input) {\n" +
+					' throw new Error("Invalid input")\n' +
+					" }\n" +
+					" return true\n" +
+					"}"
+				let errorOccurred: string | null = null
+				let applyDiffExecuted = false
+				let applyDiffCount = 0
+
+				// Lifecycle ids are collected and matched later, so an event emitted before
+				// startNewTask() has resolved with the task id is not lost.
+				const startedTaskIds = new Set<string>()
+				const completedTaskIds = new Set<string>()
+
+				// Tags that appear in plain text only when a tool call is written out as XML.
+				// These must be non-empty: "".includes("") is true for every string.
+				const xmlToolTags = ["<apply_diff>", "</apply_diff>"]
+
+				const verification = createVerificationState()
+
+				const messageHandler = ({ message }: { message: ClineMessage }) => {
+					messages.push(message)
+
+					if (message.type === "say" && message.say === "error") {
+						errorOccurred = message.text || "Unknown error"
+						console.error("Error:", message.text)
+					}
+					if (message.type === "ask" && message.ask === "tool") {
+						console.log("Tool request:", message.text?.substring(0, 200))
+						try {
+							const toolData = JSON.parse(message.text || "{}")
+							if (toolData.tool) {
+								verification.toolWasExecuted = true
+								verification.executedToolName = toolData.tool
+								console.log(`[VERIFIED] Tool executed: ${toolData.tool}`)
+							}
+							if (toolData.tool === "appliedDiff") {
+								applyDiffExecuted = true
+								applyDiffCount++
+								console.log(`apply_diff tool executed! (count: ${applyDiffCount})`)
+							}
+						} catch (_e) {
+							// A payload that is not JSON carries no tool information; ignore it.
+							void _e
+						}
+					}
+					if (message.type === "say" && (message.say === "completion_result" || message.say === "text")) {
+						console.log("AI response:", message.text?.substring(0, 200))
+						if (message.say === "text" && message.text) {
+							const text = message.text
+							const hasXMLToolTags = xmlToolTags.some((tag) => text.includes(tag))
+							if (hasXMLToolTags) {
+								verification.responseIsNotXML = false
+								console.log("[WARNING] Found XML tool tags in response")
+							}
+						}
+					}
+
+					if (message.type === "say" && message.say === "api_req_started" && message.text) {
+						console.log("API request started:", message.text.substring(0, 200))
+						try {
+							const requestData = JSON.parse(message.text)
+							if (requestData.apiProtocol) {
+								verification.apiProtocol = requestData.apiProtocol
+								if (requestData.apiProtocol === "anthropic" || requestData.apiProtocol === "openai") {
+									verification.hasNativeApiProtocol = true
+									console.log(`[VERIFIED] API Protocol: ${requestData.apiProtocol}`)
+								}
+							}
+						} catch (e) {
+							console.log("Failed to parse api_req_started message:", e)
+						}
+					}
+				}
+				api.on(RooCodeEventName.Message, messageHandler)
+
+				const taskStartedHandler = (id: string) => {
+					startedTaskIds.add(id)
+				}
+				api.on(RooCodeEventName.TaskStarted, taskStartedHandler)
+
+				const taskCompletedHandler = (id: string) => {
+					completedTaskIds.add(id)
+				}
+				api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler)
+
+				try {
+					const taskId = await api.startNewTask({
+						configuration: {
+							mode: "code",
+							autoApprovalEnabled: true,
+							alwaysAllowWrite: true,
+							alwaysAllowReadOnly: true,
+							alwaysAllowReadOnlyOutsideWorkspace: true,
+							toolProtocol: "native",
+							apiProvider: "openrouter",
+							openRouterModelId: variable.openRouterModelId,
+						},
+						text: `Use apply_diff on the file ${testFile.name} to make these changes. You MUST use TWO SEPARATE search/replace blocks within a SINGLE apply_diff call:
+
+FIRST search/replace block: Edit the processData function to rename it to "transformData" and change "Processing data" to "Transforming data"
+
+SECOND search/replace block: Edit the validateInput function to rename it to "checkInput" and change "Validating input" to "Checking input"
+
+Important: Use multiple SEARCH/REPLACE blocks in one apply_diff call, NOT multiple apply_diff calls. Each function should have its own search/replace block.
+
+The file already exists with this content:
+${testFile.content}
+
+Assume the file exists and you can modify it directly.`,
+					})
+
+					console.log("Task ID:", taskId)
+					console.log("Test filename:", testFile.name)
+
+					await waitFor(() => startedTaskIds.has(taskId), { timeout: 60_000 })
+					console.log("Task started:", taskId)
+
+					// An early error is only reported here; the run continues so the final
+					// assertions decide the outcome.
+					if (errorOccurred) {
+						console.error("Early error detected:", errorOccurred)
+					}
+
+					await waitFor(() => completedTaskIds.has(taskId), { timeout: 60_000 })
+					console.log("Task completed:", taskId)
+
+					await sleep(2000)
+
+					const actualContent = await fs.readFile(testFile.path, "utf-8")
+					console.log("File content after modification:", actualContent)
+
+					assertNativeProtocolUsed(verification, "multiSearchReplace")
+
+					assert.strictEqual(applyDiffExecuted, true, "apply_diff tool should have been executed")
+					console.log(`apply_diff was executed ${applyDiffCount} time(s)`)
+
+					assert.strictEqual(
+						actualContent.trim(),
+						expectedContent.trim(),
+						"Both functions should be modified with separate search/replace blocks",
+					)
+
+					console.log(
+						"Test passed! apply_diff tool executed with VERIFIED native protocol and multiple search/replace blocks applied successfully",
+					)
+				} finally {
+					// Always detach the listeners so they do not observe the next test's task.
+					api.off(RooCodeEventName.Message, messageHandler)
+					api.off(RooCodeEventName.TaskStarted, taskStartedHandler)
+					api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler)
+				}
+			})
+ })
+ },
+})
+
+/**
+ * Rewrites every fixture in `testFiles` with its original content, one file after another.
+ */
+async function resetAllTestFiles(): Promise<void> {
+	for (const [, fixture] of Object.entries(testFiles)) {
+		await resetTestFile(fixture)
+	}
+}
diff --git a/apps/vscode-evals/src/suite/index.ts b/apps/vscode-evals/src/suite/index.ts
new file mode 100644
index 0000000000..41d8a34bfd
--- /dev/null
+++ b/apps/vscode-evals/src/suite/index.ts
@@ -0,0 +1,68 @@
+import * as path from "path"
+import * as vscode from "vscode"
+
+import { runMatrix } from "@roo-code/evally"
+import type { MatrixSuiteDefinition } from "@roo-code/evally"
+import type { RooCodeAPI } from "@roo-code/types"
+
+import { waitFor } from "./utils"
+
+/** `globalThis` widened with the values `run` publishes before executing the suite. */
+type TestGlobals = typeof globalThis & {
+	api?: RooCodeAPI
+	rooTestWorkspaceDir?: string
+}
+
+const getTestGlobals = (): TestGlobals => globalThis as TestGlobals
+
+/**
+ * Activates the `RooVeterinaryInc.roo-cline` extension, configures it for
+ * OpenRouter, publishes the API handle and the first workspace folder on
+ * `globalThis`, then loads the applyDiff matrix suite and executes it with
+ * `runMatrix`.
+ *
+ * @throws If `OPENROUTER_API_KEY` is not set, the extension is not found, or the
+ * suite module has no valid default export.
+ */
+export async function run() {
+	// Fail fast with a clear message instead of passing `undefined` on as the API key.
+	const openRouterApiKey = process.env.OPENROUTER_API_KEY
+	if (!openRouterApiKey) {
+		throw new Error("OPENROUTER_API_KEY is not set (see .env.local.sample)")
+	}
+
+	const extension = vscode.extensions.getExtension("RooVeterinaryInc.roo-cline")
+
+	if (!extension) {
+		throw new Error("Extension not found")
+	}
+
+	const api = extension.isActive ? extension.exports : await extension.activate()
+
+	await api.setConfiguration({
+		apiProvider: "openrouter" as const,
+		openRouterApiKey,
+		// `||` rather than `??`: an empty OPENROUTER_MODEL_ID also falls back to the default.
+		openRouterModelId: process.env.OPENROUTER_MODEL_ID || "openai/gpt-5.1",
+	})
+
+	await vscode.commands.executeCommand("roo-cline.SidebarProvider.focus")
+	await waitFor(() => api.isReady())
+	const globals = getTestGlobals()
+	globals.api = api
+
+	const workspaceDir = vscode.workspace.workspaceFolders?.[0]?.uri.fsPath
+	if (workspaceDir) {
+		globals.rooTestWorkspaceDir = workspaceDir
+	}
+
+	const suiteModule = await import(path.resolve(__dirname, "./applyDiff.matrix.test"))
+	const moduleDefault = (suiteModule as { default?: MatrixSuiteDefinition }).default
+
+	if (!moduleDefault || typeof moduleDefault !== "object" || typeof moduleDefault.tests !== "function") {
+		throw new Error("Skipping applyDiff.matrix.test: No valid matrix suite export")
+	}
+
+	const suiteDef: MatrixSuiteDefinition = moduleDefault
+
+	await runMatrix(suiteDef)
+}
diff --git a/apps/vscode-evals/src/suite/utils.ts b/apps/vscode-evals/src/suite/utils.ts
new file mode 100644
index 0000000000..85fe43f0fd
--- /dev/null
+++ b/apps/vscode-evals/src/suite/utils.ts
@@ -0,0 +1,63 @@
+type WaitForOptions = {
+	/** Maximum time to wait, in milliseconds, before the returned promise rejects. */
+	timeout?: number
+	/** Delay, in milliseconds, between two evaluations of the condition. */
+	interval?: number
+}
+
+/**
+ * Polls `condition` until it yields a truthy value.
+ *
+ * The condition is evaluated immediately and then again every `interval`
+ * milliseconds. Both timers are cancelled as soon as the promise settles, so a
+ * finished wait neither keeps a timer pending nor keeps polling in the background.
+ *
+ * @param condition - Synchronous or asynchronous predicate to poll.
+ * @param options - Optional `timeout` (default 30 000 ms) and `interval` (default 250 ms).
+ * @returns A promise that resolves once the condition is satisfied.
+ * @throws Rejects with `Timeout after <n>s` when the timeout elapses first, or with
+ * the condition's own error if evaluating it throws or rejects.
+ */
+export const waitFor = (
+	condition: (() => Promise<boolean>) | (() => boolean),
+	{ timeout = 30_000, interval = 250 }: WaitForOptions = {},
+) =>
+	new Promise<void>((resolve, reject) => {
+		let pollId: NodeJS.Timeout | undefined = undefined
+		let settled = false
+
+		const timeoutId = setTimeout(() => {
+			settled = true
+			clearTimeout(pollId)
+			reject(new Error(`Timeout after ${Math.floor(timeout / 1000)}s`))
+		}, timeout)
+
+		const check = async () => {
+			try {
+				const isSatisfied = await condition()
+
+				// The timeout may have fired while an async condition was still pending.
+				if (settled) {
+					return
+				}
+
+				if (isSatisfied) {
+					settled = true
+					clearTimeout(timeoutId)
+					resolve()
+				} else {
+					pollId = setTimeout(check, interval)
+				}
+			} catch (error) {
+				// Surface a failing condition instead of leaving an unhandled rejection.
+				settled = true
+				clearTimeout(timeoutId)
+				reject(error)
+			}
+		}
+
+		void check()
+	})
+
+/** Resolves after `ms` milliseconds. */
+export const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms))
diff --git a/apps/vscode-evals/tsconfig.esm.json b/apps/vscode-evals/tsconfig.esm.json
new file mode 100644
index 0000000000..e2f212fab9
--- /dev/null
+++ b/apps/vscode-evals/tsconfig.esm.json
@@ -0,0 +1,8 @@
+{
+ "extends": "@roo-code/config-typescript/base.json",
+ "compilerOptions": {
+ "outDir": "out"
+ },
+ "include": ["src"],
+ "exclude": ["node_modules"]
+}
diff --git a/apps/vscode-evals/tsconfig.json b/apps/vscode-evals/tsconfig.json
new file mode 100644
index 0000000000..a712ea84d7
--- /dev/null
+++ b/apps/vscode-evals/tsconfig.json
@@ -0,0 +1,19 @@
+{
+ "compilerOptions": {
+ "module": "CommonJS",
+ "moduleResolution": "Node",
+ "esModuleInterop": true,
+ "target": "ES2022",
+ "lib": ["ES2022", "ESNext.Disposable", "DOM"],
+ "sourceMap": true,
+ "strict": true,
+ "skipLibCheck": true,
+ "useUnknownInCatchVariables": false,
+ "outDir": "out",
+ "composite": false,
+ "types": ["node"],
+ "baseUrl": "./src"
+ },
+ "include": ["src/**/*"],
+ "exclude": [".vscode-test", "**/node_modules/**", "out"]
+}
diff --git a/knip.json b/knip.json
index e15c62bda1..e8a3bfd24d 100644
--- a/knip.json
+++ b/knip.json
@@ -3,6 +3,7 @@
"ignore": [
"**/__tests__/**",
"apps/vscode-e2e/**",
+ "apps/vscode-evals/**",
"src/extension/api.ts",
"src/activate/**",
"src/workers/countTokens.ts",
@@ -19,7 +20,7 @@
"entry": ["src/index.tsx", "src/browser-panel.tsx"],
"project": ["src/**/*.{ts,tsx}", "../src/shared/*.ts"]
},
- "packages/{build,cloud,evals,ipc,telemetry,types}": {
+ "packages/{build,cloud,evally,evals,ipc,telemetry,types}": {
"project": ["src/**/*.ts"]
}
}
diff --git a/package.json b/package.json
index 0f3c3b7ba0..b00ecf3f81 100644
--- a/package.json
+++ b/package.json
@@ -10,6 +10,7 @@
"install": "node scripts/bootstrap.mjs",
"install:all": "node scripts/bootstrap.mjs",
"lint": "turbo lint --log-order grouped --output-logs new-only",
+ "precheck-types": "pnpm build",
"check-types": "turbo check-types --log-order grouped --output-logs new-only",
"test": "turbo test --log-order grouped --output-logs new-only",
"format": "turbo format --log-order grouped --output-logs new-only",
diff --git a/packages/evally/examples/sampleMatrix.test.ts b/packages/evally/examples/sampleMatrix.test.ts
new file mode 100644
index 0000000000..ac86ac26eb
--- /dev/null
+++ b/packages/evally/examples/sampleMatrix.test.ts
@@ -0,0 +1,54 @@
+import { defineMatrix, it, describe } from "../src/runner/TestMatrixRunner"
+import type { MatrixTestContext } from "../src/runner/types"
+
+/**
+ * Example matrix: three variable sets, three iterations each, and nine checks
+ * spread over three suites. "should fail for no token" throws for `API_B`, so a
+ * run of this sample always reports failures.
+ */
+export default defineMatrix({
+	variables: [
+		{ api: "API_A", url: "https://api-a.test", region: "us-east" },
+		{ api: "API_B", url: "https://api-b.test", region: "eu-west" },
+		{ api: "API_C", url: "https://api-c.test", region: "asia-pac" },
+	],
+	iterations: 3,
+	tests: () => {
+		// Suite 1: basic shape of each variable set
+		describe("API Health Checks", () => {
+			it("should respond with status 200", async ({ variable }: MatrixTestContext) => {
+				if (!variable.url.startsWith("https://")) throw new Error("Invalid URL")
+			})
+			it("should have a valid api name", ({ variable }: MatrixTestContext) => {
+				if (!variable.api) throw new Error("Missing api name")
+			})
+			it("should include a valid region", ({ variable }: MatrixTestContext) => {
+				if (!["us-east", "eu-west", "asia-pac"].includes(variable.region)) throw new Error("Unexpected region")
+			})
+		})
+		// Suite 2: contains the one check that fails on purpose (for API_B)
+		describe("Authentication", () => {
+			it("should fail for no token", async ({ variable }: MatrixTestContext) => {
+				if (variable.api === "API_B") throw new Error("No token failure")
+			})
+			it("should pass with valid token", async ({ variable }: MatrixTestContext) => {
+				if (!variable.url.includes("api-")) throw new Error("No api in URL")
+			})
+			it("should region match policy", ({ variable }: MatrixTestContext) => {
+				if (variable.region === "asia-pac" && variable.api !== "API_C") throw new Error("policy fail")
+			})
+		})
+		// Suite 3: format checks on the individual fields
+		describe("Data validation", () => {
+			it("should have url with a dot", ({ variable }: MatrixTestContext) => {
+				if (!variable.url.includes(".")) throw new Error("URL missing dot")
+			})
+			it("api name upper-case only", ({ variable }: MatrixTestContext) => {
+				if (!/^[A-Z_]+$/.test(variable.api)) throw new Error("API name format")
+			})
+			it("region code format", ({ variable }: MatrixTestContext) => {
+				if (!variable.region.includes("-")) throw new Error("Bad region code")
+			})
+		})
+	},
+})
diff --git a/packages/evally/package.json b/packages/evally/package.json
new file mode 100644
index 0000000000..dd76bbf037
--- /dev/null
+++ b/packages/evally/package.json
@@ -0,0 +1,28 @@
+{
+ "name": "@roo-code/evally",
+ "version": "0.1.0",
+ "private": false,
+ "main": "dist/index.js",
+ "types": "dist/index.d.ts",
+ "exports": {
+ ".": {
+ "import": "./dist/index.js",
+ "require": "./dist/index.js",
+ "types": "./dist/index.d.ts"
+ },
+ "./runner/TestMatrixRunner": {
+ "import": "./dist/runner/TestMatrixRunner.js",
+ "require": "./dist/runner/TestMatrixRunner.js",
+ "types": "./dist/runner/TestMatrixRunner.d.ts"
+ },
+ "./runner/types": {
+ "import": "./dist/runner/types.js",
+ "require": "./dist/runner/types.js",
+ "types": "./dist/runner/types.d.ts"
+ }
+ },
+ "scripts": {
+ "build": "tsc --outDir dist --declaration --declarationDir dist",
+ "test:run": "tsx src/cli/standaloneRunner.ts examples/sampleMatrix.test.ts"
+ }
+}
diff --git a/packages/evally/src/cli/standaloneRunner.ts b/packages/evally/src/cli/standaloneRunner.ts
new file mode 100644
index 0000000000..40d9d2188a
--- /dev/null
+++ b/packages/evally/src/cli/standaloneRunner.ts
@@ -0,0 +1,92 @@
+#!/usr/bin/env node
+import * as path from "path"
+import { pathToFileURL } from "url"
+
+/** Pass/fail tally of one test for one variable set. */
+type VariableTally = { variable: unknown; total: number; passes: number; fails: number }
+
+/**
+ * Loads the matrix definition named by the first CLI argument, runs it, and prints
+ * every result, a pass rate per suite / test / variable set, and a global total.
+ *
+ * Exits with status 1 when no file is given, the file cannot be imported, or its
+ * default export is not a matrix definition.
+ */
+async function main() {
+	const file = process.argv[2]
+	if (!file) {
+		console.error("Usage: standaloneRunner <test-file>")
+		process.exit(1)
+	}
+	const absPath = path.resolve(process.cwd(), file)
+	let mod
+	try {
+		mod = await import(pathToFileURL(absPath).toString())
+	} catch (err) {
+		console.error(`Failed to load test file: ${err}`)
+		process.exit(1)
+	}
+	if (!mod.default || !mod.default.variables || typeof mod.default.tests !== "function") {
+		console.error("Test file does not export a valid matrix test definition as default.")
+		process.exit(1)
+	}
+	const { runMatrix } = await import("../runner/TestMatrixRunner")
+	const results = await runMatrix(mod.default)
+	console.log("\n--- MATRIX TEST RESULTS ---")
+	for (const r of results) {
+		const vdesc = JSON.stringify(r.variable)
+		if (r.passed) {
+			console.log(`PASS [${vdesc}] [iteration: ${r.iteration}] ${r.testName}`)
+		} else {
+			console.log(`FAIL [${vdesc}] [iteration: ${r.iteration}] ${r.testName} Error: ${r.error}`)
+		}
+	}
+
+	// suite -> test name -> JSON-serialised variable set -> tally
+	const suiteSummary = new Map<string, Map<string, Map<string, VariableTally>>>()
+	for (const r of results) {
+		let testMap = suiteSummary.get(r.suite)
+		if (!testMap) {
+			testMap = new Map()
+			suiteSummary.set(r.suite, testMap)
+		}
+		let varMap = testMap.get(r.testName)
+		if (!varMap) {
+			varMap = new Map()
+			testMap.set(r.testName, varMap)
+		}
+		const varKey = JSON.stringify(r.variable)
+		let counts = varMap.get(varKey)
+		if (!counts) {
+			counts = { variable: r.variable, total: 0, passes: 0, fails: 0 }
+			varMap.set(varKey, counts)
+		}
+		counts.total++
+		if (r.passed) counts.passes++
+		else counts.fails++
+	}
+	console.log("\n--- SUITE/TEST-LEVEL SUMMARY (Pass Rate per variable set, grouped by suite/test) ---")
+	for (const [suite, testMap] of suiteSummary.entries()) {
+		console.log(`Suite: ${suite}`)
+		for (const [testName, varMap] of testMap.entries()) {
+			console.log(` Test: ${testName}`)
+			for (const { variable, total, passes, fails } of varMap.values()) {
+				const percent = ((passes / total) * 100).toFixed(1)
+				console.log(
+					` Vars: ${JSON.stringify(variable)}\n Pass: ${passes}/${total} (${percent}%) Fail: ${fails}/${total}`,
+				)
+			}
+		}
+	}
+	const passes = results.filter((r) => r.passed).length
+	const fails = results.length - passes
+	console.log(`\nGlobal summary: ${passes} passed, ${fails} failed, total ${results.length}`)
+	// NOTE(review): the exit status is 0 even when tests failed -- confirm that is intended.
+	process.exit(0)
+}
+
+main().catch((err) => {
+	// Without this, an error thrown while running the matrix would be an unhandled rejection.
+	console.error(err)
+	process.exit(1)
+})
diff --git a/packages/evally/src/index.ts b/packages/evally/src/index.ts
new file mode 100644
index 0000000000..bb2beef821
--- /dev/null
+++ b/packages/evally/src/index.ts
@@ -0,0 +1,2 @@
+export * from "./runner/TestMatrixRunner.js"
+export * from "./runner/types.js"
diff --git a/packages/evally/src/runner/TestMatrixRunner.ts b/packages/evally/src/runner/TestMatrixRunner.ts
new file mode 100644
index 0000000000..5a832f725d
--- /dev/null
+++ b/packages/evally/src/runner/TestMatrixRunner.ts
@@ -0,0 +1,434 @@
+import type {
+	MatrixRunOptions,
+	MatrixTestDescription,
+	MatrixTestFn,
+	MatrixSuiteDefinition,
+	MatrixTestResult,
+} from "./types"
+import * as path from "path"
+import * as fs from "fs/promises"
+
+/** ANSI escape sequences used to style the console report. */
+const COLORS = {
+	reset: "\x1b[0m",
+	bold: "\x1b[1m",
+	dim: "\x1b[2m",
+	gray: "\x1b[90m",
+	green: "\x1b[32m",
+	red: "\x1b[31m",
+	yellow: "\x1b[33m",
+	cyan: "\x1b[36m",
+} as const
+
+/** Wraps `text` in the ANSI sequence `code` and resets styling afterwards. */
+function color(text: string, code: string): string {
+	return `${code}${text}${COLORS.reset}`
+}
+
+/** Formats a duration as rounded minutes (>= 60 s), seconds with one decimal (>= 1 s) or milliseconds. */
+function formatDuration(ms: number): string {
+	if (ms >= 60_000) {
+		const minutes = Math.round(ms / 60_000)
+		return `${minutes}m`
+	}
+	if (ms >= 1_000) {
+		const seconds = ms / 1_000
+		return `${seconds.toFixed(1)}s`
+	}
+	return `${ms}ms`
+}
+
+/** Tests collected by `it`; emptied at the start of every `runMatrix` call. */
+const testRegistry: MatrixTestDescription[] = []
+/** Suite currently being declared, or `null` outside of a `describe` callback. */
+let currentSuite: string | null = null
+
+/** Declares a suite: tests registered synchronously inside `fn` belong to `suiteName`. */
+export function describe(suiteName: string, fn: () => void) {
+	const prevSuite = currentSuite
+	currentSuite = suiteName
+	try {
+		fn()
+	} finally {
+		// Restore the enclosing suite even if the declaration callback throws.
+		currentSuite = prevSuite
+	}
+}
+
+/**
+ * Registers a test under the suite currently being declared.
+ *
+ * @throws If called outside `describe`, or if the suite already contains a test named `name`.
+ */
+export function it(name: string, fn: MatrixTestFn) {
+	if (!currentSuite) throw new Error("Cannot declare test outside a describe(suiteName, ...) block.")
+	if (testRegistry.some((test) => test.suite === currentSuite && test.name === name)) {
+		throw new Error(
+			`Duplicate test name found in suite '${currentSuite}': ${name}. Test names must be unique within a suite.`,
+		)
+	}
+	testRegistry.push({ suite: currentSuite, name, fn })
+}
+
+/** Alias of `describe`. */
+export const suite = describe
+/** Alias of `it`. */
+export const test = it
+
+/** A lifecycle hook; may be synchronous or return a promise. */
+type SetupFn = () => Promise<void> | void
+const _globalBeforeAll: SetupFn[] = []
+const _globalAfterAll: SetupFn[] = []
+const _globalBeforeEach: SetupFn[] = []
+const _globalAfterEach: SetupFn[] = []
+
+/** Registers a hook that runs once, before the first test of the next run. */
+export function beforeAll(fn: SetupFn) {
+	_globalBeforeAll.push(fn)
+}
+export const suiteSetup = beforeAll
+
+/** Registers a hook that runs once, after the last test of the next run. */
+export function afterAll(fn: SetupFn) {
+	_globalAfterAll.push(fn)
+}
+export const suiteTeardown = afterAll
+
+/** Registers a hook that runs before every test execution. */
+export function beforeEach(fn: SetupFn) {
+	_globalBeforeEach.push(fn)
+}
+export const setup = beforeEach
+
+/** Registers a hook that runs after every test execution, whether it passed or failed. */
+export function teardown(fn: SetupFn) {
+	_globalAfterEach.push(fn)
+}
+export const afterEach = teardown
+
+/** Empties all four global hook lists. */
+function clearGlobalHooks() {
+	_globalBeforeAll.length = 0
+	_globalAfterAll.length = 0
+	_globalBeforeEach.length = 0
+	_globalAfterEach.length = 0
+}
+
+/**
+ * Registers the definition's optional hooks in the global hook lists and returns
+ * the definition unchanged. `runMatrix` clears those lists when it finishes, so
+ * the hooks registered here only apply to the next run.
+ */
+export function defineMatrix(def: MatrixSuiteDefinition): MatrixSuiteDefinition {
+	if (typeof def.suiteSetup === "function") {
+		suiteSetup(def.suiteSetup)
+	}
+	if (typeof def.setup === "function") {
+		setup(def.setup)
+	}
+	if (typeof def.suiteTeardown === "function") {
+		suiteTeardown(def.suiteTeardown)
+	}
+	if (typeof def.teardown === "function") {
+		teardown(def.teardown)
+	}
+	return def
+}
+
+/**
+ * Runs every registered test once per variable set and iteration.
+ *
+ * Test failures are recorded in the returned results rather than thrown; an error
+ * thrown by `def.tests()` or by a hook aborts the run and rejects the promise.
+ * Unless `opts.report` is `false`, a summary is printed and a JSON file (plus one
+ * log file per failed execution) is written under `.results` in the working directory.
+ *
+ * @param def - Matrix definition: variable sets, iteration count and test declarations.
+ * @param opts - `verbosity` (default `"silent"`, which mutes `console.*` while tests
+ * run and omits the per-suite listing) and `report`.
+ * @returns One result per (variable, iteration, test) execution, in execution order.
+ */
+export async function runMatrix(def: MatrixSuiteDefinition, opts?: MatrixRunOptions): Promise<MatrixTestResult[]> {
+	const startTime = Date.now()
+	const startTimeIso = new Date(startTime).toISOString()
+	const verbosity: MatrixRunOptions["verbosity"] = opts?.verbosity ?? "silent"
+
+	const originalConsole = {
+		log: console.log,
+		info: console.info,
+		warn: console.warn,
+		error: console.error,
+		debug: console.debug,
+	} as const
+
+	// The runner's own output bypasses the muted console methods.
+	const runnerConsole = {
+		log: originalConsole.log,
+		error: originalConsole.error,
+	} as const
+
+	if (verbosity === "silent") {
+		console.log = () => {}
+		console.info = () => {}
+		console.warn = () => {}
+		console.error = () => {}
+		console.debug = () => {}
+	}
+	testRegistry.length = 0
+	currentSuite = null
+
+	const results: MatrixTestResult[] = []
+
+	try {
+		def.tests()
+
+		for (const hook of _globalBeforeAll) {
+			await hook()
+		}
+
+		for (const variable of def.variables) {
+			for (let i = 0; i < def.iterations; ++i) {
+				for (const t of testRegistry) {
+					for (const hook of _globalBeforeEach) {
+						await hook()
+					}
+
+					try {
+						await t.fn({ variable, iteration: i })
+						results.push({ suite: t.suite, variable, iteration: i, testName: t.name, passed: true })
+					} catch (e) {
+						results.push({
+							suite: t.suite,
+							variable,
+							iteration: i,
+							testName: t.name,
+							passed: false,
+							error: e,
+						})
+					}
+
+					for (const hook of _globalAfterEach) {
+						await hook()
+					}
+				}
+			}
+		}
+
+		for (const hook of _globalAfterAll) {
+			await hook()
+		}
+	} finally {
+		console.log = originalConsole.log
+		console.info = originalConsole.info
+		console.warn = originalConsole.warn
+		console.error = originalConsole.error
+		console.debug = originalConsole.debug
+		// Clear the hooks on failure too, so they cannot leak into a later run.
+		clearGlobalHooks()
+	}
+
+	if (!opts || opts.report !== false) {
+		const grouped = new Map<string, MatrixTestResult[]>()
+		for (const r of results) {
+			const bucket = grouped.get(r.suite)
+			if (bucket) bucket.push(r)
+			else grouped.set(r.suite, [r])
+		}
+
+		if (verbosity !== "silent") {
+			for (const [suiteName, suiteResults] of grouped) {
+				runnerConsole.log(`\n${color("Suite:", COLORS.bold)} ${color(suiteName, COLORS.dim)}`)
+
+				const perTestStats = new Map<string, { total: number; passed: number; failed: number }>()
+
+				for (const r of suiteResults) {
+					let stats = perTestStats.get(r.testName)
+					if (!stats) {
+						stats = { total: 0, passed: 0, failed: 0 }
+						perTestStats.set(r.testName, stats)
+					}
+					stats.total += 1
+					if (r.passed) stats.passed += 1
+					else stats.failed += 1
+
+					const mark = r.passed ? color("✓", COLORS.green) : color("✗", COLORS.red)
+					const iterationTag = color(`[iteration ${r.iteration}]`, COLORS.gray)
+					const errorSuffix =
+						r.passed || !r.error
+							? ""
+							: " -- " + color(r.error instanceof Error ? r.error.message : String(r.error), COLORS.yellow)
+
+					runnerConsole.log(` ${mark} ${r.testName} ${iterationTag}${errorSuffix}`)
+				}
+
+				if (perTestStats.size > 0) {
+					runnerConsole.log(` ${color("Per-test iteration stats:", COLORS.bold)}`)
+
+					const sortedStats = [...perTestStats].sort(([a], [b]) => a.localeCompare(b))
+					for (const [testName, { total, passed, failed }] of sortedStats) {
+						const failRate = total > 0 ? failed / total : 0
+						const failPct = (failRate * 100).toFixed(1)
+						// >= 80 % failures is FAILED, no failure is PASSED, anything in between is FLAKY.
+						const statusLabel =
+							failRate >= 0.8
+								? color("FAILED", COLORS.red)
+								: failed === 0
+									? color("PASSED", COLORS.green)
+									: color("FLAKY", COLORS.yellow)
+
+						runnerConsole.log(
+							` - ${testName}: ${passed}/${total} iterations passed, ${failed} failed (${failPct}% failure) [${statusLabel}]`,
+						)
+					}
+				}
+			}
+		}
+
+		const passedTotal = results.filter((r) => r.passed).length
+		const failedTotal = results.length - passedTotal
+		const matrixLabel = color("Matrix:", COLORS.bold)
+		const matrixSummary = color(
+			`${passedTotal}/${results.length} passing`,
+			failedTotal === 0 ? COLORS.green : COLORS.red,
+		)
+		runnerConsole.log(`\n${matrixLabel} ${matrixSummary}\n`)
+		const duration = Date.now() - startTime
+		const durationText = formatDuration(duration)
+		const pendingTotal = 0
+		runnerConsole.log(color(`${passedTotal} passing (${durationText})`, COLORS.green))
+
+		runnerConsole.log(" " + color(`${pendingTotal} pending`, COLORS.cyan))
+
+		const failingColor = failedTotal > 0 ? COLORS.red : COLORS.gray
+		runnerConsole.log(" " + color(`${failedTotal} failing`, failingColor))
+
+		try {
+			const resultsDir = path.join(process.cwd(), ".results")
+			const timestamp = startTimeIso.replace(/[:.]/g, "-")
+			const logsDir = path.join(resultsDir, `matrix-logs-${timestamp}`)
+			const filePath = path.join(resultsDir, `matrix-results-${timestamp}.json`)
+
+			const sanitizeForFilename = (value: string): string =>
+				value
+					.replace(/[^a-zA-Z0-9-_]+/g, "_")
+					.replace(/_+/g, "_")
+					.replace(/^_+|_+$/g, "")
+					.slice(0, 80) || "unnamed"
+
+			type AggregatedTestJson = {
+				testName: string
+				pass_count: number
+				fail_count: number
+				passed: boolean
+				errors: Array<{ message: string; log: string | null }>
+			}
+
+			const aggregatedSuites: Array<{ suite: string; tests: AggregatedTestJson[] }> = []
+			const failedIterationLogs: Array<{ filePath: string; content: string }> = []
+
+			for (const [suiteName, suiteResults] of grouped) {
+				const perTest = new Map<string, MatrixTestResult[]>()
+				for (const r of suiteResults) {
+					const bucket = perTest.get(r.testName)
+					if (bucket) bucket.push(r)
+					else perTest.set(r.testName, [r])
+				}
+
+				const tests: AggregatedTestJson[] = []
+
+				for (const [testName, iterations] of [...perTest].sort(([a], [b]) => a.localeCompare(b))) {
+					const totalIterations = iterations.length
+					const passedIterations = iterations.filter((r) => r.passed).length
+					const failedIterations = totalIterations - passedIterations
+					const passRate = totalIterations > 0 ? passedIterations / totalIterations : 0
+
+					// A test counts as passed overall when at least 80 % of its executions passed.
+					const aggregatedPassed = passRate >= 0.8
+
+					const errors: Array<{ message: string; log: string | null }> = []
+
+					for (const r of iterations) {
+						if (r.passed) continue
+
+						const errorMessage =
+							r.error instanceof Error
+								? r.error.message
+								: r.error != null
+									? String(r.error)
+									: "Unknown error"
+
+						// The variable index keeps logs of different variable sets from overwriting
+						// each other: suite, test and iteration alone repeat for every variable.
+						const variableIndex = def.variables.indexOf(r.variable)
+						const logFileName =
+							[
+								"matrix",
+								sanitizeForFilename(suiteName),
+								sanitizeForFilename(testName),
+								`var-${variableIndex}`,
+								`iter-${r.iteration}`,
+							].join("__") + ".log"
+
+						const logAbsolutePath = path.join(logsDir, logFileName)
+						const logRelativePath = path.relative(process.cwd(), logAbsolutePath)
+
+						const logLines = [
+							`suite: ${suiteName}`,
+							`test: ${testName}`,
+							`iteration: ${r.iteration}`,
+							`runStartTime: ${startTimeIso}`,
+							"",
+							"variable:",
+							JSON.stringify(r.variable, null, 2),
+							"",
+							"error:",
+							r.error instanceof Error ? (r.error.stack ?? r.error.message) : errorMessage,
+							"",
+						].join("\n")
+
+						failedIterationLogs.push({ filePath: logAbsolutePath, content: logLines })
+
+						errors.push({
+							message: errorMessage,
+							log: logRelativePath,
+						})
+					}
+
+					tests.push({
+						testName,
+						pass_count: passedIterations,
+						fail_count: failedIterations,
+						passed: aggregatedPassed,
+						errors,
+					})
+				}
+
+				aggregatedSuites.push({ suite: suiteName, tests })
+			}
+
+			const jsonPayload = {
+				summary: {
+					totalIterations: results.length,
+					passedIterations: passedTotal,
+					failedIterations: failedTotal,
+					durationMs: duration,
+					startTime: startTimeIso,
+				},
+				results: aggregatedSuites,
+			}
+			await fs.mkdir(resultsDir, { recursive: true })
+			if (failedIterationLogs.length > 0) {
+				await fs.mkdir(logsDir, { recursive: true })
+				for (const entry of failedIterationLogs) {
+					await fs.writeFile(entry.filePath, entry.content, "utf8")
+				}
+			}
+			await fs.writeFile(filePath, JSON.stringify(jsonPayload, null, 2), "utf8")
+		} catch (err) {
+			// Persisting the report is best-effort; the in-memory results are still returned.
+			runnerConsole.error("Failed to write matrix JSON to results folder:", err)
+		}
+	}
+
+	return results
+}
diff --git a/packages/evally/src/runner/types.ts b/packages/evally/src/runner/types.ts
new file mode 100644
index 0000000000..a33d3429f9
--- /dev/null
+++ b/packages/evally/src/runner/types.ts
@@ -0,0 +1,61 @@
+/** One set of matrix parameters; keys and value types are chosen by the suite author. */
+export interface MatrixVariable {
+	[key: string]: any
+}
+
+/** Argument passed to every test function. */
+export interface MatrixTestContext {
+	/** The variable set the test is currently running against. */
+	variable: MatrixVariable
+	/** Zero-based iteration index for the current variable set. */
+	iteration: number
+}
+
+/** A test body: passes by returning (or resolving) and fails by throwing (or rejecting). */
+export type MatrixTestFn = (ctx: MatrixTestContext) => Promise<void> | void
+
+/** A test as registered by `it`, together with the suite it was declared in. */
+export interface MatrixTestDescription {
+	suite: string
+	name: string
+	fn: MatrixTestFn
+}
+
+/** Everything `runMatrix` needs to execute a matrix. */
+export interface MatrixSuiteDefinition {
+	/** Variable sets; every test runs `iterations` times against each of them. */
+	variables: MatrixVariable[]
+	iterations: number
+	/** Declares the suites and tests through `describe` / `it`. */
+	tests: () => void
+	/** Optional hook, registered by `defineMatrix`, run once before the first test. */
+	suiteSetup?: () => Promise<void> | void
+	/** Optional hook, registered by `defineMatrix`, run once after the last test. */
+	suiteTeardown?: () => Promise<void> | void
+	/** Optional hook, registered by `defineMatrix`, run before every test execution. */
+	setup?: () => Promise<void> | void
+	/** Optional hook, registered by `defineMatrix`, run after every test execution. */
+	teardown?: () => Promise<void> | void
+}
+
+/** Outcome of one execution of one test. */
+export interface MatrixTestResult {
+	suite: string
+	variable: MatrixVariable
+	iteration: number
+	testName: string
+	passed: boolean
+	/** The value thrown by the test; only set when `passed` is `false`. */
+	error?: any
+}
+
+/** Console verbosity of a run; `runMatrix` currently treats `"summary"` and `"verbose"` alike. */
+export type MatrixVerbosity = "silent" | "summary" | "verbose"
+
+/** Options accepted by `runMatrix`. */
+export interface MatrixRunOptions {
+	/** Set to `false` to skip both the console report and the files written under `.results`. */
+	report?: boolean
+	/** Defaults to `"silent"`. */
+	verbosity?: MatrixVerbosity
+}
diff --git a/packages/evally/tsconfig.json b/packages/evally/tsconfig.json
new file mode 100644
index 0000000000..94c1303e52
--- /dev/null
+++ b/packages/evally/tsconfig.json
@@ -0,0 +1,19 @@
+{
+ "compilerOptions": {
+ "target": "ESNext",
+ "module": "ESNext",
+ "declaration": true,
+ "declarationDir": "dist",
+ "outDir": "dist",
+ "strict": true,
+ "esModuleInterop": true,
+ "moduleResolution": "node",
+ "resolveJsonModule": true,
+ "baseUrl": "./src",
+ "rootDir": "./src",
+ "forceConsistentCasingInFileNames": true,
+ "skipLibCheck": true
+ },
+ "include": ["src/**/*.ts"],
+ "exclude": ["node_modules", "dist"]
+}
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 5589df1b42..b7f803fb0e 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -117,6 +117,40 @@ importers:
specifier: 5.8.3
version: 5.8.3
+ apps/vscode-evals:
+ dependencies:
+ vscode:
+ specifier: ^1.1.37
+ version: 1.1.37
+ devDependencies:
+ '@roo-code/config-eslint':
+ specifier: workspace:^
+ version: link:../../packages/config-eslint
+ '@roo-code/config-typescript':
+ specifier: workspace:^
+ version: link:../../packages/config-typescript
+ '@roo-code/evally':
+ specifier: workspace:^
+ version: link:../../packages/evally
+ '@roo-code/types':
+ specifier: workspace:^
+ version: link:../../packages/types
+ '@types/vscode':
+ specifier: ^1.95.0
+ version: 1.103.0
+ '@vscode/test-cli':
+ specifier: ^0.0.11
+ version: 0.0.11
+ '@vscode/test-electron':
+ specifier: ^2.4.0
+ version: 2.5.2
+ rimraf:
+ specifier: ^6.0.1
+ version: 6.0.1
+ typescript:
+ specifier: 5.8.3
+ version: 5.8.3
+
apps/vscode-nightly:
devDependencies:
'@roo-code/build':
@@ -505,6 +539,8 @@ importers:
specifier: ^3.2.3
version: 3.2.4(@types/debug@4.1.12)(@types/node@24.2.1)(@vitest/ui@3.2.4)(jiti@2.4.2)(jsdom@26.1.0)(lightningcss@1.30.1)(tsx@4.19.4)(yaml@2.8.0)
+ packages/evally: {}
+
packages/evals:
dependencies:
'@roo-code/ipc':
@@ -3947,6 +3983,10 @@ packages:
peerDependencies:
'@testing-library/dom': '>=7.21.4'
+ '@tootallnate/once@1.1.2':
+ resolution: {integrity: sha512-RbzJvlNzmRq5c3O09UipeuXno4tA1FE6ikOjxZK0tuxVv3412l64l5t1W5pj4+rJq9vpkm/kwiR07aZXnsKPxw==}
+ engines: {node: '>= 6'}
+
'@tootallnate/quickjs-emscripten@0.23.0':
resolution: {integrity: sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==}
@@ -4460,6 +4500,14 @@ packages:
engines: {node: '>=0.4.0'}
hasBin: true
+ agent-base@4.3.0:
+ resolution: {integrity: sha512-salcGninV0nPrwpGNn4VTXBb1SOuXQBiqbrNXoeizJsHrsL6ERFM2Ne3JUSBWRE6aeNJI2ROP/WEEIDUiDe3cg==}
+ engines: {node: '>= 4.0.0'}
+
+ agent-base@6.0.2:
+ resolution: {integrity: sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==}
+ engines: {node: '>= 6.0.0'}
+
agent-base@7.1.3:
resolution: {integrity: sha512-jRR5wdylq8CkOe6hei19GGZnxM6rBGwFl3Bg0YItGDimvjGtAvdZk4Pu6Cl4u4Igsws4a1fd1Vq3ezrhn4KmFw==}
engines: {node: '>= 14'}
@@ -5031,6 +5079,9 @@ packages:
resolution: {integrity: sha512-2uM9rYjPvyq39NwLRqaiLtWHyDC1FvryJDa2ATTVims5YAS4PupsEQsDvP14FqhFr0P49CYDugi59xaxJlTXRA==}
engines: {node: '>=20'}
+ commander@2.15.1:
+ resolution: {integrity: sha512-VlfT9F3V0v+jr4yxPc5gg9s62/fIVWsd2Bk2iD435um1NlGMYdVCq+MjcXnhYq2icNOizHr1kK+5TI6H0Hy0ag==}
+
commander@4.1.1:
resolution: {integrity: sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA==}
engines: {node: '>= 6'}
@@ -5366,6 +5417,14 @@ packages:
resolution: {integrity: sha512-Xks6RUDLZFdz8LIdR6q0MTH44k7FikOmnh5xkSjMig6ch45afc8sjTjRQf3P6ax8dMgcQrYO/AR2RGWURrruqw==}
engines: {node: '>=18'}
+ debug@3.1.0:
+ resolution: {integrity: sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==}
+ peerDependencies:
+ supports-color: '*'
+ peerDependenciesMeta:
+ supports-color:
+ optional: true
+
debug@3.2.7:
resolution: {integrity: sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==}
peerDependencies:
@@ -5518,6 +5577,10 @@ packages:
resolution: {integrity: sha512-EjePK1srD3P08o2j4f0ExnylqRs5B9tJjcp9t1krH2qRi8CCdsYfwe9JgSLurFBWwq4uOlipzfk5fHNvwFKr8Q==}
engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0}
+ diff@3.5.0:
+ resolution: {integrity: sha512-A46qtFgd+g7pDZinpnwiRJtxbC1hpgf0uzP3iG89scHk0AUC7A1TGxf5OiiOUv/JMZR8GOt8hL900hV0bOy5xA==}
+ engines: {node: '>=0.3.1'}
+
diff@5.2.0:
resolution: {integrity: sha512-uIFDxqpRZGZ6ThOk84hEfqWoHx2devRFvpTZcTHur85vImfaxUbTW9Ryh4CpCuDnToOP1CEtXKIgytHBPVff5A==}
engines: {node: '>=0.3.1'}
@@ -5815,6 +5878,12 @@ packages:
resolution: {integrity: sha512-w+5mJ3GuFL+NjVtJlvydShqE1eN3h3PbI7/5LAsYJP/2qtuMXjfL2LpHSRqo4b4eSF5K/DH1JXKUAHSB2UW50g==}
engines: {node: '>= 0.4'}
+ es6-promise@4.2.8:
+ resolution: {integrity: sha512-HJDGx5daxeIvxdBxvG2cb9g4tEvwIk3i8+nhX0yGrYmZUzbkdg8QbDevheDB8gd0//uPj4c1EQua8Q+MViT0/w==}
+
+ es6-promisify@5.0.0:
+ resolution: {integrity: sha512-C+d6UdsYDk0lMebHNR4S2NybQMMngAOnOwYBQjTOiv0MkoJMP0Myw2mgpDLBcpfCmRLxyFqYhS/CfOENq4SJhQ==}
+
esbuild-register@3.6.0:
resolution: {integrity: sha512-H2/S7Pm8a9CL1uhp9OvjwrBh5Pvx0H8qVOxNu8Wed9Y7qv56MPtq+GGM8RJpq6glYJn9Wspr8uw7l55uyinNeg==}
peerDependencies:
@@ -6452,6 +6521,10 @@ packages:
resolution: {integrity: sha512-5v6yZd4JK3eMI3FqqCouswVqwugaA9r4dNZB1wwcmrD02QkV5H0y7XBQW8QwQqEaZY1pM9aqORSORhJRdNK44Q==}
engines: {node: '>=6.0'}
+ growl@1.10.5:
+ resolution: {integrity: sha512-qBr4OuELkhPenW6goKVXiv47US3clb3/IbuWF9KNKEijAy9oeHxU9IgzjvJhHkUzhaj7rOUD7+YGWqUjLp5oSA==}
+ engines: {node: '>=4.x'}
+
gtoken@7.1.0:
resolution: {integrity: sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==}
engines: {node: '>=14.0.0'}
@@ -6542,6 +6615,10 @@ packages:
hastscript@9.0.1:
resolution: {integrity: sha512-g7df9rMFX/SPi34tyGCyUBREQoKkapwdY/T04Qn9TDWfHhAYt4/I0gMVirzK5wEzeUqIjEB+LXC/ypb7Aqno5w==}
+ he@1.1.1:
+ resolution: {integrity: sha512-z/GDPjlRMNOa2XJiB4em8wJpuuBfrFOlYKTZxtpkdr1uPdibHI8rYA3MY0KDObpVyaes0e/aunid/t88ZI2EKA==}
+ hasBin: true
+
he@1.2.0:
resolution: {integrity: sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==}
hasBin: true
@@ -6584,10 +6661,26 @@ packages:
resolution: {integrity: sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ==}
engines: {node: '>= 0.8'}
+ http-proxy-agent@2.1.0:
+ resolution: {integrity: sha512-qwHbBLV7WviBl0rQsOzH6o5lwyOIvwp/BdFnvVxXORldu5TmjFfjzBcWUWS5kWAZhmv+JtiDhSuQCp4sBfbIgg==}
+ engines: {node: '>= 4.5.0'}
+
+ http-proxy-agent@4.0.1:
+ resolution: {integrity: sha512-k0zdNgqWTGA6aeIRVpvfVob4fL52dTfaehylg0Y4UvSySvOq/Y+BOyPrgpUrA7HylqvU8vIZGsRuXmspskV0Tg==}
+ engines: {node: '>= 6'}
+
http-proxy-agent@7.0.2:
resolution: {integrity: sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==}
engines: {node: '>= 14'}
+ https-proxy-agent@2.2.4:
+ resolution: {integrity: sha512-OmvfoQ53WLjtA9HeYP9RNrWMJzzAz1JGaSFr1nijg0PVR1JaD/xbJq1mdEIIlxGpXp9eSe/O2LgU9DJmTPd0Eg==}
+ engines: {node: '>= 4.5.0'}
+
+ https-proxy-agent@5.0.1:
+ resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==}
+ engines: {node: '>= 6'}
+
https-proxy-agent@7.0.6:
resolution: {integrity: sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==}
engines: {node: '>= 14'}
@@ -7757,6 +7850,9 @@ packages:
resolution: {integrity: sha512-enIvLvRAFZYXJzkCYG5RKmPfrFArdLv+R+lbQ53BmIMLIry74bjKzX6iHAm8WYamJkhSSEabrWN5D97XnKObjQ==}
engines: {node: 20 || >=22}
+ minimatch@3.0.4:
+ resolution: {integrity: sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==}
+
minimatch@3.1.2:
resolution: {integrity: sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==}
@@ -7768,6 +7864,9 @@ packages:
resolution: {integrity: sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==}
engines: {node: '>=16 || 14 >=14.17'}
+ minimist@0.0.8:
+ resolution: {integrity: sha512-miQKw5Hv4NS1Psg2517mV4e4dYNaO3++hjAvLOAzKqZ61rH8NS1SK+vbfBWZ5PY/Me/bEWhUwqMghEW5Fb9T7Q==}
+
minimist@1.2.8:
resolution: {integrity: sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==}
@@ -7785,6 +7884,11 @@ packages:
mkdirp-classic@0.5.3:
resolution: {integrity: sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==}
+ mkdirp@0.5.1:
+ resolution: {integrity: sha512-SknJC52obPfGQPnjIkXbmA6+5H15E+fR+E4iR2oQ3zzCLbd7/ONua69R/Gw7AgkTLsRG+r5fzksYwWe1AgTyWA==}
+ deprecated: Legacy versions of mkdirp are no longer supported. Please update to mkdirp 1.x. (Note that the API surface has changed to use Promises in 1.x.)
+ hasBin: true
+
mkdirp@0.5.6:
resolution: {integrity: sha512-FP+p8RB8OWpF3YZBCrP5gtADmtXApB5AMLn+vdyA+PyxCjrCs00mjyUozssO33cwDeT3wNGdLxJ5M//YqtHAJw==}
hasBin: true
@@ -7807,6 +7911,11 @@ packages:
engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0}
hasBin: true
+ mocha@5.2.0:
+ resolution: {integrity: sha512-2IUgKDhc3J7Uug+FxMXuqIyYzH7gJjXECKe/w43IGgQHTSj3InJi+yAA7T24L9bQMRKiUEHxEX37G5JpVUGLcQ==}
+ engines: {node: '>= 4.0.0'}
+ hasBin: true
+
monaco-vscode-textmate-theme-converter@0.1.7:
resolution: {integrity: sha512-ZMsq1RPWwOD3pvXD0n+9ddnhfzZoiUMwNIWPNUqYqEiQeH2HjyZ9KYOdt/pqe0kkN8WnYWLrxT9C/SrtIsAu2Q==}
hasBin: true
@@ -7827,6 +7936,9 @@ packages:
resolution: {integrity: sha512-Y3wQdFg2Va6etvQ5I82yUhGdsKrcYox6p7FfL1LbK2J4V01F9TGlepTIhnK24t7koZibmg82KGglhA1XK5IsLQ==}
engines: {node: '>=10'}
+ ms@2.0.0:
+ resolution: {integrity: sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==}
+
ms@2.1.3:
resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==}
@@ -9393,6 +9505,10 @@ packages:
engines: {node: '>=16 || 14 >=14.17'}
hasBin: true
+ supports-color@5.4.0:
+ resolution: {integrity: sha512-zjaXglF5nnWpsq470jSv6P9DwPvgLkuapYmfDm3JWOm0vkNTVF2tI4UrN2r6jH1qM/uc/WtxYY1hYoA2dOKj5w==}
+ engines: {node: '>=4'}
+
supports-color@5.5.0:
resolution: {integrity: sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==}
engines: {node: '>=4'}
@@ -10086,9 +10202,20 @@ packages:
vscode-material-icons@0.1.1:
resolution: {integrity: sha512-GsoEEF8Tbb0yUFQ6N6FPvh11kFkL9F95x0FkKlbbfRQN9eFms67h+L3t6b9cUv58dSn2gu8kEhNfoESVCrz4ag==}
+ vscode-test@0.4.3:
+ resolution: {integrity: sha512-EkMGqBSefZH2MgW65nY05rdRSko15uvzq4VAPM5jVmwYuFQKE7eikKXNJDRxL+OITXHB6pI+a3XqqD32Y3KC5w==}
+ engines: {node: '>=8.9.3'}
+ deprecated: This package has been renamed to @vscode/test-electron, please update to the new name
+
vscode-uri@3.0.8:
resolution: {integrity: sha512-AyFQ0EVmsOZOlAnxoFOGOq1SQDWAB7C6aqMGS23svWAllfOaxbuFvcT8D1i8z3Gyn8fraVeZNNmN6e9bxxXkKw==}
+ vscode@1.1.37:
+ resolution: {integrity: sha512-vJNj6IlN7IJPdMavlQa1KoFB3Ihn06q1AiN3ZFI/HfzPNzbKZWPPuiU+XkpNOfGU5k15m4r80nxNPlM7wcc0wg==}
+ engines: {node: '>=8.9.3'}
+ deprecated: 'This package is deprecated in favor of @types/vscode and vscode-test. For more information please read: https://code.visualstudio.com/updates/v1_36#_splitting-vscode-package-into-typesvscode-and-vscodetest'
+ hasBin: true
+
vscrui@0.2.2:
resolution: {integrity: sha512-buw2OipqUl7GCBq1mxcAjUwoUsslGzVhdaxDPmEx27xzc3QAJJZHtT30QbakgZVJ1Jb3E6kcsguUIFEGxrgkyQ==}
peerDependencies:
@@ -11180,7 +11307,7 @@ snapshots:
'@babel/parser': 7.27.2
'@babel/template': 7.27.2
'@babel/types': 7.27.1
- debug: 4.4.1(supports-color@8.1.1)
+ debug: 4.4.3
globals: 11.12.0
transitivePeerDependencies:
- supports-color
@@ -13658,6 +13785,8 @@ snapshots:
dependencies:
'@testing-library/dom': 10.4.0
+ '@tootallnate/once@1.1.2': {}
+
'@tootallnate/quickjs-emscripten@0.23.0': {}
'@tybys/wasm-util@0.9.0':
@@ -14043,7 +14172,7 @@ snapshots:
dependencies:
'@typescript-eslint/typescript-estree': 8.32.1(typescript@5.8.3)
'@typescript-eslint/utils': 8.32.1(eslint@9.27.0(jiti@2.4.2))(typescript@5.8.3)
- debug: 4.4.1(supports-color@8.1.1)
+ debug: 4.4.3
eslint: 9.27.0(jiti@2.4.2)
ts-api-utils: 2.1.0(typescript@5.8.3)
typescript: 5.8.3
@@ -14056,7 +14185,7 @@ snapshots:
dependencies:
'@typescript-eslint/types': 8.32.1
'@typescript-eslint/visitor-keys': 8.32.1
- debug: 4.4.1(supports-color@8.1.1)
+ debug: 4.4.3
fast-glob: 3.3.3
is-glob: 4.0.3
minimatch: 9.0.5
@@ -14306,6 +14435,16 @@ snapshots:
acorn@8.15.0: {}
+ agent-base@4.3.0:
+ dependencies:
+ es6-promisify: 5.0.0
+
+ agent-base@6.0.2:
+ dependencies:
+ debug: 4.4.3
+ transitivePeerDependencies:
+ - supports-color
+
agent-base@7.1.3: {}
agentkeepalive@4.6.0:
@@ -14607,7 +14746,7 @@ snapshots:
dependencies:
bytes: 3.1.2
content-type: 1.0.5
- debug: 4.4.1(supports-color@8.1.1)
+ debug: 4.4.3
http-errors: 2.0.0
iconv-lite: 0.6.3
on-finished: 2.4.1
@@ -14953,6 +15092,8 @@ snapshots:
commander@14.0.0: {}
+ commander@2.15.1: {}
+
commander@4.1.1: {}
commander@6.2.1: {}
@@ -15312,6 +15453,12 @@ snapshots:
debounce@2.2.0: {}
+ debug@3.1.0(supports-color@5.4.0):
+ dependencies:
+ ms: 2.0.0
+ optionalDependencies:
+ supports-color: 5.4.0
+
debug@3.2.7:
dependencies:
ms: 2.1.3
@@ -15420,6 +15567,8 @@ snapshots:
diff-sequences@29.6.3: {}
+ diff@3.5.0: {}
+
diff@5.2.0: {}
dijkstrajs@1.0.3: {}
@@ -15698,6 +15847,12 @@ snapshots:
is-date-object: 1.1.0
is-symbol: 1.1.1
+ es6-promise@4.2.8: {}
+
+ es6-promisify@5.0.0:
+ dependencies:
+ es6-promise: 4.2.8
+
esbuild-register@3.6.0(esbuild@0.25.9):
dependencies:
debug: 4.4.1(supports-color@8.1.1)
@@ -16086,7 +16241,7 @@ snapshots:
extract-zip@2.0.1:
dependencies:
- debug: 4.4.1(supports-color@8.1.1)
+ debug: 4.4.3
get-stream: 5.2.0
yauzl: 2.10.0
optionalDependencies:
@@ -16187,7 +16342,7 @@ snapshots:
finalhandler@2.1.0:
dependencies:
- debug: 4.4.1(supports-color@8.1.1)
+ debug: 4.4.3
encodeurl: 2.0.0
escape-html: 1.0.3
on-finished: 2.4.1
@@ -16435,7 +16590,7 @@ snapshots:
dependencies:
basic-ftp: 5.0.5
data-uri-to-buffer: 6.0.2
- debug: 4.4.1(supports-color@8.1.1)
+ debug: 4.4.3
transitivePeerDependencies:
- supports-color
@@ -16524,6 +16679,8 @@ snapshots:
section-matter: 1.0.0
strip-bom-string: 1.0.0
+ growl@1.10.5: {}
+
gtoken@7.1.0:
dependencies:
gaxios: 6.7.1
@@ -16699,6 +16856,8 @@ snapshots:
property-information: 7.1.0
space-separated-tokens: 2.0.2
+ he@1.1.1: {}
+
he@1.2.0: {}
hex-rgb@4.3.0: {}
@@ -16740,6 +16899,21 @@ snapshots:
statuses: 2.0.1
toidentifier: 1.0.1
+ http-proxy-agent@2.1.0:
+ dependencies:
+ agent-base: 4.3.0
+ debug: 3.1.0(supports-color@5.4.0)
+ transitivePeerDependencies:
+ - supports-color
+
+ http-proxy-agent@4.0.1:
+ dependencies:
+ '@tootallnate/once': 1.1.2
+ agent-base: 6.0.2
+ debug: 4.4.3
+ transitivePeerDependencies:
+ - supports-color
+
http-proxy-agent@7.0.2:
dependencies:
agent-base: 7.1.3
@@ -16747,6 +16921,20 @@ snapshots:
transitivePeerDependencies:
- supports-color
+ https-proxy-agent@2.2.4:
+ dependencies:
+ agent-base: 4.3.0
+ debug: 3.2.7
+ transitivePeerDependencies:
+ - supports-color
+
+ https-proxy-agent@5.0.1:
+ dependencies:
+ agent-base: 6.0.2
+ debug: 4.4.3
+ transitivePeerDependencies:
+ - supports-color
+
https-proxy-agent@7.0.6:
dependencies:
agent-base: 7.1.3
@@ -17229,7 +17417,7 @@ snapshots:
lodash.isstring: 4.0.1
lodash.once: 4.1.1
ms: 2.1.3
- semver: 7.7.2
+ semver: 7.7.3
jsx-ast-utils@3.3.5:
dependencies:
@@ -18069,7 +18257,7 @@ snapshots:
micromark@2.11.4:
dependencies:
- debug: 4.4.1(supports-color@8.1.1)
+ debug: 4.4.3
parse-entities: 2.0.0
transitivePeerDependencies:
- supports-color
@@ -18134,6 +18322,10 @@ snapshots:
dependencies:
'@isaacs/brace-expansion': 5.0.0
+ minimatch@3.0.4:
+ dependencies:
+ brace-expansion: 2.0.2
+
minimatch@3.1.2:
dependencies:
brace-expansion: 2.0.2
@@ -18146,6 +18338,8 @@ snapshots:
dependencies:
brace-expansion: 2.0.2
+ minimist@0.0.8: {}
+
minimist@1.2.8: {}
minipass@7.1.2: {}
@@ -18159,6 +18353,10 @@ snapshots:
mkdirp-classic@0.5.3:
optional: true
+ mkdirp@0.5.1:
+ dependencies:
+ minimist: 0.0.8
+
mkdirp@0.5.6:
dependencies:
minimist: 1.2.8
@@ -18197,6 +18395,20 @@ snapshots:
yargs-parser: 21.1.1
yargs-unparser: 2.0.0
+ mocha@5.2.0:
+ dependencies:
+ browser-stdout: 1.3.1
+ commander: 2.15.1
+ debug: 3.1.0(supports-color@5.4.0)
+ diff: 3.5.0
+ escape-string-regexp: 1.0.5
+ glob: 11.1.0
+ growl: 1.10.5
+ he: 1.1.1
+ minimatch: 3.0.4
+ mkdirp: 0.5.1
+ supports-color: 5.4.0
+
monaco-vscode-textmate-theme-converter@0.1.7(tslib@2.8.1):
dependencies:
commander: 8.3.0
@@ -18213,6 +18425,8 @@ snapshots:
mrmime@2.0.1: {}
+ ms@2.0.0: {}
+
ms@2.1.3: {}
mute-stream@0.0.8: {}
@@ -18295,7 +18509,7 @@ snapshots:
node-abi@3.75.0:
dependencies:
- semver: 7.7.2
+ semver: 7.7.3
optional: true
node-addon-api@4.3.0:
@@ -18578,7 +18792,7 @@ snapshots:
dependencies:
'@tootallnate/quickjs-emscripten': 0.23.0
agent-base: 7.1.3
- debug: 4.4.1(supports-color@8.1.1)
+ debug: 4.4.3
get-uri: 6.0.4
http-proxy-agent: 7.0.2
https-proxy-agent: 7.0.6
@@ -18896,7 +19110,7 @@ snapshots:
proxy-agent@6.5.0:
dependencies:
agent-base: 7.1.3
- debug: 4.4.1(supports-color@8.1.1)
+ debug: 4.4.3
http-proxy-agent: 7.0.2
https-proxy-agent: 7.0.6
lru-cache: 7.18.3
@@ -19445,7 +19659,7 @@ snapshots:
router@2.2.0:
dependencies:
- debug: 4.4.1(supports-color@8.1.1)
+ debug: 4.4.3
depd: 2.0.0
is-promise: 4.0.0
parseurl: 1.3.3
@@ -19551,7 +19765,7 @@ snapshots:
send@1.2.0:
dependencies:
- debug: 4.4.1(supports-color@8.1.1)
+ debug: 4.4.3
encodeurl: 2.0.0
escape-html: 1.0.3
etag: 1.8.1
@@ -19768,7 +19982,7 @@ snapshots:
socks-proxy-agent@8.0.5:
dependencies:
agent-base: 7.1.3
- debug: 4.4.1(supports-color@8.1.1)
+ debug: 4.4.3
socks: 2.8.4
transitivePeerDependencies:
- supports-color
@@ -20044,6 +20258,10 @@ snapshots:
pirates: 4.0.7
ts-interface-checker: 0.1.13
+ supports-color@5.4.0:
+ dependencies:
+ has-flag: 3.0.0
+
supports-color@5.5.0:
dependencies:
has-flag: 3.0.0
@@ -20971,8 +21189,27 @@ snapshots:
vscode-material-icons@0.1.1: {}
+ vscode-test@0.4.3:
+ dependencies:
+ http-proxy-agent: 2.1.0
+ https-proxy-agent: 2.2.4
+ transitivePeerDependencies:
+ - supports-color
+
vscode-uri@3.0.8: {}
+ vscode@1.1.37:
+ dependencies:
+ glob: 11.1.0
+ http-proxy-agent: 4.0.1
+ https-proxy-agent: 5.0.1
+ mocha: 5.2.0
+ semver: 5.7.2
+ source-map-support: 0.5.21
+ vscode-test: 0.4.3
+ transitivePeerDependencies:
+ - supports-color
+
vscrui@0.2.2(@types/react@18.3.23)(react@18.3.1):
dependencies:
'@types/react': 18.3.23