From c3ad4d3127bfa273012f77ac39ea9ca11aac85dd Mon Sep 17 00:00:00 2001 From: Dennise Bartlett Date: Wed, 24 Dec 2025 09:28:12 -0800 Subject: [PATCH 1/3] feat: Add evally test runner and implementation for vscode-evals --- .gitignore | 4 +- apps/vscode-evals/.env.local.sample | 1 + apps/vscode-evals/.vscode-test.mjs | 16 + apps/vscode-evals/eslint.config.mjs | 4 + apps/vscode-evals/package.json | 26 + apps/vscode-evals/src/runTest.ts | 33 + .../src/suite/applyDiff.matrix.test.ts | 984 ++++++++++++++++++ apps/vscode-evals/src/suite/index.ts | 52 + apps/vscode-evals/src/suite/utils.ts | 40 + apps/vscode-evals/tsconfig.esm.json | 8 + apps/vscode-evals/tsconfig.json | 19 + knip.json | 3 +- packages/evally/examples/sampleMatrix.test.ts | 49 + packages/evally/package.json | 28 + packages/evally/src/cli/standaloneRunner.ts | 66 ++ packages/evally/src/index.ts | 2 + .../evally/src/runner/TestMatrixRunner.ts | 408 ++++++++ packages/evally/src/runner/types.ts | 42 + packages/evally/tsconfig.json | 19 + pnpm-lock.yaml | 264 ++++- 20 files changed, 2051 insertions(+), 17 deletions(-) create mode 100644 apps/vscode-evals/.env.local.sample create mode 100644 apps/vscode-evals/.vscode-test.mjs create mode 100644 apps/vscode-evals/eslint.config.mjs create mode 100644 apps/vscode-evals/package.json create mode 100644 apps/vscode-evals/src/runTest.ts create mode 100644 apps/vscode-evals/src/suite/applyDiff.matrix.test.ts create mode 100644 apps/vscode-evals/src/suite/index.ts create mode 100644 apps/vscode-evals/src/suite/utils.ts create mode 100644 apps/vscode-evals/tsconfig.esm.json create mode 100644 apps/vscode-evals/tsconfig.json create mode 100644 packages/evally/examples/sampleMatrix.test.ts create mode 100644 packages/evally/package.json create mode 100644 packages/evally/src/cli/standaloneRunner.ts create mode 100644 packages/evally/src/index.ts create mode 100644 packages/evally/src/runner/TestMatrixRunner.ts create mode 100644 packages/evally/src/runner/types.ts create 
mode 100644 packages/evally/tsconfig.json diff --git a/.gitignore b/.gitignore index 54cf66cee7a..74d3c4384e9 100644 --- a/.gitignore +++ b/.gitignore @@ -51,4 +51,6 @@ logs qdrant_storage/ # Architect plans -plans/ \ No newline at end of file +plans/ + +.results \ No newline at end of file diff --git a/apps/vscode-evals/.env.local.sample b/apps/vscode-evals/.env.local.sample new file mode 100644 index 00000000000..40c9da1bb44 --- /dev/null +++ b/apps/vscode-evals/.env.local.sample @@ -0,0 +1 @@ +OPENROUTER_API_KEY=sk-or-v1-... diff --git a/apps/vscode-evals/.vscode-test.mjs b/apps/vscode-evals/.vscode-test.mjs new file mode 100644 index 00000000000..c83f12c4bb6 --- /dev/null +++ b/apps/vscode-evals/.vscode-test.mjs @@ -0,0 +1,16 @@ +/** + * See: https://code.visualstudio.com/api/working-with-extensions/testing-extension + */ + +import { defineConfig } from "@vscode/test-cli" + +export default defineConfig({ + label: "integrationTest", + files: "out/suite/**/*.test.js", + workspaceFolder: ".", + mocha: { + ui: "tdd", + timeout: 60000, + }, + launchArgs: ["--enable-proposed-api=RooVeterinaryInc.roo-cline", "--disable-extensions"], +}) diff --git a/apps/vscode-evals/eslint.config.mjs b/apps/vscode-evals/eslint.config.mjs new file mode 100644 index 00000000000..694bf736642 --- /dev/null +++ b/apps/vscode-evals/eslint.config.mjs @@ -0,0 +1,4 @@ +import { config } from "@roo-code/config-eslint/base" + +/** @type {import("eslint").Linter.Config} */ +export default [...config] diff --git a/apps/vscode-evals/package.json b/apps/vscode-evals/package.json new file mode 100644 index 00000000000..0f788d5fe4e --- /dev/null +++ b/apps/vscode-evals/package.json @@ -0,0 +1,26 @@ +{ + "name": "@roo-code/vscode-evals", + "private": true, + "scripts": { + "lint": "eslint src --ext=ts --max-warnings=0", + "check-types": "tsc -p tsconfig.esm.json --noEmit", + "format": "prettier --write src", + "test:ci": "pnpm -w bundle && pnpm --filter @roo-code/vscode-webview build && pnpm 
test:run",
+		"test:run": "rimraf out && tsc -p tsconfig.json && npx dotenvx run -f .env.local -- node ./out/runTest.js",
+		"clean": "rimraf out .turbo"
+	},
+	"devDependencies": {
+		"@roo-code/evally": "workspace:^",
+		"@roo-code/config-eslint": "workspace:^",
+		"@roo-code/config-typescript": "workspace:^",
+		"@roo-code/types": "workspace:^",
+		"@types/vscode": "^1.95.0",
+		"@vscode/test-cli": "^0.0.11",
+		"@vscode/test-electron": "^2.4.0",
+		"rimraf": "^6.0.1",
+		"typescript": "5.8.3"
+	},
+	"dependencies": {
+		"vscode": "^1.1.37"
+	}
+}
diff --git a/apps/vscode-evals/src/runTest.ts b/apps/vscode-evals/src/runTest.ts
new file mode 100644
index 00000000000..82394f24abe
--- /dev/null
+++ b/apps/vscode-evals/src/runTest.ts
@@ -0,0 +1,50 @@
+import * as path from "path"
+import * as os from "os"
+import * as fs from "fs/promises"
+
+import { runTests } from "@vscode/test-electron"
+
+/**
+ * Runs the eval suite through @vscode/test-electron's `runTests`.
+ *
+ * A throwaway workspace directory is created under the OS temp dir and passed as the
+ * only launch argument. An optional test filter is read from `--grep <value>` /
+ * `--file <value>` on the command line, falling back to the TEST_GREP / TEST_FILE
+ * environment variables, and forwarded via `extensionTestsEnv`. The VS Code version
+ * comes from VSCODE_VERSION (default "1.101.2").
+ *
+ * Any failure is logged and terminates the process with exit code 1.
+ */
+async function main() {
+	try {
+		const extensionDevelopmentPath = path.resolve(__dirname, "../../../src")
+		const extensionTestsPath = path.resolve(__dirname, "./suite/index")
+		const testWorkspace = await fs.mkdtemp(path.join(os.tmpdir(), "roo-evals-workspace-"))
+		try {
+			// A flag's value is the argv entry immediately following the flag itself.
+			const testGrep = process.argv.find((arg, i) => process.argv[i - 1] === "--grep") || process.env.TEST_GREP
+			const testFile = process.argv.find((arg, i) => process.argv[i - 1] === "--file") || process.env.TEST_FILE
+			const extensionTestsEnv = {
+				...process.env,
+				...(testGrep && { TEST_GREP: testGrep }),
+				...(testFile && { TEST_FILE: testFile }),
+			}
+			await runTests({
+				extensionDevelopmentPath,
+				extensionTestsPath,
+				launchArgs: [testWorkspace],
+				extensionTestsEnv,
+				version: process.env.VSCODE_VERSION || "1.101.2",
+			})
+		} finally {
+			// Runs before the outer catch calls process.exit(1), so the temp workspace
+			// is removed on failed runs too instead of being leaked.
+			await fs.rm(testWorkspace, { recursive: true, force: true })
+		}
+	} catch (error) {
+		console.error("Failed to run vscode evals", error)
+		process.exit(1)
+	}
+}
+
+main()
diff --git a/apps/vscode-evals/src/suite/applyDiff.matrix.test.ts b/apps/vscode-evals/src/suite/applyDiff.matrix.test.ts
new file mode
100644 index 00000000000..810f6ade8d1 --- /dev/null +++ b/apps/vscode-evals/src/suite/applyDiff.matrix.test.ts @@ -0,0 +1,984 @@ +import { afterAll, afterEach, beforeAll, beforeEach, defineMatrix, describe, it } from "@roo-code/evally" +import { RooCodeEventName, type RooCodeAPI, type ClineMessage } from "@roo-code/types" +import { strict as assert } from "assert" +import { waitFor, sleep } from "./utils" + +import * as fs from "fs/promises" +import * as path from "path" +interface NativeProtocolVerification { + hasNativeApiProtocol: boolean + apiProtocol: string | null + responseIsNotXML: boolean + toolWasExecuted: boolean + executedToolName: string | null +} + +function createVerificationState(): NativeProtocolVerification { + return { + hasNativeApiProtocol: false, + apiProtocol: null, + responseIsNotXML: true, + toolWasExecuted: false, + executedToolName: null, + } +} + +function assertNativeProtocolUsed(verification: NativeProtocolVerification, testName: string): void { + assert.ok( + verification.apiProtocol !== null, + `[${testName}] apiProtocol should be set in api_req_started message. This indicates an API request was made.`, + ) + + assert.strictEqual( + verification.hasNativeApiProtocol, + true, + `[${testName}] Native API protocol should be used. Expected apiProtocol to be "anthropic" or "openai", but got: ${verification.apiProtocol}`, + ) + + assert.strictEqual( + verification.responseIsNotXML, + true, + `[${testName}] Response should NOT contain XML tool tags. Found XML tags which indicates XML protocol was used instead of native.`, + ) + + assert.strictEqual( + verification.toolWasExecuted, + true, + `[${testName}] Tool should have been executed. 
Executed tool: ${verification.executedToolName || "none"}`, + ) +} + +function createNativeVerificationHandler( + verification: NativeProtocolVerification, + messages: ClineMessage[], + options: { + onError?: (error: string) => void + onApplyDiffExecuted?: () => void + debugLogging?: boolean + } = {}, +): (event: { message: ClineMessage }) => void { + const { onError, onApplyDiffExecuted, debugLogging = true } = options + + return ({ message }: { message: ClineMessage }) => { + messages.push(message) + + if (debugLogging) { + console.log(`[DEBUG] Message: type=${message.type}, say=${message.say}, ask=${message.ask}`) + } + + if (message.type === "say" && message.say === "error") { + const errorText = (message.text as string | undefined) || "Unknown error" + console.error("[ERROR]:", errorText) + onError?.(errorText) + } + + if (message.type === "ask" && message.ask === "tool") { + if (debugLogging && typeof message.text === "string") { + console.log("[DEBUG] Tool callback:", message.text.substring(0, 300)) + } + + try { + const toolData = JSON.parse((message.text as string) || "{}") + + if (toolData.tool) { + verification.toolWasExecuted = true + verification.executedToolName = toolData.tool + console.log(`[VERIFIED] Tool executed: ${toolData.tool}`) + } + + if (toolData.tool === "appliedDiff" || toolData.tool === "apply_diff") { + console.log("[TOOL] apply_diff tool executed") + onApplyDiffExecuted?.() + } + } catch { + if (debugLogging && typeof message.text === "string") { + console.log("[DEBUG] Tool callback not JSON:", message.text.substring(0, 100)) + } + } + } + + if (message.type === "say" && message.say === "api_req_started" && typeof message.text === "string") { + const rawText = message.text + if (debugLogging) { + console.log("[DEBUG] API request started:", rawText.substring(0, 200)) + } + + if (rawText.includes("apply_diff") || rawText.includes("appliedDiff")) { + verification.toolWasExecuted = true + verification.executedToolName = 
verification.executedToolName || "apply_diff" + console.log("[VERIFIED] Tool executed via raw text check: apply_diff") + onApplyDiffExecuted?.() + } + + try { + const requestData = JSON.parse(rawText) + + if (requestData.apiProtocol) { + verification.apiProtocol = requestData.apiProtocol + if (requestData.apiProtocol === "anthropic" || requestData.apiProtocol === "openai") { + verification.hasNativeApiProtocol = true + console.log(`[VERIFIED] API Protocol: ${requestData.apiProtocol}`) + } + } + + if ( + requestData.request && + (requestData.request.includes("apply_diff") || requestData.request.includes("appliedDiff")) + ) { + verification.toolWasExecuted = true + verification.executedToolName = "apply_diff" + console.log("[VERIFIED] Tool executed via parsed request: apply_diff") + onApplyDiffExecuted?.() + } + } catch (e) { + console.log("[DEBUG] Failed to parse api_req_started message:", e) + } + } + + if (message.type === "say" && message.say === "text" && typeof message.text === "string") { + const hasXMLToolTags = + message.text.includes("") || + message.text.includes("") || + message.text.includes("") || + message.text.includes("") + + if (hasXMLToolTags) { + verification.responseIsNotXML = false + console.log("[WARNING] Found XML tool tags in response - this indicates XML protocol") + } + } + + if (message.type === "say" && message.say === "completion_result" && typeof message.text === "string") { + if (debugLogging) { + console.log("[DEBUG] AI completion:", message.text.substring(0, 200)) + } + } + } +} + +const evalGroupVariables: { openRouterModelId: string }[] = [{ openRouterModelId: "openai/gpt-5.1" }] + +const now = Date.now() +const testFiles: Record< + string, + { + path: string + name: string + content: string + } +> = { + simpleModify: { + name: `test-file-simple-native-${now}.txt`, + content: "Hello World\nThis is a test file\nWith multiple lines", + path: ``, + }, + multipleReplace: { + name: `test-func-multiple-native-${now}.js`, + content: 
`function calculate(x, y) {\n const sum = x + y\n const product = x * y\n return { sum: sum, product: product }\n}`, + path: ``, + }, + lineNumbers: { + name: `test-lines-native-${now}.js`, + content: `// Header comment\nfunction oldFunction() {\n console.log("Old implementation")\n}\n\n// Another function\nfunction keepThis() {\n console.log("Keep this")\n}\n\n// Footer comment`, + path: ``, + }, + errorHandling: { + name: `test-error-native-${now}.txt`, + content: "Original content", + path: ``, + }, + multiSearchReplace: { + name: `test-multi-search-native-${now}.js`, + content: `function processData(data) {\n console.log("Processing data")\n return data.map(item => item * 2)\n}\n\n// Some other code in between\nconst config = {\n timeout: 5000,\n retries: 3\n}\n\nfunction validateInput(input) {\n console.log("Validating input")\n if (!input) {\n throw new Error("Invalid input")\n }\n return true\n}`, + path: ``, + }, +} + +function getTestWorkspaceDir(): string { + const fromGlobal = (globalThis as { rooTestWorkspaceDir?: string }).rooTestWorkspaceDir + if (typeof fromGlobal === "string" && fromGlobal.length > 0) { + return fromGlobal + } + return process.cwd() +} + +async function createTestFile(file: { name: string; content: string }): Promise { + const tmpPath = path.join(getTestWorkspaceDir(), file.name) + await fs.writeFile(tmpPath, file.content) + return tmpPath +} +async function resetTestFile(file: { name: string; content: string }): Promise { + const tmpPath = path.join(getTestWorkspaceDir(), file.name) + await fs.writeFile(tmpPath, file.content) + return tmpPath +} +async function removeTestFile(file: { name: string }): Promise { + const tmpPath = path.join(getTestWorkspaceDir(), file.name) + try { + await fs.unlink(tmpPath) + } catch { + void 0 + } +} + +export default defineMatrix({ + variables: evalGroupVariables, + iterations: 10, + tests: function () { + describe("Apply_diff Tool (Native Tool Calling)", function () { + let workspaceDir: string + 
beforeAll(async () => { + console.log("beforeAll Executed") + workspaceDir = getTestWorkspaceDir() + console.log("[INFO] Using workspace directory:", workspaceDir) + + console.log("Creating test files in workspace...") + for (const [key, file] of Object.entries(testFiles)) { + file.path = path.join(workspaceDir, file.name) + await fs.writeFile(file.path, file.content) + console.log(`Created ${key} test file at:`, file.path) + } + + for (const [key, file] of Object.entries(testFiles)) { + const exists = await fs + .access(file.path) + .then(() => true) + .catch(() => false) + if (!exists) { + throw new Error(`Failed to create ${key} test file at ${file.path}`) + } + } + }) + + beforeEach(async () => { + console.log("beforeEach Executed - resetting test files to original content") + await resetAllTestFiles() + }) + afterEach(async () => { + console.log("afterEach Executed") + }) + afterAll(async () => { + console.log("afterAll Executed") + }) + + it("should apply diff to modify file content and events (extension harness integrated)", async function ({ + variable, + }) { + const api: RooCodeAPI | undefined = (globalThis as { api?: RooCodeAPI }).api as RooCodeAPI | undefined + + if (!api) { + console.warn( + "[applyDiff.matrix] globalThis.api is not set; not running inside VSCode extension host. 
Skipping test.", + ) + return + } + + const file = testFiles.simpleModify + if (!file) throw new Error("Missing test file definition") + const expectedContent = "Hello Universe\nThis is a test file\nWith multiple lines" + await createTestFile(file) + + const messages: ClineMessage[] = [] + let taskStarted = false + let taskCompleted = false + let errorOccurred: string | null = null + let applyDiffExecuted = false + + const verification = createVerificationState() + + let taskId: string = "" + const messageHandler = createNativeVerificationHandler(verification, messages, { + onError: (error) => { + errorOccurred = error + }, + onApplyDiffExecuted: () => { + applyDiffExecuted = true + }, + debugLogging: true, + }) + const taskStartedHandler = (id: string) => { + if (id === taskId) taskStarted = true + } + const taskCompletedHandler = (id: string) => { + if (id === taskId) taskCompleted = true + } + api.on(RooCodeEventName.Message, messageHandler) + api.on(RooCodeEventName.TaskStarted, taskStartedHandler) + api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler) + + let verboseLog = "" + function logMsg(msg: string) { + verboseLog += msg + "\n" + } + try { + console.log(variable.openRouterModelId) + taskId = await api.startNewTask({ + configuration: { + mode: "code", + autoApprovalEnabled: true, + alwaysAllowWrite: true, + alwaysAllowReadOnly: true, + alwaysAllowReadOnlyOutsideWorkspace: true, + toolProtocol: "native", + apiProvider: "openrouter", + openRouterModelId: variable.openRouterModelId, + }, + text: `Use apply_diff on the file ${file.name} to change "Hello World" to "Hello Universe". 
The file already exists with this content:\n${file.content}\nAssume the file exists and you can modify it directly.`, + }) + await waitFor(() => taskStarted, { timeout: 60000 }) + if (errorOccurred) { + logMsg("Task failed early with error: " + errorOccurred) + throw createVerboseError("Early error: " + errorOccurred, verboseLog, messages) + } + await waitFor(() => taskCompleted, { timeout: 60000 }) + await sleep(2000) + const actualContent = await fs.readFile( + file.path || path.join(getTestWorkspaceDir(), file.name), + "utf-8", + ) + try { + assert.strictEqual( + actualContent.trim(), + expectedContent.trim(), + "File was not modified by extension and diff!", + ) + } catch (e) { + logMsg("File content did not match expected output.") + logMsg("Expected:\n" + expectedContent) + logMsg("Actual:\n" + actualContent) + throw createVerboseError(e instanceof Error ? e.message : String(e), verboseLog, messages) + } + + assertNativeProtocolUsed(verification, "simpleModify") + + if (!applyDiffExecuted) { + logMsg("apply_diff tool was not executed!") + throw createVerboseError("apply_diff tool was not executed!", verboseLog, messages) + } + } catch (err) { + if (verboseLog || messages.length > 0) { + const lines = [ + "", + "========== DEBUG LOG ==========", + verboseLog.trim(), + "---------- Message History ----------", + ...messages.map((m) => JSON.stringify(m)), + "=====================================", + ] + console.error(lines.filter(Boolean).join("\n")) + } + throw err + } finally { + api.off(RooCodeEventName.Message, messageHandler) + api.off(RooCodeEventName.TaskStarted, taskStartedHandler) + api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler) + await removeTestFile(file) + } + + function createVerboseError(text: string, log: string, msgArr: ClineMessage[]): Error { + let summary = `\n\n========== DEBUG LOG ==========` + if (log) summary += `\n${log.trim()}` + summary += + `\n---------- Message History ----------\n` + msgArr.map((m) => 
JSON.stringify(m)).join("\n") + summary += `\n=====================================` + return new Error(text + summary) + } + }) + it("Should apply multiple search/replace blocks in single diff using native tool calling", async function ({ + variable, + }) { + const api = (globalThis as { api?: RooCodeAPI }).api as RooCodeAPI + const messages: ClineMessage[] = [] + const testFile = testFiles.multipleReplace + if (!testFile) { + throw new Error("Missing test file definition: multipleReplace") + } + const expectedContent = + "function compute(a, b) {\n" + + " const total = a + b\n" + + " const result = a * b\n" + + " return { total: total, result: result }\n" + + "}" + let taskStarted = false + let taskCompleted = false + let applyDiffExecuted = false + + const verification = createVerificationState() + + const messageHandler = createNativeVerificationHandler(verification, messages, { + onApplyDiffExecuted: () => { + applyDiffExecuted = true + }, + debugLogging: true, + }) + api.on(RooCodeEventName.Message, messageHandler) + + const taskStartedHandler = (id: string) => { + if (id === taskId) { + taskStarted = true + console.log("Task started:", id) + } + } + api.on(RooCodeEventName.TaskStarted, taskStartedHandler) + + const taskCompletedHandler = (id: string) => { + if (id === taskId) { + taskCompleted = true + console.log("Task completed:", id) + } + } + api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler) + + let taskId: string + try { + taskId = await api.startNewTask({ + configuration: { + mode: "code", + autoApprovalEnabled: true, + alwaysAllowWrite: true, + alwaysAllowReadOnly: true, + alwaysAllowReadOnlyOutsideWorkspace: true, + toolProtocol: "native", + apiProvider: "openrouter", + openRouterModelId: variable.openRouterModelId, + }, + text: `Use apply_diff on the file ${testFile.name} to make ALL of these changes: +1. Rename function "calculate" to "compute" +2. Rename parameters "x, y" to "a, b" +3. 
Rename variable "sum" to "total" (including in the return statement) +4. Rename variable "product" to "result" (including in the return statement) +5. In the return statement, change { sum: sum, product: product } to { total: total, result: result } + +The file already exists with this content: +${testFile.content} + +Assume the file exists and you can modify it directly.`, + }) + + console.log("Task ID:", taskId) + console.log("Test filename:", testFile.name) + + await waitFor(() => taskStarted, { timeout: 60_000 }) + + await waitFor(() => taskCompleted, { timeout: 60_000 }) + + await sleep(2000) + + const actualContent = await fs.readFile(testFile.path, "utf-8") + console.log("File content after modification:", actualContent) + + assertNativeProtocolUsed(verification, "multipleReplace") + + assert.strictEqual(applyDiffExecuted, true, "apply_diff tool should have been executed") + + assert.strictEqual( + actualContent.trim(), + expectedContent.trim(), + "All replacements should be applied correctly", + ) + + console.log( + "Test passed! 
apply_diff tool executed with VERIFIED native protocol and multiple replacements applied successfully", + ) + } finally { + api.off(RooCodeEventName.Message, messageHandler) + api.off(RooCodeEventName.TaskStarted, taskStartedHandler) + api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler) + } + }) + it("Should handle apply_diff with line number hints using native tool calling", async function ({ + variable, + }) { + const api = (globalThis as { api?: RooCodeAPI }).api as RooCodeAPI + const messages: ClineMessage[] = [] + const testFile = testFiles.lineNumbers + if (!testFile) { + throw new Error("Missing test file definition: lineNumbers") + } + const expectedContent = + "// Header comment\n" + + "function newFunction() {\n" + + ' console.log("New implementation")\n' + + "}\n" + + "\n" + + "// Another function\n" + + "function keepThis() {\n" + + ' console.log("Keep this")\n' + + "}\n" + + "\n" + + "// Footer comment" + + let taskStarted = false + let taskCompleted = false + let applyDiffExecuted = false + + const verification = createVerificationState() + + const messageHandler = createNativeVerificationHandler(verification, messages, { + onApplyDiffExecuted: () => { + applyDiffExecuted = true + }, + debugLogging: true, + }) + api.on(RooCodeEventName.Message, messageHandler) + + const taskStartedHandler = (id: string) => { + if (id === taskId) { + taskStarted = true + } + } + api.on(RooCodeEventName.TaskStarted, taskStartedHandler) + + const taskCompletedHandler = (id: string) => { + if (id === taskId) { + taskCompleted = true + } + } + api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler) + + let taskId: string + try { + taskId = await api.startNewTask({ + configuration: { + mode: "code", + autoApprovalEnabled: true, + alwaysAllowWrite: true, + alwaysAllowReadOnly: true, + alwaysAllowReadOnlyOutsideWorkspace: true, + toolProtocol: "native", + apiProvider: "openrouter", + openRouterModelId: variable.openRouterModelId, + }, + text: `Use apply_diff on 
the file ${testFile.name} to change "oldFunction" to "newFunction" and update its console.log to "New implementation". Keep the rest of the file unchanged. + +The file already exists with this content: +${testFile.content} + +Assume the file exists and you can modify it directly.`, + }) + + console.log("Task ID:", taskId) + console.log("Test filename:", testFile.name) + + await waitFor(() => taskStarted, { timeout: 60_000 }) + + await waitFor(() => taskCompleted, { timeout: 60_000 }) + + await sleep(2000) + + const actualContent = await fs.readFile(testFile.path, "utf-8") + console.log("File content after modification:", actualContent) + + assertNativeProtocolUsed(verification, "lineNumbers") + + assert.strictEqual(applyDiffExecuted, true, "apply_diff tool should have been executed") + + assert.strictEqual( + actualContent.trim(), + expectedContent.trim(), + "Only specified function should be modified", + ) + + console.log( + "Test passed! apply_diff tool executed with VERIFIED native protocol and targeted modification successful", + ) + } finally { + api.off(RooCodeEventName.Message, messageHandler) + api.off(RooCodeEventName.TaskStarted, taskStartedHandler) + api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler) + } + }) + it("Should handle apply_diff errors gracefully using native tool calling", async function ({ variable }) { + const api = (globalThis as { api?: RooCodeAPI }).api as RooCodeAPI + const messages: ClineMessage[] = [] + const testFile = testFiles.errorHandling + if (!testFile) { + throw new Error("Missing test file definition: errorHandling") + } + let taskStarted = false + let taskCompleted = false + let errorDetected = false + let applyDiffAttempted = false + let writeToFileUsed = false + + const messageHandler = ({ message }: { message: ClineMessage }) => { + messages.push(message) + + if (message.type === "say" && message.say === "error") { + errorDetected = true + console.log("Error detected:", message.text) + } + + if (message.type 
=== "ask" && message.ask === "tool") { + console.log("Tool ASK request:", message.text?.substring(0, 500)) + try { + const toolData = JSON.parse(message.text || "{}") + if (toolData.tool === "appliedDiff") { + applyDiffAttempted = true + console.log("apply_diff tool attempted via ASK!") + } + if (toolData.tool === "editedExistingFile" || toolData.tool === "newFileCreated") { + writeToFileUsed = true + console.log("write_to_file tool used!") + } + } catch (e) { + console.error(e) + } + } + + if (message.type === "say" && message.say === "diff_error") { + applyDiffAttempted = true + console.log("diff_error detected - apply_diff was attempted") + } + + if (message.type === "say" && message.say === "api_req_started" && message.text) { + console.log("API request started:", message.text.substring(0, 200)) + } + } + api.on(RooCodeEventName.Message, messageHandler) + + const taskStartedHandler = (id: string) => { + if (id === taskId) { + taskStarted = true + } + } + api.on(RooCodeEventName.TaskStarted, taskStartedHandler) + + const taskCompletedHandler = (id: string) => { + if (id === taskId) { + taskCompleted = true + } + } + api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler) + + let taskId: string + try { + taskId = await api.startNewTask({ + configuration: { + mode: "code", + autoApprovalEnabled: true, + alwaysAllowWrite: true, + alwaysAllowReadOnly: true, + alwaysAllowReadOnlyOutsideWorkspace: true, + reasoningEffort: "none", + toolProtocol: "native", + apiProvider: "openrouter", + openRouterModelId: variable.openRouterModelId, + }, + text: ` +--- +description: Test apply_diff tool error handling with non-existent patterns +argument-hint: [search-pattern] +--- + + +Test the apply_diff tool's error handling by attempting to replace a pattern that does not exist in the target file. 
+Target File: ${testFile.name} +Search pattern: "PATTERN_THAT_DOES_NOT_EXIST_xyz123" +Replacement: "REPLACEMENT_xyz123" + + + +This command verifies that apply_diff correctly handles and reports errors when: +- A search pattern is not found in the target file +- The tool gracefully fails with an informative error message +- Error handling works as expected for debugging workflows + + + + + Execute apply_diff directly +
+ Call apply_diff on the specified file with a non-existent search pattern. + Do NOT analyze the file first - the goal is to test error handling. +
+
+ + + Observe the error response +
+ The apply_diff tool should report that the pattern was not found. + This is the EXPECTED outcome - not a failure of the test. +
+
+ + + Report results +
+ Confirm whether the error handling worked correctly by reporting: + - The error message received + - Whether the tool behaved as expected +
+
+
+ + + + - YOU MUST call the apply_diff tool - this is non-negotiable + - Use the EXACT search pattern provided (or default: "PATTERN_THAT_DOES_NOT_EXIST_xyz123") + - Do NOT use write_to_file or any other file modification tool + - Do NOT analyze the file contents before calling apply_diff + - Do NOT refuse to call the tool - error handling verification is the purpose + + + + PATTERN_THAT_DOES_NOT_EXIST_xyz123 + REPLACEMENT_xyz123 + + + + + + Use this structure for the apply_diff call: + - path: The file specified by the user + - diff: A SEARCH/REPLACE block with the non-existent pattern + + + + \`\`\` + <<<<<<< SEARCH + :start_line:1 + ------- + PATTERN_THAT_DOES_NOT_EXIST_xyz123 + ======= + REPLACEMENT_xyz123 + >>>>>>> REPLACE + \`\`\` + + + + + + The test succeeds when apply_diff returns an error indicating the pattern was not found. + This confirms the tool's error handling is working correctly. + + + + After executing, report: + - Whether apply_diff was called: YES/NO + - Error message received: [actual error] + - Error handling status: WORKING/FAILED + + + + + - Only use the apply_diff tool + - Accept that "pattern not found" errors are the expected result + - Do not attempt to "fix" the test by finding real patterns + - This is a diagnostic/testing command, not a production workflow +`, + }) + + console.log("Task ID:", taskId) + console.log("Test filename:", testFile.name) + await waitFor(() => taskStarted, { timeout: 90_000 }) + + await waitFor(() => taskCompleted || errorDetected, { timeout: 90_000 }) + + await sleep(2000) + + const actualContent = await fs.readFile(testFile.path, "utf-8") + console.log("File content after task:", actualContent) + console.log("applyDiffAttempted:", applyDiffAttempted) + console.log("writeToFileUsed:", writeToFileUsed) + + assert.strictEqual(applyDiffAttempted, true, "apply_diff tool should have been attempted") + + assert.strictEqual( + writeToFileUsed, + false, + "write_to_file should NOT be used when apply_diff fails - 
the AI should report the error instead", + ) + + assert.strictEqual( + actualContent.trim(), + testFile.content.trim(), + "File content should remain unchanged when search pattern not found", + ) + + console.log("Test passed! apply_diff attempted with native protocol and error handled gracefully") + } finally { + api.off(RooCodeEventName.Message, messageHandler) + api.off(RooCodeEventName.TaskStarted, taskStartedHandler) + api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler) + } + }) + it("Should apply multiple search/replace blocks to edit two separate functions using native tool calling", async function ({ + variable, + }) { + const api = (globalThis as { api?: RooCodeAPI }).api as RooCodeAPI + const messages: ClineMessage[] = [] + const testFile = testFiles.multiSearchReplace + if (!testFile) { + throw new Error("Missing test file definition: multiSearchReplace") + } + const expectedContent = + "function transformData(data) {\n" + + ' console.log("Transforming data")\n' + + " return data.map(item => item * 2)\n" + + "}\n" + + "\n" + + "// Some other code in between\n" + + "const config = {\n" + + " timeout: 5000,\n" + + " retries: 3\n" + + "}\n" + + "\n" + + "function checkInput(input) {\n" + + ' console.log("Checking input")\n' + + " if (!input) {\n" + + ' throw new Error("Invalid input")\n' + + " }\n" + + " return true\n" + + "}" + let taskStarted = false + let taskCompleted = false + let errorOccurred: string | null = null + let applyDiffExecuted = false + let applyDiffCount = 0 + + const verification = createVerificationState() + + const messageHandler = ({ message }: { message: ClineMessage }) => { + messages.push(message) + + if (message.type === "say" && message.say === "error") { + errorOccurred = message.text || "Unknown error" + console.error("Error:", message.text) + } + if (message.type === "ask" && message.ask === "tool") { + console.log("Tool request:", message.text?.substring(0, 200)) + try { + const toolData = JSON.parse(message.text || 
"{}") + if (toolData.tool) { + verification.toolWasExecuted = true + verification.executedToolName = toolData.tool + console.log(`[VERIFIED] Tool executed: ${toolData.tool}`) + } + if (toolData.tool === "appliedDiff") { + applyDiffExecuted = true + applyDiffCount++ + console.log(`apply_diff tool executed! (count: ${applyDiffCount})`) + } + } catch (_e) { + void _e + } + } + if (message.type === "say" && (message.say === "completion_result" || message.say === "text")) { + console.log("AI response:", message.text?.substring(0, 200)) + if (message.say === "text" && message.text) { + const hasXMLToolTags = + message.text.includes("") || message.text.includes("") + if (hasXMLToolTags) { + verification.responseIsNotXML = false + console.log("[WARNING] Found XML tool tags in response") + } + } + } + + if (message.type === "say" && message.say === "api_req_started" && message.text) { + console.log("API request started:", message.text.substring(0, 200)) + try { + const requestData = JSON.parse(message.text) + if (requestData.apiProtocol) { + verification.apiProtocol = requestData.apiProtocol + if (requestData.apiProtocol === "anthropic" || requestData.apiProtocol === "openai") { + verification.hasNativeApiProtocol = true + console.log(`[VERIFIED] API Protocol: ${requestData.apiProtocol}`) + } + } + } catch (e) { + console.log("Failed to parse api_req_started message:", e) + } + } + } + api.on(RooCodeEventName.Message, messageHandler) + + const taskStartedHandler = (id: string) => { + if (id === taskId) { + taskStarted = true + console.log("Task started:", id) + } + } + api.on(RooCodeEventName.TaskStarted, taskStartedHandler) + + const taskCompletedHandler = (id: string) => { + if (id === taskId) { + taskCompleted = true + console.log("Task completed:", id) + } + } + api.on(RooCodeEventName.TaskCompleted, taskCompletedHandler) + + let taskId: string + try { + taskId = await api.startNewTask({ + configuration: { + mode: "code", + autoApprovalEnabled: true, + alwaysAllowWrite: 
true, + alwaysAllowReadOnly: true, + alwaysAllowReadOnlyOutsideWorkspace: true, + toolProtocol: "native", + apiProvider: "openrouter", + openRouterModelId: variable.openRouterModelId, + }, + text: `Use apply_diff on the file ${testFile.name} to make these changes. You MUST use TWO SEPARATE search/replace blocks within a SINGLE apply_diff call: + +FIRST search/replace block: Edit the processData function to rename it to "transformData" and change "Processing data" to "Transforming data" + +SECOND search/replace block: Edit the validateInput function to rename it to "checkInput" and change "Validating input" to "Checking input" + +Important: Use multiple SEARCH/REPLACE blocks in one apply_diff call, NOT multiple apply_diff calls. Each function should have its own search/replace block. + +The file already exists with this content: +${testFile.content} + +Assume the file exists and you can modify it directly.`, + }) + + console.log("Task ID:", taskId) + console.log("Test filename:", testFile.name) + + await waitFor(() => taskStarted, { timeout: 60_000 }) + + if (errorOccurred) { + console.error("Early error detected:", errorOccurred) + } + + await waitFor(() => taskCompleted, { timeout: 60_000 }) + + await sleep(2000) + + const actualContent = await fs.readFile(testFile.path, "utf-8") + console.log("File content after modification:", actualContent) + + assertNativeProtocolUsed(verification, "multiSearchReplace") + + assert.strictEqual(applyDiffExecuted, true, "apply_diff tool should have been executed") + console.log(`apply_diff was executed ${applyDiffCount} time(s)`) + + assert.strictEqual( + actualContent.trim(), + expectedContent.trim(), + "Both functions should be modified with separate search/replace blocks", + ) + + console.log( + "Test passed! 
apply_diff tool executed with VERIFIED native protocol and multiple search/replace blocks applied successfully", + ) + } finally { + api.off(RooCodeEventName.Message, messageHandler) + api.off(RooCodeEventName.TaskStarted, taskStartedHandler) + api.off(RooCodeEventName.TaskCompleted, taskCompletedHandler) + } + }) + }) + }, +}) + +async function resetAllTestFiles() { + for (const file of Object.values(testFiles)) { + await resetTestFile(file) + } +} diff --git a/apps/vscode-evals/src/suite/index.ts b/apps/vscode-evals/src/suite/index.ts new file mode 100644 index 00000000000..41d8a34bfd2 --- /dev/null +++ b/apps/vscode-evals/src/suite/index.ts @@ -0,0 +1,52 @@ +import * as path from "path" +import * as vscode from "vscode" + +import { runMatrix } from "@roo-code/evally" +import type { MatrixSuiteDefinition } from "@roo-code/evally" +import type { RooCodeAPI } from "@roo-code/types" + +import { waitFor } from "./utils" + +type TestGlobals = typeof globalThis & { + api?: RooCodeAPI + rooTestWorkspaceDir?: string +} + +const getTestGlobals = (): TestGlobals => globalThis as TestGlobals + +export async function run() { + const extension = vscode.extensions.getExtension("RooVeterinaryInc.roo-cline") + + if (!extension) { + throw new Error("Extension not found") + } + + const api = extension.isActive ? 
extension.exports : await extension.activate() + + await api.setConfiguration({ + apiProvider: "openrouter" as const, + openRouterApiKey: process.env.OPENROUTER_API_KEY!, + openRouterModelId: process.env.OPENROUTER_MODEL_ID || "openai/gpt-5.1", + }) + + await vscode.commands.executeCommand("roo-cline.SidebarProvider.focus") + await waitFor(() => api.isReady()) + const globals = getTestGlobals() + globals.api = api + + const workspaceDir = vscode.workspace.workspaceFolders?.[0]?.uri.fsPath + if (workspaceDir) { + globals.rooTestWorkspaceDir = workspaceDir + } + + const suiteModule = await import(path.resolve(__dirname, "./applyDiff.matrix.test")) + const moduleDefault = (suiteModule as { default?: MatrixSuiteDefinition }).default + + if (!moduleDefault || typeof moduleDefault !== "object" || typeof moduleDefault.tests !== "function") { + throw new Error("Skipping applyDiff.matrix.test: No valid matrix suite export") + } + + const suiteDef: MatrixSuiteDefinition = moduleDefault + + await runMatrix(suiteDef) +} diff --git a/apps/vscode-evals/src/suite/utils.ts b/apps/vscode-evals/src/suite/utils.ts new file mode 100644 index 00000000000..85fe43f0fda --- /dev/null +++ b/apps/vscode-evals/src/suite/utils.ts @@ -0,0 +1,40 @@ +type WaitForOptions = { + timeout?: number + interval?: number +} + +export const waitFor = ( + condition: (() => Promise) | (() => boolean), + { timeout = 30_000, interval = 250 }: WaitForOptions = {}, +) => { + let timeoutId: NodeJS.Timeout | undefined = undefined + + return Promise.race([ + new Promise((resolve) => { + const check = async () => { + const result = condition() + const isSatisfied = result instanceof Promise ? 
await result : result + + if (isSatisfied) { + if (timeoutId) { + clearTimeout(timeoutId) + timeoutId = undefined + } + + resolve() + } else { + setTimeout(check, interval) + } + } + + check() + }), + new Promise((_, reject) => { + timeoutId = setTimeout(() => { + reject(new Error(`Timeout after ${Math.floor(timeout / 1000)}s`)) + }, timeout) + }), + ]) +} + +export const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)) diff --git a/apps/vscode-evals/tsconfig.esm.json b/apps/vscode-evals/tsconfig.esm.json new file mode 100644 index 00000000000..e2f212fab99 --- /dev/null +++ b/apps/vscode-evals/tsconfig.esm.json @@ -0,0 +1,8 @@ +{ + "extends": "@roo-code/config-typescript/base.json", + "compilerOptions": { + "outDir": "out" + }, + "include": ["src"], + "exclude": ["node_modules"] +} diff --git a/apps/vscode-evals/tsconfig.json b/apps/vscode-evals/tsconfig.json new file mode 100644 index 00000000000..a712ea84d78 --- /dev/null +++ b/apps/vscode-evals/tsconfig.json @@ -0,0 +1,19 @@ +{ + "compilerOptions": { + "module": "CommonJS", + "moduleResolution": "Node", + "esModuleInterop": true, + "target": "ES2022", + "lib": ["ES2022", "ESNext.Disposable", "DOM"], + "sourceMap": true, + "strict": true, + "skipLibCheck": true, + "useUnknownInCatchVariables": false, + "outDir": "out", + "composite": false, + "types": ["node"], + "baseUrl": "./src" + }, + "include": ["src/**/*"], + "exclude": [".vscode-test", "**/node_modules/**", "out"] +} diff --git a/knip.json b/knip.json index e15c62bda1b..e8a3bfd24d1 100644 --- a/knip.json +++ b/knip.json @@ -3,6 +3,7 @@ "ignore": [ "**/__tests__/**", "apps/vscode-e2e/**", + "apps/vscode-evals/**", "src/extension/api.ts", "src/activate/**", "src/workers/countTokens.ts", @@ -19,7 +20,7 @@ "entry": ["src/index.tsx", "src/browser-panel.tsx"], "project": ["src/**/*.{ts,tsx}", "../src/shared/*.ts"] }, - "packages/{build,cloud,evals,ipc,telemetry,types}": { + "packages/{build,cloud,evally,evals,ipc,telemetry,types}": { 
"project": ["src/**/*.ts"] } } diff --git a/packages/evally/examples/sampleMatrix.test.ts b/packages/evally/examples/sampleMatrix.test.ts new file mode 100644 index 00000000000..ac86ac26eb7 --- /dev/null +++ b/packages/evally/examples/sampleMatrix.test.ts @@ -0,0 +1,49 @@ +import { defineMatrix, it, describe } from "../src/runner/TestMatrixRunner" +import type { MatrixTestContext } from "../src/runner/types" + +export default defineMatrix({ + variables: [ + { api: "API_A", url: "https://api-a.test", region: "us-east" }, + { api: "API_B", url: "https://api-b.test", region: "eu-west" }, + { api: "API_C", url: "https://api-c.test", region: "asia-pac" }, + ], + iterations: 3, + tests: () => { + // First suite + describe("API Health Checks", () => { + it("should respond with status 200", async ({ variable, iteration }: MatrixTestContext) => { + if (!variable.url.startsWith("https://")) throw new Error("Invalid URL") + }) + it("should have a valid api name", ({ variable }: MatrixTestContext) => { + if (!variable.api) throw new Error("Missing api name") + }) + it("should include a valid region", ({ variable }: MatrixTestContext) => { + if (!["us-east", "eu-west", "asia-pac"].includes(variable.region)) throw new Error("Unexpected region") + }) + }) + // Second suite + describe("Authentication", () => { + it("should fail for no token", async ({ variable }: MatrixTestContext) => { + if (variable.api === "API_B") throw new Error("No token failure") + }) + it("should pass with valid token", async ({ variable }: MatrixTestContext) => { + if (!variable.url.includes("api-")) throw new Error("No api in URL") + }) + it("should region match policy", ({ variable }: MatrixTestContext) => { + if (variable.region === "asia-pac" && variable.api !== "API_C") throw new Error("policy fail") + }) + }) + // Third suite + describe("Data validation", () => { + it("should have url with a dot", ({ variable }: MatrixTestContext) => { + if (!variable.url.includes(".")) throw new Error("URL missing 
dot") + }) + it("api name upper-case only", ({ variable }: MatrixTestContext) => { + if (!/^[A-Z_]+$/.test(variable.api)) throw new Error("API name format") + }) + it("region code format", ({ variable }: MatrixTestContext) => { + if (!variable.region.includes("-")) throw new Error("Bad region code") + }) + }) + }, +}) diff --git a/packages/evally/package.json b/packages/evally/package.json new file mode 100644 index 00000000000..dd76bbf0377 --- /dev/null +++ b/packages/evally/package.json @@ -0,0 +1,28 @@ +{ + "name": "@roo-code/evally", + "version": "0.1.0", + "private": false, + "main": "dist/index.js", + "types": "dist/index.d.ts", + "exports": { + ".": { + "import": "./dist/index.js", + "require": "./dist/index.js", + "types": "./dist/index.d.ts" + }, + "./runner/TestMatrixRunner": { + "import": "./dist/runner/TestMatrixRunner.js", + "require": "./dist/runner/TestMatrixRunner.js", + "types": "./dist/runner/TestMatrixRunner.d.ts" + }, + "./runner/types": { + "import": "./dist/runner/types.js", + "require": "./dist/runner/types.js", + "types": "./dist/runner/types.d.ts" + } + }, + "scripts": { + "build": "tsc --outDir dist --declaration --declarationDir dist", + "test:run": "tsx src/cli/standaloneRunner.ts examples/sampleMatrix.test.ts" + } +} diff --git a/packages/evally/src/cli/standaloneRunner.ts b/packages/evally/src/cli/standaloneRunner.ts new file mode 100644 index 00000000000..40d9d2188a7 --- /dev/null +++ b/packages/evally/src/cli/standaloneRunner.ts @@ -0,0 +1,66 @@ +#!/usr/bin/env node +import * as path from "path" +import { pathToFileURL } from "url" + +async function main() { + const file = process.argv[2] + if (!file) { + console.error("Usage: standaloneRunner ") + process.exit(1) + } + const absPath = path.resolve(process.cwd(), file) + let mod + try { + mod = await import(pathToFileURL(absPath).toString()) + } catch (err) { + console.error(`Failed to load test file: ${err}`) + process.exit(1) + } + if (!mod.default || !mod.default.variables || typeof
mod.default.tests !== "function") { + console.error("Test file does not export a valid matrix test definition as default.") + process.exit(1) + } + const { runMatrix } = await import("../runner/TestMatrixRunner") + const results = await runMatrix(mod.default) + console.log("\n--- MATRIX TEST RESULTS ---") + for (const r of results) { + const vdesc = JSON.stringify(r.variable) + if (r.passed) { + console.log(`PASS [${vdesc}] [iteration: ${r.iteration}] ${r.testName}`) + } else { + console.log(`FAIL [${vdesc}] [iteration: ${r.iteration}] ${r.testName} Error: ${r.error}`) + } + } + const suiteSummary = new Map() + for (const r of results) { + if (!suiteSummary.has(r.suite)) suiteSummary.set(r.suite, new Map()) + const testMap = suiteSummary.get(r.suite) + if (!testMap.has(r.testName)) testMap.set(r.testName, new Map()) + const varKey = JSON.stringify(r.variable) + const varMap = testMap.get(r.testName) + if (!varMap.has(varKey)) varMap.set(varKey, { variable: r.variable, total: 0, passes: 0, fails: 0 }) + const counts = varMap.get(varKey) + counts.total++ + if (r.passed) counts.passes++ + else counts.fails++ + } + console.log("\n--- SUITE/TEST-LEVEL SUMMARY (Pass Rate per variable set, grouped by suite/test) ---") + for (const [suite, testMap] of suiteSummary.entries()) { + console.log(`Suite: ${suite}`) + for (const [testName, varMap] of testMap.entries()) { + console.log(` Test: ${testName}`) + for (const { variable, total, passes, fails } of varMap.values()) { + const percent = ((passes / total) * 100).toFixed(1) + console.log( + ` Vars: ${JSON.stringify(variable)}\n Pass: ${passes}/${total} (${percent}%) Fail: ${fails}/${total}`, + ) + } + } + } + const passes = results.filter((r) => r.passed).length + const fails = results.length - passes + console.log(`\nGlobal summary: ${passes} passed, ${fails} failed, total ${results.length}`) + process.exit(0) +} + +main() diff --git a/packages/evally/src/index.ts b/packages/evally/src/index.ts new file mode 100644 index 
00000000000..bb2beef8217 --- /dev/null +++ b/packages/evally/src/index.ts @@ -0,0 +1,2 @@ +export * from "./runner/TestMatrixRunner.js" +export * from "./runner/types.js" diff --git a/packages/evally/src/runner/TestMatrixRunner.ts b/packages/evally/src/runner/TestMatrixRunner.ts new file mode 100644 index 00000000000..5a832f725de --- /dev/null +++ b/packages/evally/src/runner/TestMatrixRunner.ts @@ -0,0 +1,408 @@ +import type { + MatrixRunOptions, + MatrixTestDescription, + MatrixTestFn, + MatrixSuiteDefinition, + MatrixTestResult, +} from "./types" +import * as path from "path" +import * as fs from "fs/promises" + +const COLORS = { + reset: "\x1b[0m", + bold: "\x1b[1m", + dim: "\x1b[2m", + gray: "\x1b[90m", + green: "\x1b[32m", + red: "\x1b[31m", + yellow: "\x1b[33m", + cyan: "\x1b[36m", +} as const + +function color(text: string, code: string): string { + return `${code}${text}${COLORS.reset}` +} + +function formatDuration(ms: number): string { + if (ms >= 60_000) { + const minutes = Math.round(ms / 60_000) + return `${minutes}m` + } + if (ms >= 1_000) { + const seconds = ms / 1_000 + return `${seconds.toFixed(1)}s` + } + return `${ms}ms` +} +const testRegistry: MatrixTestDescription[] = [] +let currentSuite: string | null = null + +export function describe(suiteName: string, fn: () => void) { + const prevSuite = currentSuite + currentSuite = suiteName + fn() + currentSuite = prevSuite +} + +export function it(name: string, fn: MatrixTestFn) { + if (!currentSuite) throw new Error("Cannot declare test outside a describe(suiteName, ...) block.") + if (testRegistry.some((test) => test.suite === currentSuite && test.name === name)) { + throw new Error( + `Duplicate test name found in suite '${currentSuite}': ${name}. 
Test names must be unique within a suite.`, + ) + } + testRegistry.push({ suite: currentSuite, name, fn }) +} + +export const suite = describe +export const test = it + +type SetupFn = () => Promise | void +const _globalBeforeAll: SetupFn[] = [] +const _globalAfterAll: SetupFn[] = [] +const _globalBeforeEach: SetupFn[] = [] +const _globalAfterEach: SetupFn[] = [] + +export function beforeAll(fn: SetupFn) { + _globalBeforeAll.push(fn) +} +export const suiteSetup = beforeAll + +export function afterAll(fn: SetupFn) { + _globalAfterAll.push(fn) +} +export const suiteTeardown = afterAll + +export function beforeEach(fn: SetupFn) { + _globalBeforeEach.push(fn) +} +export const setup = beforeEach + +export function teardown(fn: SetupFn) { + _globalAfterEach.push(fn) +} +export const afterEach = teardown + +function clearGlobalHooks() { + _globalBeforeAll.length = 0 + _globalAfterAll.length = 0 + _globalBeforeEach.length = 0 + _globalAfterEach.length = 0 +} +export function defineMatrix(def: MatrixSuiteDefinition): MatrixSuiteDefinition { + if (typeof def.suiteSetup === "function") { + suiteSetup(def.suiteSetup) + } + if (typeof def.setup === "function") { + setup(def.setup) + } + if (typeof def.suiteTeardown === "function") { + suiteTeardown(def.suiteTeardown) + } + if (typeof def.teardown === "function") { + teardown(def.teardown) + } + return def +} + +export async function runMatrix(def: MatrixSuiteDefinition, opts?: MatrixRunOptions): Promise { + const startTime = Date.now() + const verbosity: MatrixRunOptions["verbosity"] = opts?.verbosity ?? 
"silent" + + const originalConsole = { + log: console.log, + info: console.info, + warn: console.warn, + error: console.error, + debug: console.debug, + } as const + + const runnerConsole = { + log: originalConsole.log, + error: originalConsole.error, + } as const + + if (verbosity === "silent") { + console.log = () => {} + console.info = () => {} + console.warn = () => {} + console.error = () => {} + console.debug = () => {} + } + testRegistry.length = 0 + currentSuite = null + + const results: MatrixTestResult[] = [] + + try { + def.tests() + + for (const hook of _globalBeforeAll) { + await Promise.resolve(hook()) + } + + for (const variable of def.variables) { + for (let i = 0; i < def.iterations; ++i) { + for (const t of testRegistry) { + for (const hook of _globalBeforeEach) { + await Promise.resolve(hook()) + } + + try { + await Promise.resolve(t.fn({ variable, iteration: i })) + results.push({ suite: t.suite, variable, iteration: i, testName: t.name, passed: true }) + } catch (e) { + results.push({ + suite: t.suite, + variable, + iteration: i, + testName: t.name, + passed: false, + error: e, + }) + } + for (const hook of _globalAfterEach) { + await Promise.resolve(hook()) + } + } + } + } + + for (const hook of _globalAfterAll) { + await Promise.resolve(hook()) + } + } finally { + console.log = originalConsole.log + console.info = originalConsole.info + console.warn = originalConsole.warn + console.error = originalConsole.error + console.debug = originalConsole.debug + } + + const shouldPrintSuitesAndTests = verbosity !== "silent" + + if (!opts || opts.report !== false) { + const grouped: Record = {} + for (const r of results) { + if (!grouped[r.suite]) grouped[r.suite] = [] + grouped[r.suite].push(r) + } + + if (shouldPrintSuitesAndTests) { + for (const suite in grouped) { + const suiteLabel = color("Suite:", COLORS.bold) + const suiteName = color(suite, COLORS.dim) + runnerConsole.log(`\n${suiteLabel} ${suiteName}`) + + const perTestStats: Record = {} + + 
for (const r of grouped[suite]) { + if (!perTestStats[r.testName]) { + perTestStats[r.testName] = { total: 0, passed: 0, failed: 0 } + } + perTestStats[r.testName].total += 1 + if (r.passed) perTestStats[r.testName].passed += 1 + else perTestStats[r.testName].failed += 1 + + const mark = r.passed ? color("✓", COLORS.green) : color("✗", COLORS.red) + const name = r.testName + const iterationTag = `[iteration ${r.iteration}]` + + const base = ` ${mark} ${name} ${color(iterationTag, COLORS.gray)}` + const errorSuffix = + r.passed || !r.error + ? "" + : " -- " + + color(r.error instanceof Error ? r.error.message : String(r.error), COLORS.yellow) + + runnerConsole.log(base + errorSuffix) + } + + const statsEntries = Object.entries(perTestStats) + if (statsEntries.length > 0) { + const statsHeader = color("Per-test iteration stats:", COLORS.bold) + runnerConsole.log(` ${statsHeader}`) + + const jsonTests: Array<{ + testName: string + totalIterations: number + passedIterations: number + failedIterations: number + failureRate: number + classification: "FAILED" | "FLAKY" | "PASSED" + }> = [] + + for (const [testName, stats] of statsEntries.sort(([a], [b]) => a.localeCompare(b))) { + const { total, passed, failed } = stats + const failRate = total > 0 ? 
failed / total : 0 + const failPct = (failRate * 100).toFixed(1) + let statusLabel: string + let classification: "FAILED" | "FLAKY" | "PASSED" + if (failRate >= 0.8) { + statusLabel = color("FAILED", COLORS.red) + classification = "FAILED" + } else if (failed === 0) { + statusLabel = color("PASSED", COLORS.green) + classification = "PASSED" + } else { + statusLabel = color("FLAKY", COLORS.yellow) + classification = "FLAKY" + } + + runnerConsole.log( + ` - ${testName}: ${passed}/${total} iterations passed, ${failed} failed (${failPct}% failure) [${statusLabel}]`, + ) + + jsonTests.push({ + testName, + totalIterations: total, + passedIterations: passed, + failedIterations: failed, + failureRate: failRate, + classification, + }) + } + } + } + } + + const passedTotal = results.filter((r) => r.passed).length + const failedTotal = results.length - passedTotal + const matrixLabel = color("Matrix:", COLORS.bold) + const matrixSummary = + failedTotal === 0 + ? color(`${passedTotal}/${results.length} passing`, COLORS.green) + : color(`${passedTotal}/${results.length} passing`, COLORS.red) + runnerConsole.log(`\n${matrixLabel} ${matrixSummary}\n`) + const duration = Date.now() - startTime + const durationText = formatDuration(duration) + const pendingTotal = 0 + runnerConsole.log(color(`${passedTotal} passing (${durationText})`, COLORS.green)) + + runnerConsole.log(" " + color(`${pendingTotal} pending`, COLORS.cyan)) + + const failingColor = failedTotal > 0 ? 
COLORS.red : COLORS.gray + runnerConsole.log(" " + color(`${failedTotal} failing`, failingColor)) + + try { + const resultsDir = path.join(process.cwd(), ".results") + const timestamp = new Date(startTime).toISOString().replace(/[:.]/g, "-") + const logsDir = path.join(resultsDir, `matrix-logs-${timestamp}`) + const filePath = path.join(resultsDir, `matrix-results-${timestamp}.json`) + + const sanitizeForFilename = (value: string): string => + value + .replace(/[^a-zA-Z0-9-_]+/g, "_") + .replace(/_+/g, "_") + .replace(/^_+|_+$/g, "") + .slice(0, 80) || "unnamed" + + type AggregatedTestJson = { + testName: string + pass_count: number + fail_count: number + passed: boolean + errors: Array<{ message: string; log: string | null }> + } + + const aggregatedSuites: Array<{ suite: string; tests: AggregatedTestJson[] }> = [] + const failedIterationLogs: Array<{ filePath: string; content: string }> = [] + for (const [suiteName, suiteResults] of Object.entries(grouped)) { + const perTest: Record = {} + for (const r of suiteResults) { + if (!perTest[r.testName]) perTest[r.testName] = [] + perTest[r.testName].push(r) + } + + const tests: AggregatedTestJson[] = [] + + for (const [testName, iterations] of Object.entries(perTest).sort(([a], [b]) => a.localeCompare(b))) { + const totalIterations = iterations.length + const passedIterations = iterations.filter((r) => r.passed).length + const failedIterations = totalIterations - passedIterations + const passRate = totalIterations > 0 ? passedIterations / totalIterations : 0 + + const aggregatedPassed = passRate >= 0.8 + + const errors: Array<{ message: string; log: string | null }> = [] + + for (const r of iterations) { + if (r.passed) continue + + const errorMessage = + r.error instanceof Error + ? r.error.message + : r.error != null + ? 
String(r.error) + : "Unknown error" + + const logFileName = + [ + "matrix", + sanitizeForFilename(suiteName), + sanitizeForFilename(testName), + `iter-${r.iteration}`, + ].join("__") + ".log" + + const logAbsolutePath = path.join(logsDir, logFileName) + const logRelativePath = path.relative(process.cwd(), logAbsolutePath) + + const logLines = [ + `suite: ${suiteName}`, + `test: ${testName}`, + `iteration: ${r.iteration}`, + `runStartTime: ${new Date(startTime).toISOString()}`, + "", + "variable:", + JSON.stringify(r.variable, null, 2), + "", + "error:", + r.error instanceof Error ? (r.error.stack ?? r.error.message) : errorMessage, + "", + ].join("\n") + + failedIterationLogs.push({ filePath: logAbsolutePath, content: logLines }) + + errors.push({ + message: errorMessage, + log: logRelativePath, + }) + } + + tests.push({ + testName, + pass_count: passedIterations, + fail_count: failedIterations, + passed: aggregatedPassed, + errors, + }) + } + + aggregatedSuites.push({ suite: suiteName, tests }) + } + + const jsonPayload = { + summary: { + totalIterations: results.length, + passedIterations: passedTotal, + failedIterations: failedTotal, + durationMs: duration, + startTime: new Date(startTime).toISOString(), + }, + results: aggregatedSuites, + } + await fs.mkdir(resultsDir, { recursive: true }) + if (failedIterationLogs.length > 0) { + await fs.mkdir(logsDir, { recursive: true }) + for (const entry of failedIterationLogs) { + await fs.writeFile(entry.filePath, entry.content, "utf8") + } + } + await fs.writeFile(filePath, JSON.stringify(jsonPayload, null, 2), "utf8") + } catch (err) { + runnerConsole.error("Failed to write matrix JSON to results folder:", err) + } + } + clearGlobalHooks() + return results +} diff --git a/packages/evally/src/runner/types.ts b/packages/evally/src/runner/types.ts new file mode 100644 index 00000000000..a33d3429f94 --- /dev/null +++ b/packages/evally/src/runner/types.ts @@ -0,0 +1,42 @@ +export interface MatrixVariable { + [key: string]: 
any +} + +export interface MatrixTestContext { + variable: MatrixVariable + iteration: number +} + +export type MatrixTestFn = (ctx: MatrixTestContext) => Promise | void + +export interface MatrixTestDescription { + suite: string + name: string + fn: MatrixTestFn +} + +export interface MatrixSuiteDefinition { + variables: MatrixVariable[] + iterations: number + tests: () => void + suiteSetup?: () => Promise | void + suiteTeardown?: () => Promise | void + setup?: () => Promise | void + teardown?: () => Promise | void +} + +export interface MatrixTestResult { + suite: string + variable: MatrixVariable + iteration: number + testName: string + passed: boolean + error?: any +} + +export type MatrixVerbosity = "silent" | "summary" | "verbose" + +export interface MatrixRunOptions { + report?: boolean + verbosity?: MatrixVerbosity +} diff --git a/packages/evally/tsconfig.json b/packages/evally/tsconfig.json new file mode 100644 index 00000000000..94c1303e520 --- /dev/null +++ b/packages/evally/tsconfig.json @@ -0,0 +1,19 @@ +{ + "compilerOptions": { + "target": "ESNext", + "module": "ESNext", + "declaration": true, + "declarationDir": "dist", + "outDir": "dist", + "strict": true, + "esModuleInterop": true, + "moduleResolution": "node", + "resolveJsonModule": true, + "baseUrl": "./src", + "rootDir": "./src", + "forceConsistentCasingInFileNames": true, + "skipLibCheck": true + }, + "include": ["src/**/*.ts"], + "exclude": ["node_modules", "dist"] +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 5589df1b424..72979fc891e 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -117,6 +117,37 @@ importers: specifier: 5.8.3 version: 5.8.3 + apps/vscode-evals: + dependencies: + vscode: + specifier: ^1.1.37 + version: 1.1.37 + devDependencies: + '@roo-code/config-eslint': + specifier: workspace:^ + version: link:../../packages/config-eslint + '@roo-code/config-typescript': + specifier: workspace:^ + version: link:../../packages/config-typescript + '@roo-code/evally': + specifier: 
workspace:^ + version: link:../../packages/evally + '@roo-code/types': + specifier: workspace:^ + version: link:../../packages/types + '@types/vscode': + specifier: ^1.95.0 + version: 1.103.0 + '@vscode/test-cli': + specifier: ^0.0.11 + version: 0.0.11 + '@vscode/test-electron': + specifier: ^2.4.0 + version: 2.5.2 + typescript: + specifier: 5.8.3 + version: 5.8.3 + apps/vscode-nightly: devDependencies: '@roo-code/build': @@ -505,6 +536,8 @@ importers: specifier: ^3.2.3 version: 3.2.4(@types/debug@4.1.12)(@types/node@24.2.1)(@vitest/ui@3.2.4)(jiti@2.4.2)(jsdom@26.1.0)(lightningcss@1.30.1)(tsx@4.19.4)(yaml@2.8.0) + packages/evally: {} + packages/evals: dependencies: '@roo-code/ipc': @@ -3947,6 +3980,10 @@ packages: peerDependencies: '@testing-library/dom': '>=7.21.4' + '@tootallnate/once@1.1.2': + resolution: {integrity: sha512-RbzJvlNzmRq5c3O09UipeuXno4tA1FE6ikOjxZK0tuxVv3412l64l5t1W5pj4+rJq9vpkm/kwiR07aZXnsKPxw==} + engines: {node: '>= 6'} + '@tootallnate/quickjs-emscripten@0.23.0': resolution: {integrity: sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==} @@ -4460,6 +4497,14 @@ packages: engines: {node: '>=0.4.0'} hasBin: true + agent-base@4.3.0: + resolution: {integrity: sha512-salcGninV0nPrwpGNn4VTXBb1SOuXQBiqbrNXoeizJsHrsL6ERFM2Ne3JUSBWRE6aeNJI2ROP/WEEIDUiDe3cg==} + engines: {node: '>= 4.0.0'} + + agent-base@6.0.2: + resolution: {integrity: sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==} + engines: {node: '>= 6.0.0'} + agent-base@7.1.3: resolution: {integrity: sha512-jRR5wdylq8CkOe6hei19GGZnxM6rBGwFl3Bg0YItGDimvjGtAvdZk4Pu6Cl4u4Igsws4a1fd1Vq3ezrhn4KmFw==} engines: {node: '>= 14'} @@ -5031,6 +5076,9 @@ packages: resolution: {integrity: sha512-2uM9rYjPvyq39NwLRqaiLtWHyDC1FvryJDa2ATTVims5YAS4PupsEQsDvP14FqhFr0P49CYDugi59xaxJlTXRA==} engines: {node: '>=20'} + commander@2.15.1: + resolution: {integrity: 
sha512-VlfT9F3V0v+jr4yxPc5gg9s62/fIVWsd2Bk2iD435um1NlGMYdVCq+MjcXnhYq2icNOizHr1kK+5TI6H0Hy0ag==} + commander@4.1.1: resolution: {integrity: sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA==} engines: {node: '>= 6'} @@ -5366,6 +5414,14 @@ packages: resolution: {integrity: sha512-Xks6RUDLZFdz8LIdR6q0MTH44k7FikOmnh5xkSjMig6ch45afc8sjTjRQf3P6ax8dMgcQrYO/AR2RGWURrruqw==} engines: {node: '>=18'} + debug@3.1.0: + resolution: {integrity: sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==} + peerDependencies: + supports-color: '*' + peerDependenciesMeta: + supports-color: + optional: true + debug@3.2.7: resolution: {integrity: sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==} peerDependencies: @@ -5518,6 +5574,10 @@ packages: resolution: {integrity: sha512-EjePK1srD3P08o2j4f0ExnylqRs5B9tJjcp9t1krH2qRi8CCdsYfwe9JgSLurFBWwq4uOlipzfk5fHNvwFKr8Q==} engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + diff@3.5.0: + resolution: {integrity: sha512-A46qtFgd+g7pDZinpnwiRJtxbC1hpgf0uzP3iG89scHk0AUC7A1TGxf5OiiOUv/JMZR8GOt8hL900hV0bOy5xA==} + engines: {node: '>=0.3.1'} + diff@5.2.0: resolution: {integrity: sha512-uIFDxqpRZGZ6ThOk84hEfqWoHx2devRFvpTZcTHur85vImfaxUbTW9Ryh4CpCuDnToOP1CEtXKIgytHBPVff5A==} engines: {node: '>=0.3.1'} @@ -5815,6 +5875,12 @@ packages: resolution: {integrity: sha512-w+5mJ3GuFL+NjVtJlvydShqE1eN3h3PbI7/5LAsYJP/2qtuMXjfL2LpHSRqo4b4eSF5K/DH1JXKUAHSB2UW50g==} engines: {node: '>= 0.4'} + es6-promise@4.2.8: + resolution: {integrity: sha512-HJDGx5daxeIvxdBxvG2cb9g4tEvwIk3i8+nhX0yGrYmZUzbkdg8QbDevheDB8gd0//uPj4c1EQua8Q+MViT0/w==} + + es6-promisify@5.0.0: + resolution: {integrity: sha512-C+d6UdsYDk0lMebHNR4S2NybQMMngAOnOwYBQjTOiv0MkoJMP0Myw2mgpDLBcpfCmRLxyFqYhS/CfOENq4SJhQ==} + esbuild-register@3.6.0: resolution: {integrity: sha512-H2/S7Pm8a9CL1uhp9OvjwrBh5Pvx0H8qVOxNu8Wed9Y7qv56MPtq+GGM8RJpq6glYJn9Wspr8uw7l55uyinNeg==} 
peerDependencies: @@ -6452,6 +6518,10 @@ packages: resolution: {integrity: sha512-5v6yZd4JK3eMI3FqqCouswVqwugaA9r4dNZB1wwcmrD02QkV5H0y7XBQW8QwQqEaZY1pM9aqORSORhJRdNK44Q==} engines: {node: '>=6.0'} + growl@1.10.5: + resolution: {integrity: sha512-qBr4OuELkhPenW6goKVXiv47US3clb3/IbuWF9KNKEijAy9oeHxU9IgzjvJhHkUzhaj7rOUD7+YGWqUjLp5oSA==} + engines: {node: '>=4.x'} + gtoken@7.1.0: resolution: {integrity: sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==} engines: {node: '>=14.0.0'} @@ -6542,6 +6612,10 @@ packages: hastscript@9.0.1: resolution: {integrity: sha512-g7df9rMFX/SPi34tyGCyUBREQoKkapwdY/T04Qn9TDWfHhAYt4/I0gMVirzK5wEzeUqIjEB+LXC/ypb7Aqno5w==} + he@1.1.1: + resolution: {integrity: sha512-z/GDPjlRMNOa2XJiB4em8wJpuuBfrFOlYKTZxtpkdr1uPdibHI8rYA3MY0KDObpVyaes0e/aunid/t88ZI2EKA==} + hasBin: true + he@1.2.0: resolution: {integrity: sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==} hasBin: true @@ -6584,10 +6658,26 @@ packages: resolution: {integrity: sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ==} engines: {node: '>= 0.8'} + http-proxy-agent@2.1.0: + resolution: {integrity: sha512-qwHbBLV7WviBl0rQsOzH6o5lwyOIvwp/BdFnvVxXORldu5TmjFfjzBcWUWS5kWAZhmv+JtiDhSuQCp4sBfbIgg==} + engines: {node: '>= 4.5.0'} + + http-proxy-agent@4.0.1: + resolution: {integrity: sha512-k0zdNgqWTGA6aeIRVpvfVob4fL52dTfaehylg0Y4UvSySvOq/Y+BOyPrgpUrA7HylqvU8vIZGsRuXmspskV0Tg==} + engines: {node: '>= 6'} + http-proxy-agent@7.0.2: resolution: {integrity: sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==} engines: {node: '>= 14'} + https-proxy-agent@2.2.4: + resolution: {integrity: sha512-OmvfoQ53WLjtA9HeYP9RNrWMJzzAz1JGaSFr1nijg0PVR1JaD/xbJq1mdEIIlxGpXp9eSe/O2LgU9DJmTPd0Eg==} + engines: {node: '>= 4.5.0'} + + https-proxy-agent@5.0.1: + resolution: {integrity: 
sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==} + engines: {node: '>= 6'} + https-proxy-agent@7.0.6: resolution: {integrity: sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==} engines: {node: '>= 14'} @@ -7757,6 +7847,9 @@ packages: resolution: {integrity: sha512-enIvLvRAFZYXJzkCYG5RKmPfrFArdLv+R+lbQ53BmIMLIry74bjKzX6iHAm8WYamJkhSSEabrWN5D97XnKObjQ==} engines: {node: 20 || >=22} + minimatch@3.0.4: + resolution: {integrity: sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==} + minimatch@3.1.2: resolution: {integrity: sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==} @@ -7768,6 +7861,9 @@ packages: resolution: {integrity: sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==} engines: {node: '>=16 || 14 >=14.17'} + minimist@0.0.8: + resolution: {integrity: sha512-miQKw5Hv4NS1Psg2517mV4e4dYNaO3++hjAvLOAzKqZ61rH8NS1SK+vbfBWZ5PY/Me/bEWhUwqMghEW5Fb9T7Q==} + minimist@1.2.8: resolution: {integrity: sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==} @@ -7785,6 +7881,11 @@ packages: mkdirp-classic@0.5.3: resolution: {integrity: sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==} + mkdirp@0.5.1: + resolution: {integrity: sha512-SknJC52obPfGQPnjIkXbmA6+5H15E+fR+E4iR2oQ3zzCLbd7/ONua69R/Gw7AgkTLsRG+r5fzksYwWe1AgTyWA==} + deprecated: Legacy versions of mkdirp are no longer supported. Please update to mkdirp 1.x. (Note that the API surface has changed to use Promises in 1.x.) 
+ hasBin: true + mkdirp@0.5.6: resolution: {integrity: sha512-FP+p8RB8OWpF3YZBCrP5gtADmtXApB5AMLn+vdyA+PyxCjrCs00mjyUozssO33cwDeT3wNGdLxJ5M//YqtHAJw==} hasBin: true @@ -7807,6 +7908,11 @@ packages: engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} hasBin: true + mocha@5.2.0: + resolution: {integrity: sha512-2IUgKDhc3J7Uug+FxMXuqIyYzH7gJjXECKe/w43IGgQHTSj3InJi+yAA7T24L9bQMRKiUEHxEX37G5JpVUGLcQ==} + engines: {node: '>= 4.0.0'} + hasBin: true + monaco-vscode-textmate-theme-converter@0.1.7: resolution: {integrity: sha512-ZMsq1RPWwOD3pvXD0n+9ddnhfzZoiUMwNIWPNUqYqEiQeH2HjyZ9KYOdt/pqe0kkN8WnYWLrxT9C/SrtIsAu2Q==} hasBin: true @@ -7827,6 +7933,9 @@ packages: resolution: {integrity: sha512-Y3wQdFg2Va6etvQ5I82yUhGdsKrcYox6p7FfL1LbK2J4V01F9TGlepTIhnK24t7koZibmg82KGglhA1XK5IsLQ==} engines: {node: '>=10'} + ms@2.0.0: + resolution: {integrity: sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==} + ms@2.1.3: resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} @@ -9393,6 +9502,10 @@ packages: engines: {node: '>=16 || 14 >=14.17'} hasBin: true + supports-color@5.4.0: + resolution: {integrity: sha512-zjaXglF5nnWpsq470jSv6P9DwPvgLkuapYmfDm3JWOm0vkNTVF2tI4UrN2r6jH1qM/uc/WtxYY1hYoA2dOKj5w==} + engines: {node: '>=4'} + supports-color@5.5.0: resolution: {integrity: sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==} engines: {node: '>=4'} @@ -10086,9 +10199,20 @@ packages: vscode-material-icons@0.1.1: resolution: {integrity: sha512-GsoEEF8Tbb0yUFQ6N6FPvh11kFkL9F95x0FkKlbbfRQN9eFms67h+L3t6b9cUv58dSn2gu8kEhNfoESVCrz4ag==} + vscode-test@0.4.3: + resolution: {integrity: sha512-EkMGqBSefZH2MgW65nY05rdRSko15uvzq4VAPM5jVmwYuFQKE7eikKXNJDRxL+OITXHB6pI+a3XqqD32Y3KC5w==} + engines: {node: '>=8.9.3'} + deprecated: This package has been renamed to @vscode/test-electron, please update to the new name + vscode-uri@3.0.8: resolution: {integrity: 
sha512-AyFQ0EVmsOZOlAnxoFOGOq1SQDWAB7C6aqMGS23svWAllfOaxbuFvcT8D1i8z3Gyn8fraVeZNNmN6e9bxxXkKw==} + vscode@1.1.37: + resolution: {integrity: sha512-vJNj6IlN7IJPdMavlQa1KoFB3Ihn06q1AiN3ZFI/HfzPNzbKZWPPuiU+XkpNOfGU5k15m4r80nxNPlM7wcc0wg==} + engines: {node: '>=8.9.3'} + deprecated: 'This package is deprecated in favor of @types/vscode and vscode-test. For more information please read: https://code.visualstudio.com/updates/v1_36#_splitting-vscode-package-into-typesvscode-and-vscodetest' + hasBin: true + vscrui@0.2.2: resolution: {integrity: sha512-buw2OipqUl7GCBq1mxcAjUwoUsslGzVhdaxDPmEx27xzc3QAJJZHtT30QbakgZVJ1Jb3E6kcsguUIFEGxrgkyQ==} peerDependencies: @@ -11180,7 +11304,7 @@ snapshots: '@babel/parser': 7.27.2 '@babel/template': 7.27.2 '@babel/types': 7.27.1 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 globals: 11.12.0 transitivePeerDependencies: - supports-color @@ -13658,6 +13782,8 @@ snapshots: dependencies: '@testing-library/dom': 10.4.0 + '@tootallnate/once@1.1.2': {} + '@tootallnate/quickjs-emscripten@0.23.0': {} '@tybys/wasm-util@0.9.0': @@ -14043,7 +14169,7 @@ snapshots: dependencies: '@typescript-eslint/typescript-estree': 8.32.1(typescript@5.8.3) '@typescript-eslint/utils': 8.32.1(eslint@9.27.0(jiti@2.4.2))(typescript@5.8.3) - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 eslint: 9.27.0(jiti@2.4.2) ts-api-utils: 2.1.0(typescript@5.8.3) typescript: 5.8.3 @@ -14056,7 +14182,7 @@ snapshots: dependencies: '@typescript-eslint/types': 8.32.1 '@typescript-eslint/visitor-keys': 8.32.1 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 fast-glob: 3.3.3 is-glob: 4.0.3 minimatch: 9.0.5 @@ -14306,6 +14432,16 @@ snapshots: acorn@8.15.0: {} + agent-base@4.3.0: + dependencies: + es6-promisify: 5.0.0 + + agent-base@6.0.2: + dependencies: + debug: 4.4.3 + transitivePeerDependencies: + - supports-color + agent-base@7.1.3: {} agentkeepalive@4.6.0: @@ -14607,7 +14743,7 @@ snapshots: dependencies: bytes: 3.1.2 content-type: 1.0.5 - debug: 4.4.1(supports-color@8.1.1) 
+ debug: 4.4.3 http-errors: 2.0.0 iconv-lite: 0.6.3 on-finished: 2.4.1 @@ -14953,6 +15089,8 @@ snapshots: commander@14.0.0: {} + commander@2.15.1: {} + commander@4.1.1: {} commander@6.2.1: {} @@ -15312,6 +15450,12 @@ snapshots: debounce@2.2.0: {} + debug@3.1.0(supports-color@5.4.0): + dependencies: + ms: 2.0.0 + optionalDependencies: + supports-color: 5.4.0 + debug@3.2.7: dependencies: ms: 2.1.3 @@ -15420,6 +15564,8 @@ snapshots: diff-sequences@29.6.3: {} + diff@3.5.0: {} + diff@5.2.0: {} dijkstrajs@1.0.3: {} @@ -15698,6 +15844,12 @@ snapshots: is-date-object: 1.1.0 is-symbol: 1.1.1 + es6-promise@4.2.8: {} + + es6-promisify@5.0.0: + dependencies: + es6-promise: 4.2.8 + esbuild-register@3.6.0(esbuild@0.25.9): dependencies: debug: 4.4.1(supports-color@8.1.1) @@ -16086,7 +16238,7 @@ snapshots: extract-zip@2.0.1: dependencies: - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 get-stream: 5.2.0 yauzl: 2.10.0 optionalDependencies: @@ -16187,7 +16339,7 @@ snapshots: finalhandler@2.1.0: dependencies: - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 encodeurl: 2.0.0 escape-html: 1.0.3 on-finished: 2.4.1 @@ -16435,7 +16587,7 @@ snapshots: dependencies: basic-ftp: 5.0.5 data-uri-to-buffer: 6.0.2 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 transitivePeerDependencies: - supports-color @@ -16524,6 +16676,8 @@ snapshots: section-matter: 1.0.0 strip-bom-string: 1.0.0 + growl@1.10.5: {} + gtoken@7.1.0: dependencies: gaxios: 6.7.1 @@ -16699,6 +16853,8 @@ snapshots: property-information: 7.1.0 space-separated-tokens: 2.0.2 + he@1.1.1: {} + he@1.2.0: {} hex-rgb@4.3.0: {} @@ -16740,6 +16896,21 @@ snapshots: statuses: 2.0.1 toidentifier: 1.0.1 + http-proxy-agent@2.1.0: + dependencies: + agent-base: 4.3.0 + debug: 3.1.0(supports-color@5.4.0) + transitivePeerDependencies: + - supports-color + + http-proxy-agent@4.0.1: + dependencies: + '@tootallnate/once': 1.1.2 + agent-base: 6.0.2 + debug: 4.4.3 + transitivePeerDependencies: + - supports-color + http-proxy-agent@7.0.2: 
dependencies: agent-base: 7.1.3 @@ -16747,6 +16918,20 @@ snapshots: transitivePeerDependencies: - supports-color + https-proxy-agent@2.2.4: + dependencies: + agent-base: 4.3.0 + debug: 3.2.7 + transitivePeerDependencies: + - supports-color + + https-proxy-agent@5.0.1: + dependencies: + agent-base: 6.0.2 + debug: 4.4.3 + transitivePeerDependencies: + - supports-color + https-proxy-agent@7.0.6: dependencies: agent-base: 7.1.3 @@ -17229,7 +17414,7 @@ snapshots: lodash.isstring: 4.0.1 lodash.once: 4.1.1 ms: 2.1.3 - semver: 7.7.2 + semver: 7.7.3 jsx-ast-utils@3.3.5: dependencies: @@ -18069,7 +18254,7 @@ snapshots: micromark@2.11.4: dependencies: - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 parse-entities: 2.0.0 transitivePeerDependencies: - supports-color @@ -18134,6 +18319,10 @@ snapshots: dependencies: '@isaacs/brace-expansion': 5.0.0 + minimatch@3.0.4: + dependencies: + brace-expansion: 2.0.2 + minimatch@3.1.2: dependencies: brace-expansion: 2.0.2 @@ -18146,6 +18335,8 @@ snapshots: dependencies: brace-expansion: 2.0.2 + minimist@0.0.8: {} + minimist@1.2.8: {} minipass@7.1.2: {} @@ -18159,6 +18350,10 @@ snapshots: mkdirp-classic@0.5.3: optional: true + mkdirp@0.5.1: + dependencies: + minimist: 0.0.8 + mkdirp@0.5.6: dependencies: minimist: 1.2.8 @@ -18197,6 +18392,20 @@ snapshots: yargs-parser: 21.1.1 yargs-unparser: 2.0.0 + mocha@5.2.0: + dependencies: + browser-stdout: 1.3.1 + commander: 2.15.1 + debug: 3.1.0(supports-color@5.4.0) + diff: 3.5.0 + escape-string-regexp: 1.0.5 + glob: 11.1.0 + growl: 1.10.5 + he: 1.1.1 + minimatch: 3.0.4 + mkdirp: 0.5.1 + supports-color: 5.4.0 + monaco-vscode-textmate-theme-converter@0.1.7(tslib@2.8.1): dependencies: commander: 8.3.0 @@ -18213,6 +18422,8 @@ snapshots: mrmime@2.0.1: {} + ms@2.0.0: {} + ms@2.1.3: {} mute-stream@0.0.8: {} @@ -18295,7 +18506,7 @@ snapshots: node-abi@3.75.0: dependencies: - semver: 7.7.2 + semver: 7.7.3 optional: true node-addon-api@4.3.0: @@ -18578,7 +18789,7 @@ snapshots: dependencies: 
'@tootallnate/quickjs-emscripten': 0.23.0 agent-base: 7.1.3 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 get-uri: 6.0.4 http-proxy-agent: 7.0.2 https-proxy-agent: 7.0.6 @@ -18896,7 +19107,7 @@ snapshots: proxy-agent@6.5.0: dependencies: agent-base: 7.1.3 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 http-proxy-agent: 7.0.2 https-proxy-agent: 7.0.6 lru-cache: 7.18.3 @@ -19445,7 +19656,7 @@ snapshots: router@2.2.0: dependencies: - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 depd: 2.0.0 is-promise: 4.0.0 parseurl: 1.3.3 @@ -19551,7 +19762,7 @@ snapshots: send@1.2.0: dependencies: - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 encodeurl: 2.0.0 escape-html: 1.0.3 etag: 1.8.1 @@ -19768,7 +19979,7 @@ snapshots: socks-proxy-agent@8.0.5: dependencies: agent-base: 7.1.3 - debug: 4.4.1(supports-color@8.1.1) + debug: 4.4.3 socks: 2.8.4 transitivePeerDependencies: - supports-color @@ -20044,6 +20255,10 @@ snapshots: pirates: 4.0.7 ts-interface-checker: 0.1.13 + supports-color@5.4.0: + dependencies: + has-flag: 3.0.0 + supports-color@5.5.0: dependencies: has-flag: 3.0.0 @@ -20971,8 +21186,27 @@ snapshots: vscode-material-icons@0.1.1: {} + vscode-test@0.4.3: + dependencies: + http-proxy-agent: 2.1.0 + https-proxy-agent: 2.2.4 + transitivePeerDependencies: + - supports-color + vscode-uri@3.0.8: {} + vscode@1.1.37: + dependencies: + glob: 11.1.0 + http-proxy-agent: 4.0.1 + https-proxy-agent: 5.0.1 + mocha: 5.2.0 + semver: 5.7.2 + source-map-support: 0.5.21 + vscode-test: 0.4.3 + transitivePeerDependencies: + - supports-color + vscrui@0.2.2(@types/react@18.3.23)(react@18.3.1): dependencies: '@types/react': 18.3.23 From c4ca6f745188f8525faa2c1a827e6ae86a0044d5 Mon Sep 17 00:00:00 2001 From: Dennise Bartlett Date: Wed, 24 Dec 2025 10:26:13 -0800 Subject: [PATCH 2/3] fix: Fix lock-file --- pnpm-lock.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 72979fc891e..b7f803fb0e4 100644 --- a/pnpm-lock.yaml +++ 
b/pnpm-lock.yaml @@ -144,6 +144,9 @@ importers: '@vscode/test-electron': specifier: ^2.4.0 version: 2.5.2 + rimraf: + specifier: ^6.0.1 + version: 6.0.1 typescript: specifier: 5.8.3 version: 5.8.3 From 032d4a5300e855a9f3c45e1726c532a03d623525 Mon Sep 17 00:00:00 2001 From: Dennise Bartlett Date: Wed, 24 Dec 2025 11:13:44 -0800 Subject: [PATCH 3/3] chore: Add precheck-types script to build before checking types --- package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/package.json b/package.json index 0f3c3b7ba04..b00ecf3f81a 100644 --- a/package.json +++ b/package.json @@ -10,6 +10,7 @@ "install": "node scripts/bootstrap.mjs", "install:all": "node scripts/bootstrap.mjs", "lint": "turbo lint --log-order grouped --output-logs new-only", + "precheck-types": "pnpm build", "check-types": "turbo check-types --log-order grouped --output-logs new-only", "test": "turbo test --log-order grouped --output-logs new-only", "format": "turbo format --log-order grouped --output-logs new-only",