From d77a07d57e41ae47a7340909c6fdc2a0c10c1a0c Mon Sep 17 00:00:00 2001 From: Hugo Dutka Date: Wed, 17 Dec 2025 12:59:50 +0100 Subject: [PATCH 1/2] revert previous compaction strategy --- bun.lock | 23 +- packages/scout-agent/lib/compaction.test.ts | 625 -------------------- packages/scout-agent/lib/compaction.ts | 435 -------------- packages/scout-agent/lib/core.test.ts | 473 +-------------- packages/scout-agent/lib/core.ts | 55 +- packages/scout-agent/lib/index.ts | 1 - packages/scout-agent/package.json | 3 +- 7 files changed, 10 insertions(+), 1605 deletions(-) delete mode 100644 packages/scout-agent/lib/compaction.test.ts delete mode 100644 packages/scout-agent/lib/compaction.ts diff --git a/bun.lock b/bun.lock index 8811d39..51f8e99 100644 --- a/bun.lock +++ b/bun.lock @@ -257,7 +257,7 @@ }, "packages/scout-agent": { "name": "@blink-sdk/scout-agent", - "version": "0.0.10", + "version": "0.0.9", "dependencies": { "@blink-sdk/compute": "^0.0.15", "@blink-sdk/github": "^0.0.22", @@ -265,7 +265,6 @@ "@blink-sdk/multiplexer": "^0.0.1", "@blink-sdk/slack": "^1.1.2", "@octokit/webhooks": "^14.1.3", - "ai-tokenizer": "^1.0.6", "exa-js": "^2.0.3", }, "devDependencies": { @@ -385,7 +384,7 @@ "@ai-sdk/anthropic": ["@ai-sdk/anthropic@2.0.23", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.10" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-ZEBiiv1UhjGjBwUU63pFhLK5LCSlNDb1idY9K1oZHm5/Fda1cuTojf32tOp0opH0RPbPAN/F8fyyNjbU33n9Kw=="], - "@ai-sdk/gateway": ["@ai-sdk/gateway@2.0.21", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.19", "@vercel/oidc": "3.0.5" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-BwV7DU/lAm3Xn6iyyvZdWgVxgLu3SNXzl5y57gMvkW4nGhAOV5269IrJzQwGt03bb107sa6H6uJwWxc77zXoGA=="], + "@ai-sdk/gateway": ["@ai-sdk/gateway@2.0.19", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.18", "@vercel/oidc": "3.0.5" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-cybb+k/3Kj9BX+Am1mun3dafZsHQLIzW2A4fu5FVTLSIGXXbcuXwXNNdYMGs+B0y6RYOQ8VHbf1QslMSDIxQMA=="], "@ai-sdk/google": ["@ai-sdk/google@2.0.17", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.10" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-6LyuUrCZuiULg0rUV+kT4T2jG19oUntudorI4ttv1ARkSbwl8A39ue3rA487aDDy6fUScdbGFiV5Yv/o4gidVA=="], @@ -395,7 +394,7 @@ "@ai-sdk/provider": ["@ai-sdk/provider@2.0.0", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-6o7Y2SeO9vFKB8lArHXehNuusnpddKPk7xqL7T2/b+OvXMRIXUO1rR4wcv1hAFUAT9avGZshty3Wlua/XA7TvA=="], - "@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.19", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-W41Wc9/jbUVXVwCN/7bWa4IKe8MtxO3EyA0Hfhx6grnmiYlCvpI8neSYWFE0zScXJkgA/YK3BRybzgyiXuu6JA=="], + "@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@3.0.18", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-ypv1xXMsgGcNKUP+hglKqtdDuMg68nWHucPPAhIENrbFAI+xCHiqPVN8Zllxyv1TNZwGWUghPxJXU+Mqps0YRQ=="], "@ai-sdk/react": ["@ai-sdk/react@2.0.60", "", { "dependencies": { "@ai-sdk/provider-utils": "3.0.10", "ai": "5.0.60", "swr": "^2.2.5", "throttleit": "2.1.0" }, "peerDependencies": { "react": "^18 || ^19 || ^19.0.0-rc", "zod": 
"^3.25.76 || ^4.1.8" }, "optionalPeers": ["zod"] }, "sha512-Ev0MC0I7eDcCH4FnrHzK48g9bJjyF3F67MMq76qoVsbtcs6fGIO5RjmYgPoFeSo8/yQ5EM6i/14yfcD0oB+moA=="], @@ -1685,7 +1684,7 @@ "@types/mysql": ["@types/mysql@2.15.27", "", { "dependencies": { "@types/node": "*" } }, "sha512-YfWiV16IY0OeBfBCk8+hXKmdTKrKlwKN1MNKAPBu5JYxLwBEZl7QzeEpGnlZb3VMGJrrGmB84gXiH+ofs/TezA=="], - "@types/node": ["@types/node@25.0.2", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-gWEkeiyYE4vqjON/+Obqcoeffmk0NF15WSBwSs7zwVA2bAbTaE0SJ7P0WNGoJn8uE7fiaV5a7dKYIJriEqOrmA=="], + "@types/node": ["@types/node@25.0.0", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-rl78HwuZlaDIUSeUKkmogkhebA+8K1Hy7tddZuJ3D0xV8pZSfsYGTsliGUol1JPzu9EKnTxPC4L1fiWouStRew=="], "@types/normalize-package-data": ["@types/normalize-package-data@2.4.4", "", {}, "sha512-37i+OaWTh9qeK4LSHPsyRC7NahnGotNuZvjLSgcPzblpHB3rrCJxAOgI5gCdKm7coonsaX1Of0ILiTcnZjbfxA=="], @@ -1807,9 +1806,7 @@ "aggregate-error": ["aggregate-error@3.1.0", "", { "dependencies": { "clean-stack": "^2.0.0", "indent-string": "^4.0.0" } }, "sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA=="], - "ai": ["ai@5.0.113", "", { "dependencies": { "@ai-sdk/gateway": "2.0.21", "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.19", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-26vivpSO/mzZj0k1Si2IpsFspp26ttQICHRySQiMrtWcRd5mnJMX2a8sG28vmZ38C+JUn1cWmfZrsLMxkSMw9g=="], - - "ai-tokenizer": ["ai-tokenizer@1.0.6", "", { "peerDependencies": { "ai": "^5.0.0" }, "optionalPeers": ["ai"] }, "sha512-GaakQFxen0pRH/HIA4v68ZM40llCH27HUYUSBLK+gVuZ57e53pYJe1xFvSTj4sJJjbWU92m1X6NjPWyeWkFDow=="], + "ai": ["ai@5.0.110", "", { "dependencies": { "@ai-sdk/gateway": "2.0.19", "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.18", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-ZBq+5bvef4e5qoIG4U6NJ1UpCPWGjuaWERHXbHu2T2ND3c02nJ2zlnjm+N6zAAplQPxwqm7Sb16mrRX5uQNWtQ=="], "ajv": ["ajv@8.17.1", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g=="], @@ -3971,7 +3968,7 @@ "yoga-layout": ["yoga-layout@3.2.1", "", {}, "sha512-0LPOt3AxKqMdFBZA3HBAt/t/8vIKq7VaQYbuA8WxCgung+p9TVyKRYdpvCb80HcdTN2NkbIKbhNwKUfm3tQywQ=="], - "zod": ["zod@4.2.1", "", {}, "sha512-0wZ1IRqGGhMP76gLqz8EyfBXKk0J2qo2+H3fi4mcUP/KtTocoX08nmIAHl1Z2kJIZbZee8KOpBCSNPRgauucjw=="], + "zod": ["zod@4.1.13", "", {}, "sha512-AvvthqfqrAhNH9dnfmrfKzX5upOdjUVJYFqNSlkmGf64gRaTzlPwz99IHYnVs28qYAybvAlBV+H7pn0saFY4Ig=="], "zod-to-json-schema": ["zod-to-json-schema@3.24.6", "", { "peerDependencies": { "zod": "^3.24.1" } }, "sha512-h/z3PKvcTcTetyjl1fkj79MHNEjm+HpD6NXheWjzOekY7kV+lwDYnHw+ivHkijnCSMz1yJaWBD9vu/Fcmk+vEg=="], @@ -4035,8 +4032,6 @@ "@blink-sdk/scout-agent/tsdown": ["tsdown@0.3.1", "", { "dependencies": { "cac": "^6.7.14", "chokidar": "^4.0.1", "consola": "^3.2.3", "debug": "^4.3.7", "picocolors": "^1.1.1", "pkg-types": "^1.2.1", "rolldown": "nightly", "tinyglobby": "^0.2.10", "unconfig": "^0.6.0", "unplugin-isolated-decl": "^0.7.2", "unplugin-unused": "^0.2.3" }, "bin": { "tsdown": "bin/tsdown.js" } }, "sha512-5WLFU7f2NRnsez0jxi7m2lEQNPvBOdos0W8vHvKDnS6tYTfOfmZ5D2z/G9pFTQSjeBhoi6BFRMybc4LzCOKR8A=="], - "@blink.so/api/zod": ["zod@4.1.13", "", {}, 
"sha512-AvvthqfqrAhNH9dnfmrfKzX5upOdjUVJYFqNSlkmGf64gRaTzlPwz99IHYnVs28qYAybvAlBV+H7pn0saFY4Ig=="], - "@blink.so/compute-protocol-worker/@blink-sdk/compute-protocol": ["@blink-sdk/compute-protocol@0.0.2", "", { "peerDependencies": { "ws": ">= 8", "zod": ">= 4" } }, "sha512-QD89Y4b3EbZjncROb6kwUr1uQV4N3UD9q7Hp2PzL4A2BAzsqk50w7KfN9RxfDiZ3fU7Pectg71T4M8ZCwdJcdQ=="], "@blink.so/site/dotenv": ["dotenv@16.6.1", "", {}, "sha512-uBq4egWHTcTt33a72vpSG0z3HnPuIl6NqYcTrKEg2azoEyl2hpW0zqlxysq2pK9HlDIHyHyakeYaYnSAwd8bow=="], @@ -4047,8 +4042,6 @@ "@blink/desktop/@blink.so/api": ["@blink.so/api@0.0.11", "", { "optionalDependencies": { "@blink-sdk/compute-protocol": ">= 0.0.2" }, "peerDependencies": { "ai": ">= 5", "react": ">= 18", "zod": ">= 4" }, "optionalPeers": ["react"] }, "sha512-4JW0fsGFn8IN5r+FpdbkqXkFqyCXQ8sDXoETdIBczLe3/+JP0Q2ItvN9XtR/eLNIshIL9Yz+gZtB6AVWQIcIWg=="], - "@blink/desktop/ai": ["ai@5.0.110", "", { "dependencies": { "@ai-sdk/gateway": "2.0.19", "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.18", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-ZBq+5bvef4e5qoIG4U6NJ1UpCPWGjuaWERHXbHu2T2ND3c02nJ2zlnjm+N6zAAplQPxwqm7Sb16mrRX5uQNWtQ=="], - "@blink/desktop/esbuild": ["esbuild@0.25.10", "", { "optionalDependencies": { "@esbuild/aix-ppc64": "0.25.10", "@esbuild/android-arm": "0.25.10", "@esbuild/android-arm64": "0.25.10", "@esbuild/android-x64": "0.25.10", "@esbuild/darwin-arm64": "0.25.10", "@esbuild/darwin-x64": "0.25.10", "@esbuild/freebsd-arm64": "0.25.10", "@esbuild/freebsd-x64": "0.25.10", "@esbuild/linux-arm": "0.25.10", "@esbuild/linux-arm64": "0.25.10", "@esbuild/linux-ia32": "0.25.10", "@esbuild/linux-loong64": "0.25.10", "@esbuild/linux-mips64el": "0.25.10", "@esbuild/linux-ppc64": "0.25.10", "@esbuild/linux-riscv64": "0.25.10", "@esbuild/linux-s390x": "0.25.10", "@esbuild/linux-x64": "0.25.10", "@esbuild/netbsd-arm64": "0.25.10", "@esbuild/netbsd-x64": "0.25.10", "@esbuild/openbsd-arm64": "0.25.10", "@esbuild/openbsd-x64": "0.25.10", "@esbuild/openharmony-arm64": "0.25.10", "@esbuild/sunos-x64": "0.25.10", "@esbuild/win32-arm64": "0.25.10", "@esbuild/win32-ia32": "0.25.10", "@esbuild/win32-x64": "0.25.10" }, "bin": { "esbuild": "bin/esbuild" } }, "sha512-9RiGKvCwaqxO2owP61uQ4BgNborAQskMR6QusfWzQqv7AZOg5oGehdY2pRJMTKuwxd1IDBP4rSbI5lHzU7SMsQ=="], "@blink/desktop/lucide-react": ["lucide-react@0.544.0", "", { "peerDependencies": { "react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0" } }, "sha512-t5tS44bqd825zAW45UQxpG2CvcC4urOwn2TrwSH8u+MjeE+1NnWl6QqeQ/6NdjMqdOygyiT9p3Ev0p1NJykxjw=="], @@ -4893,10 +4886,6 @@ "@blink.so/site/next-auth/@auth/core": ["@auth/core@0.41.0", "", { "dependencies": { "@panva/hkdf": "^1.2.1", "jose": "^6.0.6", "oauth4webapi": "^3.3.0", "preact": "10.24.3", "preact-render-to-string": "6.5.11" }, "peerDependencies": { "@simplewebauthn/browser": "^9.0.1", "@simplewebauthn/server": "^9.0.2", "nodemailer": "^6.8.0" }, "optionalPeers": ["@simplewebauthn/browser", "@simplewebauthn/server", "nodemailer"] }, "sha512-Wd7mHPQ/8zy6Qj7f4T46vg3aoor8fskJm6g2Zyj064oQ3+p0xNZXAV60ww0hY+MbTesfu29kK14Zk5d5JTazXQ=="], - "@blink/desktop/ai/@ai-sdk/gateway": ["@ai-sdk/gateway@2.0.19", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.18", "@vercel/oidc": "3.0.5" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-cybb+k/3Kj9BX+Am1mun3dafZsHQLIzW2A4fu5FVTLSIGXXbcuXwXNNdYMGs+B0y6RYOQ8VHbf1QslMSDIxQMA=="], - - "@blink/desktop/ai/@ai-sdk/provider-utils": 
["@ai-sdk/provider-utils@3.0.18", "", { "dependencies": { "@ai-sdk/provider": "2.0.0", "@standard-schema/spec": "^1.0.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-ypv1xXMsgGcNKUP+hglKqtdDuMg68nWHucPPAhIENrbFAI+xCHiqPVN8Zllxyv1TNZwGWUghPxJXU+Mqps0YRQ=="], - "@blink/desktop/esbuild/@esbuild/aix-ppc64": ["@esbuild/aix-ppc64@0.25.10", "", { "os": "aix", "cpu": "ppc64" }, "sha512-0NFWnA+7l41irNuaSVlLfgNT12caWJVLzp5eAVhZ0z1qpxbockccEt3s+149rE64VUI3Ml2zt8Nv5JVc4QXTsw=="], "@blink/desktop/esbuild/@esbuild/android-arm": ["@esbuild/android-arm@0.25.10", "", { "os": "android", "cpu": "arm" }, "sha512-dQAxF1dW1C3zpeCDc5KqIYuZ1tgAdRXNoZP7vkBIRtKZPYe2xVr/d3SkirklCHudW1B45tGiUlz2pUWDfbDD4w=="], diff --git a/packages/scout-agent/lib/compaction.test.ts b/packages/scout-agent/lib/compaction.test.ts deleted file mode 100644 index 0d5b62b..0000000 --- a/packages/scout-agent/lib/compaction.test.ts +++ /dev/null @@ -1,625 +0,0 @@ -/** biome-ignore-all lint/suspicious/noExplicitAny: testing */ -import { describe, expect, test } from "bun:test"; -import { - applyCompaction, - COMPACT_CONVERSATION_TOOL_NAME, - createCompactionMessage, - createCompactionTool, - findCompactionSummary, - prepareTruncatedMessages, -} from "./compaction"; -import type { Message } from "./types"; - -describe("compaction", () => { - describe("findCompactionSummary", () => { - test("returns null when no compaction exists", () => { - const messages: Message[] = [ - { - id: "1", - role: "user", - parts: [{ type: "text", text: "Hello" }], - }, - { - id: "2", - role: "assistant", - parts: [{ type: "text", text: "Hi there!" }], - }, - ]; - - expect(findCompactionSummary(messages)).toBeNull(); - }); - - test("finds compaction summary in assistant message", () => { - const messages: Message[] = [ - { - id: "1", - role: "user", - parts: [{ type: "text", text: "Hello" }], - }, - { - id: "2", - role: "assistant", - parts: [ - { - type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`, - state: "output-available", - output: { - summary: "This is the summary of the conversation.", - compacted_at: "2024-01-01T00:00:00.000Z", - }, - } as any, - ], - }, - { - id: "3", - role: "user", - parts: [{ type: "text", text: "Continue" }], - }, - ]; - - const result = findCompactionSummary(messages); - expect(result).not.toBeNull(); - expect(result?.index).toBe(1); - expect(result?.summary).toBe("This is the summary of the conversation."); - }); - - test("finds most recent compaction when multiple exist", () => { - const messages: Message[] = [ - { - id: "1", - role: "assistant", - parts: [ - { - type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`, - state: "output-available", - output: { summary: "First summary" }, - } as any, - ], - }, - { - id: "2", - role: "user", - parts: [{ type: "text", text: "More conversation" }], - }, - { - id: "3", - role: "assistant", - parts: [ - { - type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`, - state: "output-available", - output: { summary: "Second summary" }, - } as any, - ], - }, - ]; - - const result = findCompactionSummary(messages); - expect(result?.index).toBe(2); - expect(result?.summary).toBe("Second summary"); - }); - - test("ignores compaction tool in non-output-available state", () => { - const messages: Message[] = [ - { - id: "1", - role: "assistant", - parts: [ - { - type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`, - state: "input-available", - input: { summary: "Not yet complete" }, - } as any, - ], - }, - ]; - - expect(findCompactionSummary(messages)).toBeNull(); - }); - 
- test("returns preservedMessageIds when present in output", () => { - const preservedIds = ["msg-4", "msg-5"]; - const messages: Message[] = [ - { - id: "1", - role: "assistant", - parts: [ - { - type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`, - state: "output-available", - output: { - summary: "Emergency summary", - preservedMessageIds: preservedIds, - }, - } as any, - ], - }, - ]; - - const result = findCompactionSummary(messages); - expect(result).not.toBeNull(); - expect(result?.preservedMessageIds).toEqual(preservedIds); - }); - }); - - describe("applyCompaction", () => { - test("returns original messages when no compaction exists", () => { - const messages: Message[] = [ - { - id: "1", - role: "user", - parts: [{ type: "text", text: "Hello" }], - }, - ]; - - const result = applyCompaction(messages); - expect(result).toEqual(messages); - }); - - test("replaces messages before compaction with summary", () => { - const messages: Message[] = [ - { - id: "1", - role: "user", - parts: [{ type: "text", text: "Old message 1" }], - }, - { - id: "2", - role: "assistant", - parts: [{ type: "text", text: "Old response 1" }], - }, - { - id: "3", - role: "assistant", - parts: [ - { - type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`, - state: "output-available", - output: { summary: "Summary of old messages" }, - } as any, - ], - }, - { - id: "4", - role: "user", - parts: [{ type: "text", text: "New message" }], - }, - ]; - - const result = applyCompaction(messages); - - // Should have: summary message + new message (compaction message excluded) - expect(result.length).toBe(2); - - // First message should be the summary - expect(result[0]?.id).toBe("compaction-summary"); - expect(result[0]?.role).toBe("user"); - expect(result[0]?.parts[0]?.type).toBe("text"); - expect((result[0]?.parts[0] as { text: string }).text).toInclude( - "Summary of old messages" - ); - - // Should include messages after the compaction point (excluding compaction itself) - expect(result[1]?.id).toBe("4"); - }); - - test("keeps preserved messages by ID when preservedMessageIds is present", () => { - const messages: Message[] = [ - { - id: "1", - role: "user", - parts: [{ type: "text", text: "Old message 1" }], - }, - { - id: "2", - role: "assistant", - parts: [{ type: "text", text: "Old response 1" }], - }, - { - id: "3", - role: "assistant", - parts: [ - { - type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`, - state: "output-available", - output: { - summary: "Summary of old messages", - preservedMessageIds: ["4", "5"], // Preserve specific messages - }, - } as any, - ], - }, - { - id: "4", - role: "user", - parts: [{ type: "text", text: "Preserved message 1" }], - }, - { - id: "5", - role: "assistant", - parts: [{ type: "text", text: "Preserved message 2" }], - }, - { - id: "6", - role: "user", - parts: [{ type: "text", text: "New message after compaction" }], - }, - ]; - - const result = applyCompaction(messages); - - // Should have: summary message + preserved messages (4, 5) + new message (6) - // Compaction tool call (3) is excluded since summary already contains the info - expect(result.length).toBe(4); - - // First message should be the summary - expect(result[0]?.id).toBe("compaction-summary"); - expect((result[0]?.parts[0] as { text: string }).text).toInclude( - "Summary of old messages" - ); - - // Should include messages after compaction point (excluding the compaction itself) - expect(result[1]?.id).toBe("4"); - expect(result[2]?.id).toBe("5"); - expect(result[3]?.id).toBe("6"); // new message after compaction is 
preserved - }); - }); - - describe("createCompactionTool", () => { - test("creates tool with correct name and schema", () => { - const tools = createCompactionTool(); - - expect(tools[COMPACT_CONVERSATION_TOOL_NAME]).toBeDefined(); - expect(tools[COMPACT_CONVERSATION_TOOL_NAME].description).toInclude( - "Compact the conversation history" - ); - }); - - test("tool execute returns summary in result", async () => { - const tools = createCompactionTool(); - const compactionTool = tools[COMPACT_CONVERSATION_TOOL_NAME]; - - const result = (await compactionTool.execute?.( - { summary: "Test summary content" }, - { abortSignal: new AbortController().signal } as any - )) as { summary: string; compacted_at: string; message: string }; - - expect(result.summary).toBe("Test summary content"); - expect(result.compacted_at).toBeDefined(); - expect(result.message).toInclude("compacted"); - }); - - test("tool execute includes preservedMessageIds when provided", async () => { - const preservedIds = ["msg-4", "msg-5", "msg-6"]; - const tools = createCompactionTool(preservedIds); - const compactionTool = tools[COMPACT_CONVERSATION_TOOL_NAME]; - - const result = (await compactionTool.execute?.( - { summary: "Emergency summary" }, - { abortSignal: new AbortController().signal } as any - )) as { - summary: string; - compacted_at: string; - message: string; - preservedMessageIds?: string[]; - }; - - expect(result.summary).toBe("Emergency summary"); - expect(result.preservedMessageIds).toEqual(preservedIds); - }); - - test("tool execute does not include preservedMessageIds when not provided", async () => { - const tools = createCompactionTool(); - const compactionTool = tools[COMPACT_CONVERSATION_TOOL_NAME]; - - const result = (await compactionTool.execute?.( - { summary: "Normal summary" }, - { abortSignal: new AbortController().signal } as any - )) as { - summary: string; - compacted_at: string; - message: string; - preservedMessageIds?: string[]; - }; - - expect(result.preservedMessageIds).toBeUndefined(); - }); - }); - - describe("createCompactionMessage", () => { - test("creates compaction message with token info when provided", () => { - const message = createCompactionMessage({ - tokenCount: 80000, - threshold: 100000, - }); - - expect(message.id).toStartWith("compaction-request-"); - expect(message.role).toBe("user"); - const textPart = message.parts[0] as { text: string }; - expect(textPart.text).toInclude("80%"); - expect(textPart.text).toInclude("80,000"); - expect(textPart.text).toInclude("compact_conversation"); - }); - - test("creates compaction message without token info when not provided", () => { - const message = createCompactionMessage(); - - expect(message.id).toStartWith("compaction-request-"); - expect(message.role).toBe("user"); - const textPart = message.parts[0] as { text: string }; - expect(textPart.text).toInclude("compact_conversation"); - expect(textPart.text).not.toInclude("%"); // No percentage - }); - }); - - describe("prepareTruncatedMessages", () => { - test("returns empty arrays for empty messages", async () => { - const result = await prepareTruncatedMessages({ - messages: [], - tokenLimit: 1000, - modelName: "anthropic/claude-sonnet-4", - }); - - expect(result.messagesToProcess).toEqual([]); - expect(result.messagesToPreserve).toEqual([]); - }); - - test("includes all messages when under token limit", async () => { - const messages: Message[] = [ - { - id: "1", - role: "user", - parts: [{ type: "text", text: "Hello" }], - }, - { - id: "2", - role: "assistant", - parts: [{ type: 
"text", text: "Hi there!" }], - }, - ]; - - const result = await prepareTruncatedMessages({ - messages, - tokenLimit: 100000, // Very high limit - modelName: "anthropic/claude-sonnet-4", - }); - - expect(result.messagesToProcess.length).toBe(2); - expect(result.messagesToPreserve.length).toBe(0); - }); - - test("truncates messages when over token limit", async () => { - // Create messages with enough content to have measurable tokens - const messages: Message[] = Array.from({ length: 10 }, (_, i) => ({ - id: `${i + 1}`, - role: i % 2 === 0 ? "user" : "assistant", - parts: [ - { - type: "text", - text: `This is message number ${i + 1} with some additional content to increase token count.`, - }, - ], - })) as Message[]; - - const result = await prepareTruncatedMessages({ - messages, - tokenLimit: 100, // Low limit to force truncation - modelName: "anthropic/claude-sonnet-4", - }); - - // Should have truncated - not all messages in messagesToProcess - expect(result.messagesToProcess.length).toBeLessThan(10); - expect(result.messagesToProcess.length).toBeGreaterThan(0); - - // The rest should be in messagesToPreserve - expect( - result.messagesToProcess.length + result.messagesToPreserve.length - ).toBe(10); - - // First message should be in messagesToProcess (oldest first) - expect(result.messagesToProcess[0]?.id).toBe("1"); - }); - - test("includes at least one message even if it exceeds token limit", async () => { - const messages: Message[] = [ - { - id: "1", - role: "user", - parts: [ - { - type: "text", - text: "This is a message with enough content to exceed a very small token limit.", - }, - ], - }, - ]; - - const result = await prepareTruncatedMessages({ - messages, - tokenLimit: 1, // Impossibly small limit - modelName: "anthropic/claude-sonnet-4", - }); - - // Should still include the one message - expect(result.messagesToProcess.length).toBe(1); - expect(result.messagesToPreserve.length).toBe(0); - }); - - }); - - describe("processCompaction", () => { - const noopLogger = { - info: () => {}, - warn: () => {}, - error: () => {}, - }; - - test("returns empty compactionTool when under soft threshold", async () => { - const { processCompaction } = await import("./compaction"); - - const messages: Message[] = [ - { - id: "1", - role: "user", - parts: [{ type: "text", text: "Hello" }], - }, - ]; - - const result = await processCompaction({ - messages, - softTokenThreshold: 1_000_000, // Very high threshold - hardTokenThreshold: 1_100_000, - model: "anthropic/claude-sonnet-4", - logger: noopLogger, - }); - - expect(result.messages).toEqual(messages); - expect(Object.keys(result.compactionTool)).toHaveLength(0); - }); - - test("returns compactionTool when soft threshold exceeded", async () => { - const { processCompaction } = await import("./compaction"); - - const messages: Message[] = [ - { - id: "1", - role: "user", - parts: [ - { type: "text", text: "Hello world, this is a test message." 
}, - ], - }, - ]; - - const result = await processCompaction({ - messages, - softTokenThreshold: 1, // Very low threshold - hardTokenThreshold: 100_000, // High hard threshold so no truncation - model: "anthropic/claude-sonnet-4", - logger: noopLogger, - }); - - // Should have compaction tool - expect(Object.keys(result.compactionTool)).toHaveLength(1); - expect( - result.compactionTool[COMPACT_CONVERSATION_TOOL_NAME] - ).toBeDefined(); - - // Should have injected compaction message - expect(result.messages.length).toBe(2); - const compactionRequest = result.messages.find((m) => - m.id.startsWith("compaction-request-") - ); - expect(compactionRequest).toBeDefined(); - }); - - test("applies existing compaction summary", async () => { - const { processCompaction } = await import("./compaction"); - - const messages: Message[] = [ - { - id: "1", - role: "user", - parts: [{ type: "text", text: "Old message" }], - }, - { - id: "2", - role: "assistant", - parts: [ - { - type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`, - state: "output-available", - output: { summary: "Summary of conversation" }, - } as any, - ], - }, - { - id: "3", - role: "user", - parts: [{ type: "text", text: "New message" }], - }, - ]; - - const result = await processCompaction({ - messages, - softTokenThreshold: 1_000_000, // High threshold so no new compaction - hardTokenThreshold: 1_100_000, - model: "anthropic/claude-sonnet-4", - logger: noopLogger, - }); - - // Should have applied compaction (summary + new message, compaction tool call excluded) - expect(result.messages.length).toBe(2); - expect(result.messages[0]?.id).toBe("compaction-summary"); - expect(result.messages[1]?.id).toBe("3"); - }); - - test("throws error when soft threshold >= hard threshold", async () => { - const { processCompaction } = await import("./compaction"); - - const messages: Message[] = [ - { - id: "1", - role: "user", - parts: [{ type: "text", text: "Hello" }], - }, - ]; - - await expect( - processCompaction({ - messages, - softTokenThreshold: 100_000, - hardTokenThreshold: 100_000, // Equal to soft - invalid - model: "anthropic/claude-sonnet-4", - logger: noopLogger, - }) - ).rejects.toThrow("Soft token threshold"); - - await expect( - processCompaction({ - messages, - softTokenThreshold: 200_000, - hardTokenThreshold: 100_000, // Less than soft - invalid - model: "anthropic/claude-sonnet-4", - logger: noopLogger, - }) - ).rejects.toThrow("Soft token threshold"); - }); - - test("truncates messages at hard threshold and preserves rest", async () => { - const { processCompaction } = await import("./compaction"); - - // Create enough messages to exceed soft threshold but require truncation at hard - // Each message is ~25 tokens, so 20 messages = ~500 tokens - const messages: Message[] = Array.from({ length: 20 }, (_, i) => ({ - id: `${i + 1}`, - role: i % 2 === 0 ? 
"user" : "assistant", - parts: [ - { - type: "text", - text: `Message ${i + 1}: This is a longer message with additional content to generate more tokens for testing purposes.`, - }, - ], - })) as Message[]; - - const result = await processCompaction({ - messages, - softTokenThreshold: 1, // Trigger compaction immediately - hardTokenThreshold: 300, // ~12 messages worth, forces truncation - model: "anthropic/claude-sonnet-4", - logger: noopLogger, - }); - - // Should have compaction tool with preserved message IDs - expect(Object.keys(result.compactionTool)).toHaveLength(1); - - // Messages should be truncated (fewer than original 20 + compaction message) - // With 300 token limit and ~25 tokens per message, expect ~12 messages + compaction = 13 - expect(result.messages.length).toBeLessThan(21); - expect(result.messages.length).toBeGreaterThan(0); - - // Last message should be compaction request - const lastMessage = result.messages[result.messages.length - 1]; - expect(lastMessage?.id).toMatch(/^compaction-request-/); - }); - }); -}); diff --git a/packages/scout-agent/lib/compaction.ts b/packages/scout-agent/lib/compaction.ts deleted file mode 100644 index ef1cc38..0000000 --- a/packages/scout-agent/lib/compaction.ts +++ /dev/null @@ -1,435 +0,0 @@ -import { - convertToModelMessages, - type LanguageModel, - type ModelMessage, - type Tool, - tool, -} from "ai"; -import { z } from "zod"; -import type { Logger, Message } from "./types"; - -/** - * Tool name for conversation compaction. - * Used to identify compaction tool results in message history. - */ -export const COMPACT_CONVERSATION_TOOL_NAME = "compact_conversation" as const; - -/** - * Default soft token threshold for triggering compaction. - * When conversation tokens reach this limit, compaction is triggered. - */ -export const DEFAULT_SOFT_TOKEN_THRESHOLD = 180_000; - -/** - * Default hard token threshold for compaction. - * Messages beyond this limit are excluded from compaction and preserved. - * Must be greater than soft threshold. - */ -export const DEFAULT_HARD_TOKEN_THRESHOLD = 190_000; - -/** - * Get the model configuration for token counting. - * Defaults to Claude Sonnet if model not found. - */ -function getModelConfig(models: Record, modelName: string) { - // Try to find exact match first - if (modelName in models) { - return models[modelName as keyof typeof models]; - } - // Default to Claude Sonnet for Anthropic models - if (modelName.includes("anthropic") || modelName.includes("claude")) { - return models["anthropic/claude-sonnet-4"]; - } - // Default to GPT-5 for OpenAI models - if (modelName.includes("openai") || modelName.includes("gpt")) { - return models["openai/gpt-5"]; - } - // Fallback - return models["anthropic/claude-sonnet-4"]; -} - -/** - * Result of counting tokens for messages. - */ -export interface TokenCountResult { - /** Total tokens across all messages */ - total: number; - /** Token count for each message */ - perMessage: number[]; -} - -/** - * Counts tokens for messages using ai-tokenizer. - * Returns both total and per-message token counts for efficient processing. 
- */ -export async function countConversationTokens( - messages: ModelMessage[], - modelName: string = "anthropic/claude-sonnet-4" -): Promise { - // we import the modules dynamically because otherwise the - // agent starts up super slow and blink cloud times out during deployment - const aiTokenizer = await import("ai-tokenizer"); - const encoding = await import("ai-tokenizer/encoding/o200k_base"); - const tokenizerSdk = await import("ai-tokenizer/sdk"); - - const model = getModelConfig(aiTokenizer.models, modelName); - const tokenizer = new aiTokenizer.Tokenizer(encoding); - - const result = tokenizerSdk.count({ - // biome-ignore lint/suspicious/noExplicitAny: weird typing error - tokenizer: tokenizer as any, - // biome-ignore lint/suspicious/noExplicitAny: weird typing error - model: model as any, - messages, - }); - - return { - total: result.total, - perMessage: result.messages.map((m) => m.total), - }; -} - -/** - * Finds the most recent compaction summary in the message history. - * Returns the index of the message containing the compaction, the summary text, - * and optionally the preserved message IDs. - */ -export function findCompactionSummary(messages: Message[]): { - index: number; - summary: string; - preservedMessageIds?: string[]; -} | null { - // Search from the end to find the most recent compaction - for (let i = messages.length - 1; i >= 0; i--) { - const message = messages[i]; - if (message?.role !== "assistant") { - continue; - } - - for (const part of message.parts) { - // Check if this is our compaction tool - if (part.type === `tool-${COMPACT_CONVERSATION_TOOL_NAME}`) { - const toolPart = part as { - state: string; - output?: { summary?: string; preservedMessageIds?: string[] }; - }; - if (toolPart.state === "output-available" && toolPart.output?.summary) { - return { - index: i, - summary: toolPart.output.summary, - preservedMessageIds: toolPart.output.preservedMessageIds, - }; - } - } - } - } - return null; -} - -/** - * Processes messages to apply compaction if a compaction summary exists. - * Returns messages with history before the compaction replaced by a summary message. 
- */ -export function applyCompaction(messages: Message[]): Message[] { - const compaction = findCompactionSummary(messages); - if (!compaction) { - return messages; - } - - // Create a synthetic user message with the compacted summary - const summaryMessage: Message = { - id: "compaction-summary", - role: "user", - parts: [ - { - type: "text", - text: `[CONVERSATION SUMMARY - Previous messages have been compacted to save context space]\n\n${compaction.summary}\n\n[END OF SUMMARY - Conversation continues below]`, - }, - ], - }; - - // Get messages after the compaction point (excludes the compaction tool call itself) - const messagesAfterCompaction = messages.slice(compaction.index + 1); - - // Check for preserved message IDs (from hard threshold truncation) - if ( - compaction.preservedMessageIds && - compaction.preservedMessageIds.length > 0 - ) { - // Keep summary + preserved messages by ID + messages after compaction - const preservedIdSet = new Set(compaction.preservedMessageIds); - const preserved = messages.filter((m) => preservedIdSet.has(m.id)); - - // Combine preserved messages with messages after compaction (deduplicated) - const afterCompactionIds = new Set( - messagesAfterCompaction.map((m) => m.id) - ); - const preservedNotInAfter = preserved.filter( - (m) => !afterCompactionIds.has(m.id) - ); - - return [summaryMessage, ...preservedNotInAfter, ...messagesAfterCompaction]; - } - - // Normal compaction: keep messages from the compaction point onwards - return [summaryMessage, ...messagesAfterCompaction]; -} - -/** - * Creates the compact_conversation tool. - * This tool should be called by the model when the conversation is getting too long. - * - * @param preservedMessageIds - Optional array of message IDs that should be preserved - * after compaction. Used during emergency compaction to track which recent messages - * were not sent to the model but should be restored after the summary. - */ -export function createCompactionTool( - preservedMessageIds?: string[] -): Record { - return { - [COMPACT_CONVERSATION_TOOL_NAME]: tool({ - description: `Compact the conversation history to save context space. Call this tool when instructed that the conversation is approaching context limits. Provide a detailed and thorough summary that captures: -- The main topics discussed -- Key decisions made -- Important code changes or file modifications (include file paths and what was changed) -- Any ongoing tasks or action items -- Critical context needed to continue the conversation -- Relevant technical details, configurations, or environment information -- Any errors encountered and how they were resolved - -Be thorough and detailed. This summary will replace the earlier conversation history, so include all information needed to continue effectively.`, - inputSchema: z.object({ - summary: z - .string() - .describe( - "A detailed and thorough summary of the conversation so far, including all important context needed to continue effectively." - ), - }), - execute: async ({ summary }) => { - // The summary is stored in the tool result and will be processed - // by applyCompaction() on subsequent messages - return { - summary, - compacted_at: new Date().toISOString(), - message: - "Conversation history has been compacted. The summary will be used to maintain context in future messages.", - ...(preservedMessageIds && - preservedMessageIds.length > 0 && { preservedMessageIds }), - }; - }, - }), - }; -} - -/** - * Creates a compaction request message asking the model to summarize the conversation. 
- * Uses a consistent ID ("compaction-request") for retry detection. - */ -export function createCompactionMessage(options?: { - tokenCount?: number; - threshold?: number; -}): Message { - let contextInfo = ""; - if (options?.tokenCount && options?.threshold) { - const percentUsed = Math.round( - (options.tokenCount / options.threshold) * 100 - ); - contextInfo = `\n\nThe conversation has used approximately ${percentUsed}% of the available context (${options.tokenCount.toLocaleString()} tokens).`; - } - - return { - id: `compaction-request-${Date.now()}`, - role: "user", - parts: [ - { - type: "text", - text: `[SYSTEM NOTICE - CONTEXT LIMIT]${contextInfo} - -To prevent context overflow errors, please call the \`compact_conversation\` tool NOW to summarize the conversation history. - -Provide a detailed and thorough summary that captures all important context, decisions, code changes, file paths, and ongoing tasks. Do not leave out important details.`, - }, - ], - }; -} - -/** - * Options for preparing truncated messages. - */ -export interface PrepareTruncatedMessagesOptions { - /** All messages to consider for truncation */ - messages: Message[]; - /** Maximum token count for messages to process */ - tokenLimit: number; - /** Model name for token counting */ - modelName: string; -} - -/** - * Result of preparing truncated messages. - */ -export interface PrepareTruncatedMessagesResult { - /** Messages to send for summarization (older messages, within token limit) */ - messagesToProcess: Message[]; - /** Messages to preserve and restore after compaction */ - messagesToPreserve: Message[]; -} - -/** - * Prepares messages for a truncated compaction attempt. - * Accumulates messages from the start (oldest first) until adding more would exceed the token limit. - * - * @returns Messages split into those to process (summarize) and those to preserve - */ -export async function prepareTruncatedMessages( - options: PrepareTruncatedMessagesOptions -): Promise { - const { messages, tokenLimit, modelName } = options; - - if (messages.length === 0) { - return { messagesToProcess: [], messagesToPreserve: [] }; - } - - // Convert all messages once and get per-message token counts - const converted = convertToModelMessages(messages, { - ignoreIncompleteToolCalls: true, - }); - const { perMessage } = await countConversationTokens(converted, modelName); - - // Find the split point by accumulating token counts - // until we would exceed the token limit - let splitPoint = 0; - let cumulativeTokens = 0; - - for (let i = 0; i < perMessage.length; i++) { - cumulativeTokens += perMessage[i] ?? 0; - if (cumulativeTokens > tokenLimit) { - // Adding this message would exceed the limit - break; - } - splitPoint = i + 1; - } - - // Ensure we have at least one message to process (if possible) - if (splitPoint === 0 && messages.length > 0) { - // Even the first message exceeds the limit, but we need to process something - splitPoint = 1; - } - - const messagesToProcess = messages.slice(0, splitPoint); - const messagesToPreserve = messages.slice(splitPoint); - - return { - messagesToProcess, - messagesToPreserve, - }; -} - -/** - * Options for processing compaction. 
- */ -export interface ProcessCompactionOptions { - messages: Message[]; - /** Soft threshold - triggers compaction when reached */ - softTokenThreshold: number; - /** Hard threshold - max tokens to send for compaction; rest are preserved */ - hardTokenThreshold: number; - model: LanguageModel | string; - logger: Logger; -} - -/** - * Result of processing compaction. - */ -export interface ProcessCompactionResult { - messages: Message[]; - compactionTool: Record; -} - -/** - * Extracts model name from a LanguageModel or string. - */ -function getModelName(model: LanguageModel | string): string { - if (typeof model === "string") { - return model; - } - if ("modelId" in model) { - return model.modelId; - } - return "anthropic/claude-sonnet-4"; -} - -/** - * Processes messages for compaction. - * Applies any existing compaction summary, checks token count against soft threshold, - * and truncates at hard threshold when compacting. - */ -export async function processCompaction( - options: ProcessCompactionOptions -): Promise { - const { messages, softTokenThreshold, hardTokenThreshold, model, logger } = - options; - - // Validate thresholds - if (softTokenThreshold >= hardTokenThreshold) { - throw new Error( - `Soft token threshold (${softTokenThreshold}) must be less than hard token threshold (${hardTokenThreshold})` - ); - } - - const modelName = getModelName(model); - - // Apply compaction if a compaction summary exists in the message history - const compactedMessages = applyCompaction(messages); - if (compactedMessages.length === 0) { - return { messages: [], compactionTool: {} }; - } - - // Check token count and handle compaction - let preservedMessageIds: string[] | undefined; - - // We need to convert messages to count tokens accurately - const tempConverted = convertToModelMessages(compactedMessages, { - ignoreIncompleteToolCalls: true, - }); - const { total: tokenCount } = await countConversationTokens( - tempConverted, - modelName - ); - - if (tokenCount < softTokenThreshold) { - return { messages: compactedMessages, compactionTool: {} }; - } - - // Soft threshold reached - trigger compaction - logger.info( - `Conversation approaching context limit: ${tokenCount.toLocaleString()} tokens (soft threshold: ${softTokenThreshold.toLocaleString()})` - ); - - // Truncate messages at hard threshold to ensure compaction request fits - const { messagesToProcess, messagesToPreserve } = - await prepareTruncatedMessages({ - messages: compactedMessages, - tokenLimit: hardTokenThreshold, - modelName, - }); - - // Store preserved message IDs for the compaction tool result - if (messagesToPreserve.length > 0) { - preservedMessageIds = messagesToPreserve.map((m) => m.id); - logger.info( - `Compaction: sending ${messagesToProcess.length} messages for summarization, preserving ${messagesToPreserve.length} recent messages` - ); - } - - return { - messages: [ - ...messagesToProcess, - createCompactionMessage({ - tokenCount, - threshold: softTokenThreshold, - }), - ], - compactionTool: createCompactionTool(preservedMessageIds), - }; -} diff --git a/packages/scout-agent/lib/core.test.ts b/packages/scout-agent/lib/core.test.ts index 0940bf6..7f10136 100644 --- a/packages/scout-agent/lib/core.test.ts +++ b/packages/scout-agent/lib/core.test.ts @@ -17,7 +17,7 @@ import { mockCoderWorkspace, noopLogger, } from "./compute/test-utils"; -import { COMPACT_CONVERSATION_TOOL_NAME, type Message, Scout } from "./index"; +import { type Message, Scout } from "./index"; import { createMockBlinkApiServer, withBlinkApiUrl } 
from "./test-helpers"; // Add async iterator support to ReadableStream for testing @@ -948,474 +948,3 @@ describe("coder integration", () => { expect(mockClient.getAppHost).toHaveBeenCalled(); }); }); - -describe("compaction", () => { - test("buildStreamTextParams does not include compaction tool when under threshold", async () => { - const agent = new blink.Agent(); - const scout = new Scout({ - agent, - logger: noopLogger, - }); - - const params = await scout.buildStreamTextParams({ - chatID: "test-chat-id" as blink.ID, - messages: [ - { - id: "1", - role: "user", - parts: [{ type: "text", text: "Hello" }], - }, - ], - model: newMockModel({ textResponse: "test" }), - }); - - // Verify compaction tool is NOT included when under threshold - expect(params.tools[COMPACT_CONVERSATION_TOOL_NAME]).toBeUndefined(); - }); - - test("buildStreamTextParams applies existing compaction summary", async () => { - const infoLogs: string[] = []; - const mockLogger = { - ...noopLogger, - info: (...args: unknown[]) => { - infoLogs.push(args.map(String).join(" ")); - }, - }; - - const agent = new blink.Agent(); - const scout = new Scout({ - agent, - logger: mockLogger, - }); - - // Create messages with an existing compaction summary - const messagesWithCompaction: Message[] = [ - { - id: "1", - role: "user", - parts: [{ type: "text", text: "Old message 1" }], - }, - { - id: "2", - role: "assistant", - parts: [{ type: "text", text: "Old response 2" }], - }, - { - id: "3", - role: "user", - parts: [{ type: "text", text: "Old message 3" }], - }, - { - id: "4", - role: "assistant", - parts: [{ type: "text", text: "Old response 4" }], - }, - { - id: "5", - role: "user", - parts: [{ type: "text", text: "Old message 5" }], - }, - { - id: "6", - role: "assistant", - parts: [{ type: "text", text: "Old response 6" }], - }, - { - id: "7", - role: "assistant", - parts: [ - { - type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`, - toolCallId: "tool-call-1", - state: "output-available", - input: { summary: "Summary of old messages" }, - output: { summary: "Summary of old messages" }, - } as unknown as Message["parts"][number], - ], - }, - { - id: "8", - role: "user", - parts: [{ type: "text", text: "New message after compaction" }], - }, - ]; - - const params = await scout.buildStreamTextParams({ - chatID: "test-chat-id" as blink.ID, - messages: messagesWithCompaction, - model: newMockModel({ textResponse: "test" }), - // Disable threshold to avoid token counting affecting message count - compaction: { - softThreshold: Number.MAX_SAFE_INTEGER - 1, - hardThreshold: Number.MAX_SAFE_INTEGER, - }, - }); - - // Verify messages were processed: should have system + summary + new msg - // The converted messages include: system prompt, compaction-summary user msg, and the new user msg - // (compaction tool call is excluded since the summary already contains the info) - expect(params.messages.length).toBe(3); - }); - - test("buildStreamTextParams injects compaction message when threshold exceeded", async () => { - const warnLogs: string[] = []; - const infoLogs: string[] = []; - const mockLogger = { - ...noopLogger, - warn: (...args: unknown[]) => { - warnLogs.push(args.map(String).join(" ")); - }, - info: (...args: unknown[]) => { - infoLogs.push(args.map(String).join(" ")); - }, - }; - - const agent = new blink.Agent(); - const scout = new Scout({ - agent, - logger: mockLogger, - }); - - // Create a message that will exceed a very low threshold - const params = await scout.buildStreamTextParams({ - chatID: "test-chat-id" as blink.ID, 
- messages: [ - { - id: "1", - role: "user", - parts: [ - { type: "text", text: "Hello world, this is a test message." }, - ], - }, - ], - model: newMockModel({ textResponse: "test" }), - compaction: { - // Set a very low threshold so any message exceeds it - softThreshold: 1, - hardThreshold: 100_000, // High hard threshold so no truncation - }, - }); - - // Verify compaction message was injected (system + user + compaction request = 3 messages) - expect(params.messages.length).toBe(3); - - // Check that the last message contains compaction request - const compactionRequest = params.messages.find( - (m) => - m.role === "user" && - (typeof m.content === "string" - ? m.content.includes("CONTEXT LIMIT") - : Array.isArray(m.content) && - m.content.some( - (c) => - c.type === "text" && - (c as { text: string }).text.includes("CONTEXT LIMIT") - )) - ); - expect(compactionRequest).toBeDefined(); - - // Verify compaction tool IS available when compaction is triggered - expect(params.tools[COMPACT_CONVERSATION_TOOL_NAME]).toBeDefined(); - }); - - test("buildStreamTextParams respects compaction: false to disable", async () => { - const warnLogs: string[] = []; - const mockLogger = { - ...noopLogger, - warn: (...args: unknown[]) => { - warnLogs.push(args.map(String).join(" ")); - }, - }; - - const agent = new blink.Agent(); - const scout = new Scout({ - agent, - logger: mockLogger, - }); - - const params = await scout.buildStreamTextParams({ - chatID: "test-chat-id" as blink.ID, - messages: [ - { - id: "1", - role: "user", - parts: [ - { type: "text", text: "Hello world, this is a test message." }, - ], - }, - ], - model: newMockModel({ textResponse: "test" }), - compaction: false, - }); - - // Compaction tool should NOT be available when compaction is disabled - expect(params.tools[COMPACT_CONVERSATION_TOOL_NAME]).toBeUndefined(); - - // No warning should be logged even with messages - const warningLog = warnLogs.find((l) => - l.includes("approaching context limit") - ); - expect(warningLog).toBeUndefined(); - - // Only system + user message (no warning injected) - expect(params.messages.length).toBe(2); - }); - - test("buildStreamTextParams truncates messages at hard threshold during compaction", async () => { - const warnLogs: string[] = []; - const infoLogs: string[] = []; - const mockLogger = { - ...noopLogger, - warn: (...args: unknown[]) => { - warnLogs.push(args.map(String).join(" ")); - }, - info: (...args: unknown[]) => { - infoLogs.push(args.map(String).join(" ")); - }, - }; - - const agent = new blink.Agent(); - const scout = new Scout({ - agent, - logger: mockLogger, - }); - - // Create many messages that will exceed soft threshold and require truncation at hard - const messages: Message[] = Array.from({ length: 20 }, (_, i) => ({ - id: `${i + 1}`, - role: i % 2 === 0 ? "user" : "assistant", - parts: [ - { - type: "text", - text: `Message ${i + 1}: This is a longer message with additional content to generate more tokens for testing purposes. 
${Array(100).fill("abcdefg").join("")}`, - }, - ], - })) as Message[]; - - const params = await scout.buildStreamTextParams({ - chatID: "test-chat-id" as blink.ID, - messages, - model: newMockModel({ textResponse: "test" }), - compaction: { - // Low soft threshold to trigger compaction - softThreshold: 1, - // Low hard threshold to force truncation - hardThreshold: 500, - }, - }); - - // Verify info log about truncation (when preserving messages) - const truncationLog = infoLogs.find((l) => - l.includes("Compaction: sending") - ); - expect(truncationLog).toBeDefined(); - - // Verify compaction tool IS available - expect(params.tools[COMPACT_CONVERSATION_TOOL_NAME]).toBeDefined(); - - // Verify that messages were truncated (not all 20 messages + system) - // Should have: system + truncated messages + compaction request - expect(params.messages.length).toBeLessThan(10); - - // Verify compaction request message is present - const compactionRequest = params.messages.find( - (m) => - m.role === "user" && - (typeof m.content === "string" - ? m.content.includes("CONTEXT LIMIT") - : Array.isArray(m.content) && - m.content.some( - (c) => - c.type === "text" && - (c as { text: string }).text.includes("CONTEXT LIMIT") - )) - ); - expect(compactionRequest).toBeDefined(); - }); - - test("compaction loop: after model summarizes, second call does not trigger another compaction", async () => { - const infoLogs: string[] = []; - const mockLogger = { - ...noopLogger, - info: (...args: unknown[]) => { - infoLogs.push(args.map(String).join(" ")); - }, - }; - - const agent = new blink.Agent(); - const scout = new Scout({ - agent, - logger: mockLogger, - }); - - // Use thresholds that will be exceeded by original messages but not by compacted ones - // Original messages: ~10 messages with 700 chars each = high token count - // After compaction: summary + preserved messages should be under soft threshold - const softThreshold = 2000; - const hardThreshold = 3000; - - // Step 1: Create large messages that will exceed soft threshold - // Each message has ~700 characters of filler to generate significant tokens - const filler = Array(100).fill("abcdefg").join(""); - const originalMessages: Message[] = Array.from({ length: 10 }, (_, i) => ({ - id: `${i + 1}`, - role: i % 2 === 0 ? 
"user" : "assistant", - parts: [ - { - type: "text", - text: `Message ${i + 1}: ${filler}`, - }, - ], - })) as Message[]; - - // Create a mock model that returns a tool call to compact_conversation - // The tool is wrapped with withModelIntent, so input needs model_intent and properties - const summaryText = "Brief summary of the conversation."; - const mockModelWithToolCall = new MockLanguageModelV2({ - doStream: async () => { - return { - stream: simulateReadableStream({ - chunks: [ - { - type: "tool-call" as const, - toolName: COMPACT_CONVERSATION_TOOL_NAME, - toolCallId: "tool-call-1", - input: JSON.stringify({ - model_intent: "Compacting conversation history", - properties: { summary: summaryText }, - }), - }, - { - type: "finish" as const, - finishReason: "tool-calls" as const, - logprobs: undefined, - usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 }, - }, - ], - }), - }; - }, - }); - - // First call - should trigger compaction, model responds with tool call - const firstParams = await scout.buildStreamTextParams({ - chatID: "test-chat-id" as blink.ID, - messages: originalMessages, - model: mockModelWithToolCall, - compaction: { softThreshold, hardThreshold }, - }); - - // Verify compaction was triggered - expect(firstParams.tools[COMPACT_CONVERSATION_TOOL_NAME]).toBeDefined(); - - // Execute streamText and wait for completion (including tool execution) - const firstResult = streamText(firstParams); - - // Wait for the full result including tool calls and their results - const toolCalls = await firstResult.toolCalls; - const toolResults = await firstResult.toolResults; - - // Verify the model called the compaction tool - expect(toolCalls).toHaveLength(1); - expect(toolCalls[0]?.toolName).toBe(COMPACT_CONVERSATION_TOOL_NAME); - expect(toolResults).toHaveLength(1); - - // The tool should have executed and returned a summary - // biome-ignore lint/suspicious/noExplicitAny: test typing - const toolResult = toolResults[0] as any; - expect(toolResult?.output).toBeDefined(); - // The output contains the summary from the compaction tool - expect(toolResult?.output?.summary).toBe(summaryText); - - // Now build the assistant message with the completed tool call - // biome-ignore lint/suspicious/noExplicitAny: test typing - const toolCall = toolCalls[0] as any; - const assistantMessage: Message = { - id: crypto.randomUUID(), - role: "assistant", - parts: [ - { - type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`, - toolCallId: toolCall?.toolCallId ?? 
"tool-1", - state: "output-available", - // The input has model_intent wrapper, but we store the unwrapped version - input: { summary: summaryText }, - output: toolResult?.output, - } as Message["parts"][number], - ], - }; - - // Construct the full message history as it would be after the first turn - // Original messages + compaction request + assistant's tool call response - const messagesForSecondCall: Message[] = [ - ...originalMessages, - { - id: "compaction-request", - role: "user", - parts: [ - { - type: "text", - text: "[SYSTEM NOTICE - CONTEXT LIMIT] Please call compact_conversation tool NOW", - }, - ], - }, - // The assistant's response with the completed tool call - assistantMessage, - ]; - - // Clear logs before second call - infoLogs.length = 0; - - // Step 2: Second call - after compaction is applied, should NOT trigger another compaction - const secondParams = await scout.buildStreamTextParams({ - chatID: "test-chat-id" as blink.ID, - messages: messagesForSecondCall, - model: newMockModel({ textResponse: "Continuing the conversation..." }), - compaction: { softThreshold, hardThreshold }, - }); - - // After applying compaction: - // - Original 10 messages + compaction request should be replaced by summary - // - Only summary message + tool call message remain - // - Token count should be much lower now - - // Verify NO new compaction was triggered - const secondCompactionRequest = secondParams.messages.find( - (m) => - m.role === "user" && - (typeof m.content === "string" - ? m.content.includes("CONTEXT LIMIT") - : Array.isArray(m.content) && - m.content.some( - (c) => - c.type === "text" && - (c as { text: string }).text.includes("CONTEXT LIMIT") - )) - ); - expect(secondCompactionRequest).toBeUndefined(); - - // Compaction tool should NOT be included since we're under threshold after applying summary - expect(secondParams.tools[COMPACT_CONVERSATION_TOOL_NAME]).toBeUndefined(); - - // Verify the summary message is present (compaction was applied) - const summaryMessage = secondParams.messages.find( - (m) => - m.role === "user" && - (typeof m.content === "string" - ? m.content.includes("CONVERSATION SUMMARY") - : Array.isArray(m.content) && - m.content.some( - (c) => - c.type === "text" && - (c as { text: string }).text.includes("CONVERSATION SUMMARY") - )) - ); - expect(summaryMessage).toBeDefined(); - - // No "approaching context limit" log should appear in second call - const contextLimitLog = infoLogs.find((l) => - l.includes("approaching context limit") - ); - expect(contextLimitLog).toBeUndefined(); - }); -}); diff --git a/packages/scout-agent/lib/core.ts b/packages/scout-agent/lib/core.ts index b508037..9e334c7 100644 --- a/packages/scout-agent/lib/core.ts +++ b/packages/scout-agent/lib/core.ts @@ -6,11 +6,6 @@ import * as slack from "@blink-sdk/slack"; import type { App } from "@slack/bolt"; import { convertToModelMessages, type LanguageModel, type Tool } from "ai"; import type * as blink from "blink"; -import { - DEFAULT_HARD_TOKEN_THRESHOLD, - DEFAULT_SOFT_TOKEN_THRESHOLD, - processCompaction, -} from "./compaction"; import { type CoderApiClient, type CoderWorkspaceInfo, @@ -59,29 +54,6 @@ export interface BuildStreamTextParamsOptions { * If not provided, the GitHub auth context will be created using the app ID and private key from the GitHub config. */ getGithubAppContext?: () => Promise; - /** - * Configuration for conversation compaction. - * If not provided, compaction features are enabled with default thresholds. 
- * Set to `false` to disable compaction entirely. - */ - compaction?: - | { - /** - * Soft token threshold at which to trigger compaction. - * When the conversation exceeds this threshold, a message is injected - * asking the model to call the compact_conversation tool. - * Default: 180 000 tokens - */ - softThreshold?: number; - /** - * Hard token threshold - max tokens to send for compaction. - * Messages beyond this limit are preserved and restored after compaction. - * Must be greater than softThreshold. - * Default: 190 000 tokens - */ - hardThreshold?: number; - } - | false; } interface Logger { @@ -354,7 +326,6 @@ export class Scout { tools: providedTools, getGithubAppContext, systemPrompt = defaultSystemPrompt, - compaction: compactionConfig, }: BuildStreamTextParamsOptions): Promise<{ model: LanguageModel; messages: ModelMessage[]; @@ -375,28 +346,7 @@ export class Scout { )() : undefined; - // Process compaction if enabled - const compactionEnabled = compactionConfig !== false; - const softTokenThreshold = - (compactionConfig !== false - ? compactionConfig?.softThreshold - : undefined) ?? DEFAULT_SOFT_TOKEN_THRESHOLD; - const hardTokenThreshold = - (compactionConfig !== false - ? compactionConfig?.hardThreshold - : undefined) ?? DEFAULT_HARD_TOKEN_THRESHOLD; - - const { messages: compactedMessages, compactionTool } = compactionEnabled - ? await processCompaction({ - messages, - softTokenThreshold, - hardTokenThreshold, - model, - logger: this.logger, - }) - : { messages, compactionTool: {} }; - - const slackMetadata = getSlackMetadata(compactedMessages); + const slackMetadata = getSlackMetadata(messages); const respondingInSlack = this.slack.app !== undefined && slackMetadata !== undefined; @@ -497,7 +447,6 @@ export class Scout { } const tools = { - ...compactionTool, ...(this.webSearch.config ? 
createWebSearchTools({ exaApiKey: this.webSearch.config.exaApiKey }) : {}), @@ -524,7 +473,7 @@ ${slack.formattingRules} `; } - const converted = convertToModelMessages(compactedMessages, { + const converted = convertToModelMessages(messages, { ignoreIncompleteToolCalls: true, tools, }); diff --git a/packages/scout-agent/lib/index.ts b/packages/scout-agent/lib/index.ts index d60f62f..2b6d5a5 100644 --- a/packages/scout-agent/lib/index.ts +++ b/packages/scout-agent/lib/index.ts @@ -1,4 +1,3 @@ -export * from "./compaction"; export type { CoderApiClient, CoderWorkspaceInfo } from "./compute/coder/index"; export type { DaytonaClient, DaytonaSandbox } from "./compute/daytona/index"; export * from "./core"; diff --git a/packages/scout-agent/package.json b/packages/scout-agent/package.json index 366919b..67b087f 100644 --- a/packages/scout-agent/package.json +++ b/packages/scout-agent/package.json @@ -1,7 +1,7 @@ { "name": "@blink-sdk/scout-agent", "description": "A general-purpose AI agent with GitHub, Slack, web search, and compute capabilities built on Blink SDK.", - "version": "0.0.10", + "version": "0.0.9", "type": "module", "keywords": [ "blink", @@ -50,7 +50,6 @@ "@blink-sdk/multiplexer": "^0.0.1", "@blink-sdk/slack": "^1.1.2", "@octokit/webhooks": "^14.1.3", - "ai-tokenizer": "^1.0.6", "exa-js": "^2.0.3" }, "devDependencies": { From 5fe85d85c87ae882df5b58062c54d2545ee80847 Mon Sep 17 00:00:00 2001 From: Hugo Dutka Date: Wed, 17 Dec 2025 15:15:37 +0100 Subject: [PATCH 2/2] implement error-based compaction --- packages/scout-agent/agent.ts | 3 +- packages/scout-agent/lib/compaction.test.ts | 609 +++++++++++++++ packages/scout-agent/lib/compaction.ts | 401 ++++++++++ packages/scout-agent/lib/core.test.ts | 820 +++++++++++++++++++- packages/scout-agent/lib/core.ts | 127 ++- packages/scout-agent/lib/index.ts | 1 + packages/scout-agent/lib/test-helpers.ts | 121 +++ packages/scout-agent/package.json | 2 +- 8 files changed, 2075 insertions(+), 9 deletions(-) create mode 100644 packages/scout-agent/lib/compaction.test.ts create mode 100644 packages/scout-agent/lib/compaction.ts diff --git a/packages/scout-agent/agent.ts b/packages/scout-agent/agent.ts index 42d278a..d68abde 100644 --- a/packages/scout-agent/agent.ts +++ b/packages/scout-agent/agent.ts @@ -64,7 +64,8 @@ agent.on("chat", async ({ id, messages }) => { }), }, }); - return streamText(params); + const stream = streamText(params); + return scout.processStreamTextOutput(stream); }); agent.serve(); diff --git a/packages/scout-agent/lib/compaction.test.ts b/packages/scout-agent/lib/compaction.test.ts new file mode 100644 index 0000000..d5cd4d9 --- /dev/null +++ b/packages/scout-agent/lib/compaction.test.ts @@ -0,0 +1,609 @@ +/** biome-ignore-all lint/style/noNonNullAssertion: fine for tests */ +/** biome-ignore-all lint/suspicious/noExplicitAny: fine for tests */ +import { describe, expect, test } from "bun:test"; +import { + applyCompactionToMessages, + buildCompactionRequestMessage, + COMPACT_CONVERSATION_TOOL_NAME, + COMPACTION_MARKER_TOOL_NAME, + countCompactionMarkers, + createCompactionMarkerPart, + createCompactionTool, + findCompactionSummary, + isOutOfContextError, +} from "./compaction"; +import type { Message } from "./types"; + +// Test helpers to reduce duplication +function userMsg(id: string, text: string): Message { + return { id, role: "user", parts: [{ type: "text", text }] }; +} + +function assistantMsg(id: string, text: string): Message { + return { id, role: "assistant", parts: [{ type: "text", text }] }; +} + 
+function markerMsg(id: string): Message { + return { + id, + role: "assistant", + parts: [createCompactionMarkerPart() as Message["parts"][number]], + }; +} + +function summaryPart( + summary: string, + compactedAt = "2024-01-01T00:00:00Z", + toolCallId = "test" +): Message["parts"][number] { + return { + type: "dynamic-tool", + toolName: COMPACT_CONVERSATION_TOOL_NAME, + toolCallId, + state: "output-available", + input: { summary: "Test" }, + output: { summary, compacted_at: compactedAt }, + } as Message["parts"][number]; +} + +function summaryMsg( + id: string, + summary: string, + compactedAt = "2024-01-01T00:00:00Z" +): Message { + return { id, role: "assistant", parts: [summaryPart(summary, compactedAt)] }; +} + +describe("isOutOfContextError", () => { + test("returns true for Anthropic context limit errors", () => { + expect(isOutOfContextError(new Error("max_tokens_exceeded"))).toBe(true); + expect( + isOutOfContextError(new Error("The context window has been exceeded")) + ).toBe(true); + }); + + test("returns true for OpenAI context_length_exceeded errors", () => { + expect(isOutOfContextError(new Error("context_length_exceeded"))).toBe( + true + ); + }); + + test("returns true for generic token limit exceeded messages", () => { + expect(isOutOfContextError(new Error("token limit exceeded"))).toBe(true); + expect( + isOutOfContextError(new Error("Token limit has been exceeded")) + ).toBe(true); + expect(isOutOfContextError(new Error("maximum tokens reached"))).toBe(true); + }); + + test("returns true for context window errors", () => { + expect(isOutOfContextError(new Error("context window exceeded"))).toBe( + true + ); + expect(isOutOfContextError(new Error("context length exceeded"))).toBe( + true + ); + }); + + test("returns true for input too long errors", () => { + expect(isOutOfContextError(new Error("input is too long"))).toBe(true); + expect(isOutOfContextError(new Error("prompt is too long"))).toBe(true); + }); + + test("returns false for unrelated errors", () => { + expect(isOutOfContextError(new Error("network error"))).toBe(false); + expect(isOutOfContextError(new Error("authentication failed"))).toBe(false); + expect(isOutOfContextError(new Error("rate limit exceeded"))).toBe(false); + }); + + test("handles string messages", () => { + expect(isOutOfContextError("token limit exceeded")).toBe(true); + expect(isOutOfContextError("some other error")).toBe(false); + }); + + test("handles objects with message property", () => { + expect(isOutOfContextError({ message: "token limit exceeded" })).toBe(true); + expect(isOutOfContextError({ message: "some other error" })).toBe(false); + }); + + test("returns false for non-error values", () => { + expect(isOutOfContextError(null)).toBe(false); + expect(isOutOfContextError(undefined)).toBe(false); + expect(isOutOfContextError(123)).toBe(false); + expect(isOutOfContextError({})).toBe(false); + }); +}); + +describe("createCompactionTool", () => { + test("tool has correct name", () => { + const tools = createCompactionTool(); + expect(tools[COMPACT_CONVERSATION_TOOL_NAME]).toBeDefined(); + }); + + test("tool has description", () => { + const tools = createCompactionTool(); + const tool = tools[COMPACT_CONVERSATION_TOOL_NAME]; + expect(tool.description).toBeDefined(); + expect(tool.description?.length).toBeGreaterThan(0); + }); + + test("execute returns correct format with timestamp", async () => { + const tools = createCompactionTool(); + const tool = tools[COMPACT_CONVERSATION_TOOL_NAME]; + + const result = await tool.execute!( + { 
summary: "Test summary" }, + { + abortSignal: new AbortController().signal, + toolCallId: "test-call-id", + messages: [], + } + ); + + expect(result).toHaveProperty("summary", "Test summary"); + expect(result).toHaveProperty("compacted_at"); + expect(result).toHaveProperty("message"); + expect(typeof result.compacted_at).toBe("string"); + }); +}); + +describe("createCompactionMarkerPart", () => { + test("creates valid tool call part structure", () => { + const part = createCompactionMarkerPart(); + + expect(part.type).toBe("dynamic-tool"); + expect(part.toolName).toBe(COMPACTION_MARKER_TOOL_NAME); + expect(part.state).toBe("output-available"); + expect(part.input).toBeDefined(); + expect(part.output).toBeDefined(); + }); + + test("has unique toolCallId", () => { + const part1 = createCompactionMarkerPart(); + const part2 = createCompactionMarkerPart(); + + expect(part1.toolCallId).not.toBe(part2.toolCallId); + expect(part1.toolCallId).toMatch(/^compaction-marker-/); + }); + + test("includes model_intent in input", () => { + const part = createCompactionMarkerPart(); + + expect(part.input.model_intent).toBe( + "Out of context, compaction in progress..." + ); + }); +}); + +describe("findCompactionSummary", () => { + test("returns null when no summary exists", () => { + const messages: Message[] = [ + userMsg("1", "Hello"), + assistantMsg("2", "Hi there"), + ]; + expect(findCompactionSummary(messages)).toBeNull(); + }); + + test("finds successful compact_conversation result (dynamic-tool)", () => { + const messages: Message[] = [ + userMsg("1", "Hello"), + summaryMsg("2", "This is the summary"), + ]; + + const result = findCompactionSummary(messages); + + expect(result).not.toBeNull(); + expect(result?.summary).toBe("This is the summary"); + expect(result?.compactedAt).toBe("2024-01-01T00:00:00Z"); + expect(result?.messageIndex).toBe(1); + }); + + test("finds successful compact_conversation result (typed tool)", () => { + const messages: Message[] = [ + userMsg("1", "Hello"), + { + id: "2", + role: "assistant", + parts: [ + { + type: `tool-${COMPACT_CONVERSATION_TOOL_NAME}`, + toolCallId: "test-call", + state: "output-available", + input: { summary: "Test" }, + output: { + summary: "Typed summary", + compacted_at: "2024-01-02T00:00:00Z", + }, + } as Message["parts"][number], + ], + }, + ]; + + const result = findCompactionSummary(messages); + + expect(result).not.toBeNull(); + expect(result?.summary).toBe("Typed summary"); + }); + + test("returns null for incomplete tool calls", () => { + const messages: Message[] = [ + { + id: "1", + role: "assistant", + parts: [ + { + type: "dynamic-tool", + toolName: COMPACT_CONVERSATION_TOOL_NAME, + toolCallId: "test-call", + state: "input-available", + input: { summary: "Test" }, + } as Message["parts"][number], + ], + }, + ]; + + expect(findCompactionSummary(messages)).toBeNull(); + }); +}); + +describe("countCompactionMarkers", () => { + test("returns 0 when no markers exist", () => { + const messages: Message[] = [ + userMsg("1", "Hello"), + assistantMsg("2", "Hi there"), + ]; + expect(countCompactionMarkers(messages)).toBe(0); + }); + + test("returns 1 for a single marker", () => { + const messages: Message[] = [userMsg("1", "Hello"), markerMsg("2")]; + expect(countCompactionMarkers(messages)).toBe(1); + }); + + test("counts multiple markers", () => { + const messages: Message[] = [ + markerMsg("1"), + userMsg("2", "test"), + markerMsg("3"), + userMsg("4", "test"), + markerMsg("5"), + ]; + + expect(countCompactionMarkers(messages)).toBe(3); + }); + + 
test("stops counting at compaction summary", () => { + const messages: Message[] = [ + markerMsg("1"), + summaryMsg("2", "Summary"), + userMsg("3", "test"), + markerMsg("4"), + ]; + + // Should only count marker4, not marker1 (which is before the summary) + expect(countCompactionMarkers(messages)).toBe(1); + }); +}); + +describe("buildCompactionRequestMessage", () => { + test("creates user message with correct role", () => { + const message = buildCompactionRequestMessage(); + + expect(message.role).toBe("user"); + }); + + test("includes context limit notice", () => { + const message = buildCompactionRequestMessage(); + const textPart = message.parts[0] as { type: "text"; text: string }; + + expect(textPart.text).toContain("SYSTEM NOTICE - CONTEXT LIMIT"); + expect(textPart.text).toContain("compact_conversation"); + }); +}); + +describe("applyCompactionToMessages", () => { + test("returns unchanged messages when no compaction state", () => { + const messages: Message[] = [ + userMsg("1", "Hello"), + assistantMsg("2", "Hi there"), + ]; + const result = applyCompactionToMessages(messages); + expect(result).toEqual(messages); + }); + + test("excludes correct number of messages based on marker count", () => { + const messages: Message[] = [ + userMsg("1", "Message 1"), + userMsg("2", "Message 2"), + assistantMsg("2-assistant", "Response 2"), + assistantMsg("2-assistant2", "Response 2b"), + userMsg("3", "Message 3"), + markerMsg("marker"), + ]; + + const result = applyCompactionToMessages(messages); + const ids = result.map((m) => m.id); + expect(ids).toContain("1"); + expect(ids).toContain("2"); + expect(ids).toContain("2-assistant"); + expect(ids).toContain("2-assistant2"); + expect(ids).not.toContain("3"); + + // With two markers, excludes two user turns + const result2 = applyCompactionToMessages([ + ...messages, + markerMsg("marker2"), + ]); + const ids2 = result2.map((m) => m.id); + expect(ids2).toContain("1"); + expect(ids2).not.toContain("2"); + expect(ids2).not.toContain("2-assistant"); + expect(ids2).not.toContain("2-assistant2"); + expect(ids2).not.toContain("3"); + }); + + test("excludes compaction markers", () => { + const messages: Message[] = [ + userMsg("1", "Message 1"), + userMsg("2", "Message 2"), + userMsg("3", "Message 3"), + markerMsg("marker"), + markerMsg("marker2"), + ]; + + const result = applyCompactionToMessages(messages); + const ids = result.map((m) => m.id); + expect(ids).toContain("1"); + expect(ids).not.toContain("marker"); + expect(ids).not.toContain("marker2"); + }); + + test("injects compaction request when marker found", () => { + const messages: Message[] = [ + userMsg("1", "Message 1"), + userMsg("2", "Message 2"), + userMsg("3", "Message 3"), + markerMsg("marker"), + ]; + + const result = applyCompactionToMessages(messages); + const lastMessage = result[result.length - 1]; + + expect(lastMessage?.role).toBe("user"); + expect(lastMessage?.parts[0]?.type).toBe("text"); + expect((lastMessage?.parts[0] as any).text).toContain( + "compact_conversation" + ); + }); + + test("replaces old messages with summary and excluded messages when compaction complete", () => { + const messages: Message[] = [ + userMsg("kept", "Will be summarized"), + userMsg("excluded-1", "Will be excluded and restored"), + assistantMsg("excluded-1-assistant", "Will be excluded and restored"), + markerMsg("marker-msg"), + summaryMsg("summary-msg", "Summary"), + userMsg("after-summary", "After"), + ]; + + const result = applyCompactionToMessages(messages); + const ids = result.map((m) => 
m.id); + expect(ids).not.toContain("kept"); + expect(ids).toContain("excluded-1"); + expect(ids).toContain("excluded-1-assistant"); + expect(ids).not.toContain("marker-msg"); + expect(ids).not.toContain("summary-msg"); + expect(ids).toContain("after-summary"); + }); + + test("throws error when would summarize <= 1 message", () => { + const messages: Message[] = [userMsg("1", "M1"), markerMsg("marker")]; + expect(() => applyCompactionToMessages(messages)).toThrow(/Cannot compact/); + }); + + test("uses only the most recent summary when multiple summaries exist", () => { + const messages: Message[] = [ + summaryMsg("old-summary", "Old summary content", "2024-01-01T00:00:00Z"), + userMsg("between-summaries", "Message between summaries"), + summaryMsg("new-summary", "New summary content", "2024-01-02T00:00:00Z"), + userMsg("after-new-summary", "After new summary"), + ]; + + const result = applyCompactionToMessages(messages); + + // Old summary should be discarded + const ids = result.map((m) => m.id); + expect(ids).not.toContain("old-summary"); + expect(ids).not.toContain("new-summary"); + expect(ids).toContain("after-new-summary"); + + // First message should be the summary message with the NEW summary content + const firstMessage = result[0]; + expect(firstMessage?.role).toBe("user"); + expect((firstMessage?.parts[0] as any).text).toContain( + "New summary content" + ); + expect((firstMessage?.parts[0] as any).text).toContain("2024-01-02"); + }); + + test("handles re-compaction after a summary (summary followed by new markers)", () => { + const messages: Message[] = [ + summaryMsg("summary", "First compaction summary"), + userMsg("after-summary-1", "Continued conversation"), + userMsg("after-summary-2", "More conversation"), + markerMsg("new-marker"), + ]; + + const result = applyCompactionToMessages(messages); + + // Should have summary messages at start, then kept messages, then compaction request + const ids = result.map((m) => m.id); + expect(ids).not.toContain("summary"); + expect(ids).not.toContain("new-marker"); + + // Should include the first after-summary message (excluded one user turn) + expect(ids).toContain("after-summary-1"); + expect(ids).not.toContain("after-summary-2"); + + // Last message should be compaction request + const lastMessage = result[result.length - 1]; + expect(lastMessage?.role).toBe("user"); + expect((lastMessage?.parts[0] as any).text).toContain( + "compact_conversation" + ); + }); + + test("handles summary with zero markers before it", () => { + // This can happen if a summary was manually added or from a previous session + const messages: Message[] = [ + userMsg("old-content", "Old content"), + summaryMsg("summary", "Summary with no markers"), + userMsg("after-summary", "After summary"), + ]; + + const result = applyCompactionToMessages(messages); + + const ids = result.map((m) => m.id); + // Old content should be replaced by summary + expect(ids).not.toContain("old-content"); + expect(ids).not.toContain("summary"); + expect(ids).toContain("after-summary"); + + // No excluded messages to restore (markerCount was 0) + // So result should be: summary messages + after-summary + expect(result.length).toBe(3); // user summary + assistant ack + after-summary + }); + + test("throws error when marker count exceeds available user messages", () => { + // 3 markers but only 2 user messages + const messages: Message[] = [ + userMsg("1", "M1"), + markerMsg("marker1"), + userMsg("2", "M2"), + markerMsg("marker2"), + markerMsg("marker3"), + ]; + + // With 3 markers, it 
tries to exclude 3 user turns, but there are only 2 + // This should cause excludedMessagesStartIndex to be 0, triggering CompactionError + expect(() => applyCompactionToMessages(messages)).toThrow(/Cannot compact/); + }); + + test("output structure starts with summary messages when summary is applied", () => { + const messages: Message[] = [ + userMsg("old", "Old message"), + markerMsg("marker"), + summaryMsg("summary", "The summary content"), + userMsg("after", "After"), + ]; + + const result = applyCompactionToMessages(messages); + + // First message: user message with summary + expect(result[0]?.role).toBe("user"); + expect(result[0]?.id).toBe("compaction-summary"); + expect((result[0]?.parts[0] as any).text).toContain("CONVERSATION SUMMARY"); + expect((result[0]?.parts[0] as any).text).toContain("The summary content"); + + // Second message: assistant acknowledgment + expect(result[1]?.role).toBe("assistant"); + expect(result[1]?.id).toBe("compaction-summary-response"); + expect((result[1]?.parts[0] as any).text).toBe("Acknowledged."); + }); + + test("handles typed tool format for markers", () => { + const messages: Message[] = [ + userMsg("1", "M1"), + userMsg("2", "M2"), + { + id: "marker", + role: "assistant", + parts: [ + { + type: `tool-${COMPACTION_MARKER_TOOL_NAME}`, + toolCallId: "typed-marker", + state: "output-available", + input: { model_intent: "test" }, + output: "marker output", + } as Message["parts"][number], + ], + }, + ]; + + const result = applyCompactionToMessages(messages); + + // Should recognize the typed format and process it + const ids = result.map((m) => m.id); + expect(ids).toContain("1"); + expect(ids).not.toContain("2"); // excluded + expect(ids).not.toContain("marker"); + + // Should inject compaction request + const lastMessage = result[result.length - 1]; + expect((lastMessage?.parts[0] as any).text).toContain( + "compact_conversation" + ); + }); + + test("filters out marker parts from messages with mixed content", () => { + const markerPart = createCompactionMarkerPart(); + const messages: Message[] = [ + userMsg("1", "M1"), + userMsg("2", "M2"), + { + id: "mixed", + role: "assistant", + parts: [ + { type: "text", text: "Some text" }, + markerPart as Message["parts"][number], + ], + }, + ]; + + const result = applyCompactionToMessages(messages); + + // The message with mixed content should be filtered out entirely + // (because it contains a marker part) + const ids = result.map((m) => m.id); + expect(ids).not.toContain("mixed"); + }); + + test("returns empty array for empty messages", () => { + const result = applyCompactionToMessages([]); + expect(result).toEqual([]); + }); + + test("preserves user message added after markers once summary is generated", () => { + // Scenario: compaction was in progress (markers present), user interrupted with a new message, + // then the model produced a summary. The new user message should be preserved. 
+ const messages: Message[] = [ + userMsg("1", "First message"), + userMsg("2", "Second message"), + userMsg("3", "Third message"), + userMsg("4", "Fourth message"), + markerMsg("marker1"), + markerMsg("marker2"), + userMsg("interrupted", "User interrupted compaction with this message"), + markerMsg("marker3"), + summaryMsg("summary", "Summary of the conversation"), + ]; + + const result = applyCompactionToMessages(messages); + + const ids = result.map((m) => m.id); + // Earlier messages should be replaced by summary + expect(ids).not.toContain("1"); + expect(ids).not.toContain("2"); + // Messages excluded during compaction request should be restored + expect(ids).toContain("3"); + expect(ids).toContain("4"); + expect(ids).toContain("interrupted"); // The interrupted user message is preserved + // Markers and summary message itself should be gone + expect(ids).not.toContain("marker1"); + expect(ids).not.toContain("marker2"); + expect(ids).not.toContain("marker3"); + expect(ids).not.toContain("summary"); + // Should start with summary messages + expect(result[0]?.id).toBe("compaction-summary"); + }); +}); diff --git a/packages/scout-agent/lib/compaction.ts b/packages/scout-agent/lib/compaction.ts new file mode 100644 index 0000000..88d7d16 --- /dev/null +++ b/packages/scout-agent/lib/compaction.ts @@ -0,0 +1,401 @@ +import { type Tool, tool } from "ai"; +import { z } from "zod"; +import type { Message } from "./types"; + +// Constants +export const COMPACTION_MARKER_TOOL_NAME = "__compaction_marker"; +export const COMPACT_CONVERSATION_TOOL_NAME = "compact_conversation"; + +// Error patterns for out-of-context detection (regex) +const OUT_OF_CONTEXT_PATTERNS = [ + /context.*(length|limit|window|exceeded)/i, + /token.*(limit|exceeded|maximum)/i, + /maximum.*context/i, + /input.*too.*long/i, + /prompt.*too.*long/i, + // Anthropic specific + /max_tokens_exceeded/i, + // OpenAI specific + /context_length_exceeded/i, + /maximum.*tokens/i, +]; + +/** + * Check if an error is an out-of-context error based on known patterns. + */ +export function isOutOfContextError(error: unknown): boolean { + let message: string; + + if (error instanceof Error) { + message = error.message; + } else if (typeof error === "string") { + message = error; + } else if ( + error !== null && + typeof error === "object" && + "message" in error && + typeof error.message === "string" + ) { + message = error.message; + } else { + return false; + } + + return OUT_OF_CONTEXT_PATTERNS.some((pattern) => pattern.test(message)); +} + +/** + * Create the compact_conversation tool for the model to call. + */ +export function createCompactionTool(): Record< + typeof COMPACT_CONVERSATION_TOOL_NAME, + Tool +> { + return { + [COMPACT_CONVERSATION_TOOL_NAME]: tool({ + description: `Compact the conversation history to save context space. Call this tool when instructed that the conversation is approaching context limits. Provide a detailed and thorough summary that captures: +- The main topics discussed +- Key decisions made +- Important code changes or file modifications (include file paths and what was changed) +- Any ongoing tasks or action items +- Critical context needed to continue the conversation +- Relevant technical details, configurations, or environment information +- Any errors encountered and how they were resolved + +Be thorough and detailed. 
This summary will replace the earlier conversation history, so include all information needed to continue effectively.`, + inputSchema: z.object({ + summary: z + .string() + .describe( + "A detailed and thorough summary of the conversation so far, including all important context needed to continue effectively." + ), + }), + execute: async ({ summary }) => { + return { + summary, + compacted_at: new Date().toISOString(), + message: + "Conversation history has been compacted. The summary will be used to maintain context in future messages.", + }; + }, + }), + }; +} + +/** + * Check if a message part is a compaction marker. + */ +function isCompactionMarkerPart(part: Message["parts"][number]): boolean { + return ( + (part.type === "dynamic-tool" && + "toolName" in part && + part.toolName === COMPACTION_MARKER_TOOL_NAME) || + part.type === `tool-${COMPACTION_MARKER_TOOL_NAME}` + ); +} + +/** + * Check if a message part is a compaction summary. + */ +function isCompactionSummaryPart(part: Message["parts"][number]): boolean { + return ( + (part.type === `tool-${COMPACT_CONVERSATION_TOOL_NAME}` || + (part.type === "dynamic-tool" && + "toolName" in part && + part.toolName === COMPACT_CONVERSATION_TOOL_NAME)) && + "state" in part && + part.state === "output-available" && + "output" in part + ); +} + +export interface CompactionMarkerPart { + type: "dynamic-tool"; + toolName: typeof COMPACTION_MARKER_TOOL_NAME; + toolCallId: string; + state: "output-available"; + input: { + model_intent: string; + }; + output: string; +} + +/** + * Create a synthetic tool call part for the compaction marker. + * This is emitted when an out-of-context error is detected. + */ +export function createCompactionMarkerPart(): CompactionMarkerPart { + return { + type: "dynamic-tool", + toolCallId: `compaction-marker-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`, + toolName: COMPACTION_MARKER_TOOL_NAME, + state: "output-available", + input: { + model_intent: "Out of context, compaction in progress...", + }, + output: + "Compaction marker - this will trigger compaction on the next iteration", + }; +} + +interface CompactionSummaryResult { + summary: string; + compacted_at: string; +} + +/** + * Find a successful compaction summary in the messages. + */ +export function findCompactionSummary(messages: Message[]): { + summary: string; + compactedAt: string; + messageIndex: number; +} | null { + for (let i = messages.length - 1; i >= 0; i--) { + const message = messages[i]; + if (message?.role !== "assistant") continue; + + for (const part of message.parts) { + if (isCompactionSummaryPart(part) && "output" in part) { + const output = part.output as CompactionSummaryResult | undefined; + if (output?.summary) { + return { + summary: output.summary, + compactedAt: output.compacted_at, + messageIndex: i, + }; + } + } + } + } + return null; +} + +/** + * Count consecutive compaction markers in the messages (markers without a summary in between). + * This is used to determine the retry count - each marker represents a failed compaction attempt. + * + * @param messages The messages to search + * @param beforeIndex Optional index to stop at (exclusive). If provided, only counts markers before this index. + */ +export function countCompactionMarkers( + messages: Message[], + beforeIndex?: number +): number { + let count = 0; + const endIndex = beforeIndex ?? 
messages.length;
+  // Scan from the end (or beforeIndex) to find markers, stop if we find a summary
+  for (let i = endIndex - 1; i >= 0; i--) {
+    const message = messages[i];
+    if (message?.role !== "assistant") {
+      continue;
+    }
+
+    for (const part of message.parts) {
+      if (isCompactionSummaryPart(part)) {
+        return count;
+      }
+      if (isCompactionMarkerPart(part)) {
+        count++;
+      }
+    }
+  }
+  return count;
+}
+
+/**
+ * Build the compaction request message that instructs the model to compact.
+ */
+export function buildCompactionRequestMessage(): Message {
+  return {
+    id: `compaction-request-${Date.now()}`,
+    role: "user",
+    parts: [
+      {
+        type: "text",
+        text: `[SYSTEM NOTICE - CONTEXT LIMIT]
+Your conversation has exceeded the context window.
+
+To prevent context overflow errors, please call the \`compact_conversation\` tool NOW to summarize the conversation history.
+
+Provide a detailed and thorough summary that captures all important context, decisions, code changes, file paths, and ongoing tasks. Do not leave out important details.`,
+      },
+    ],
+  };
+}
+
+/**
+ * Build a summary message that replaces the compacted conversation history.
+ */
+export function buildCompactionSummaryMessages(
+  summary: string,
+  compactedAt: string
+): Message[] {
+  return [
+    {
+      id: "compaction-summary",
+      role: "user",
+      parts: [
+        {
+          type: "text",
+          text: `[CONVERSATION SUMMARY - Previously compacted at ${compactedAt}]
+
+${summary}
+
+---
+The conversation continues from this point.`,
+        },
+      ],
+    },
+    // Add an assistant response to make sure that, when the next message is a "user" message,
+    // the provider won't throw an error. Some APIs don't accept consecutive "user" messages.
+    {
+      id: "compaction-summary-response",
+      role: "assistant",
+      parts: [
+        {
+          type: "text",
+          text: "Acknowledged.",
+        },
+      ],
+    },
+  ];
+}
+
+/**
+ * Finds the most recent summary and applies it to the messages.
+ */ +function applySummaryToMessages(messages: Message[]): Message[] { + const summary = findCompactionSummary(messages); + if (!summary) { + return messages; + } + const markerCount = countCompactionMarkers(messages, summary?.messageIndex); + const excludedMessagesStartIndex = findExcludedMessagesStartIndex( + messages.slice(0, summary.messageIndex), + markerCount + ); + const summaryMessages = buildCompactionSummaryMessages( + summary.summary, + summary.compactedAt + ); + const excludedMessages = messages + .slice(excludedMessagesStartIndex, summary.messageIndex) + .filter((m) => !m.parts.some((p) => isCompactionMarkerPart(p))); + + return [ + ...summaryMessages, + ...excludedMessages, + ...messages.slice(summary.messageIndex + 1), + ]; +} + +function findExcludedMessagesStartIndex( + messages: Message[], + markerCount: number +): number { + if (markerCount <= 0) { + return messages.length; + } + let lastUserIndex = messages.length; + let found = 0; + for (let i = messages.length - 1; i >= 0; i--) { + const message = messages[i]; + if (message?.role !== "user") { + continue; + } + lastUserIndex = i; + found++; + if (found === markerCount) { + return lastUserIndex; + } + } + return 0; +} + +function transformMessagesForCompaction(messages: Message[]): Message[] { + const markerCount = countCompactionMarkers(messages); + if (markerCount === 0) { + return messages; + } + const excludedMessagesStartIndex = findExcludedMessagesStartIndex( + messages, + markerCount + ); + if (excludedMessagesStartIndex === 0) { + throw new CompactionError( + "Cannot compact: would leave only the compaction request", + markerCount - 1 + ); + } + return [ + ...messages.slice(0, excludedMessagesStartIndex), + buildCompactionRequestMessage(), + ]; +} + +/** + * Apply compaction logic to messages, handling both summary application and compaction requests. + * + * This function is the main entry point for the compaction system. It processes messages + * in two phases: + * + * ## Phase 1: Apply existing summaries (`applySummaryToMessages`) + * If the messages contain a successful `compact_conversation` tool result (a summary), + * this phase replaces the earlier conversation history with: + * 1. Summary messages (user message with summary + assistant acknowledgment) + * 2. Messages that were excluded when building the compaction request (restored here because + * they weren't in the context when the model wrote the summary). These are the last N + * user turns before the summary, where N = marker count before the summary. + * 3. Messages after the summary + * + * **Multiple summaries**: Only the most recent summary is used. Earlier summaries are + * discarded because the model had them in context when generating the newer summary, + * so their content should be incorporated. Summaries are cumulative, not layered. + * + * ## Phase 2: Transform for compaction request (`transformMessagesForCompaction`) + * If compaction markers are present (indicating out-of-context errors were caught), + * this phase: + * 1. Counts consecutive markers to determine retry count + * 2. Excludes the last N user message "turns" (where N = marker count). Each marker + * represents an out-of-context error, meaning the conversation was too long for the + * model to process. By excluding more messages on each retry, we reduce the input + * size so the model has room to generate the summary. + * 3. 
Appends a compaction request message asking the model to call `compact_conversation` + * + * ## Retry mechanism + * Each compaction marker represents an out-of-context error. The first marker typically + * comes from normal operation (the conversation grew too long). Subsequent markers + * indicate that even the compaction request itself was too long. On each retry, more + * messages are excluded to give the model room to generate the summary. If all messages + * would be excluded, throws `CompactionError`. + * + * ## Flow example + * 1. Model hits context limit → `processStreamTextOutput` emits compaction marker + * 2. Next iteration calls this function + * 3. Messages are truncated + compaction request appended + * 4. Model calls `compact_conversation` with summary + * 5. Next iteration: summary is applied, old messages replaced + * + * @param messages - The full conversation message history + * @returns Transformed messages ready to send to the model + * @throws {CompactionError} If compaction would leave no messages (too many retries) + */ +export function applyCompactionToMessages(messages: Message[]): Message[] { + const currentConversation = applySummaryToMessages(messages); + const transformedMessages = + transformMessagesForCompaction(currentConversation); + return transformedMessages.filter( + (message) => !message.parts.some((part) => isCompactionMarkerPart(part)) + ); +} + +export class CompactionError extends Error { + constructor( + message: string, + public readonly retryCount: number + ) { + super(message); + this.name = "CompactionError"; + } +} diff --git a/packages/scout-agent/lib/core.test.ts b/packages/scout-agent/lib/core.test.ts index 7f10136..0946481 100644 --- a/packages/scout-agent/lib/core.test.ts +++ b/packages/scout-agent/lib/core.test.ts @@ -18,7 +18,11 @@ import { noopLogger, } from "./compute/test-utils"; import { type Message, Scout } from "./index"; -import { createMockBlinkApiServer, withBlinkApiUrl } from "./test-helpers"; +import { + createAgentTestHelper, + createMockBlinkApiServer, + withBlinkApiUrl, +} from "./test-helpers"; // Add async iterator support to ReadableStream for testing declare global { @@ -33,18 +37,46 @@ type DoStreamOptions = Parameters[0]; const newMockModel = ({ textResponse, onDoStream, + streamError, + throwError, }: { - textResponse: string; + textResponse?: string; onDoStream?: (args: DoStreamOptions) => Promise | void; + /** Error to emit in the stream after some text (simulates mid-stream error) */ + streamError?: Error; + /** Error to throw from doStream (simulates immediate error) */ + throwError?: Error; }) => { return new MockLanguageModelV2({ doStream: async (options) => { await onDoStream?.(options); + + if (throwError) { + throw throwError; + } + + if (streamError) { + // Create a stream that emits some text then errors + return { + stream: new ReadableStream({ + start(controller) { + controller.enqueue({ type: "text-start", id: "text-1" }); + controller.enqueue({ + type: "text-delta", + id: "text-1", + delta: "Hello", + }); + controller.error(streamError); + }, + }), + }; + } + return { stream: simulateReadableStream({ chunks: [ { type: "text-start", id: "text-1" }, - { type: "text-delta", id: "text-1", delta: textResponse }, + { type: "text-delta", id: "text-1", delta: textResponse ?? 
"" }, { type: "text-end", id: "text-1" }, { type: "finish", @@ -252,10 +284,13 @@ describe("config", async () => { ], tools: [ { - name: "no tools with empty config", + name: "only compact_conversation tool with empty config", config: {}, assertion: ({ callOptions }) => { - expect(callOptions.tools).toBeUndefined(); + // Only the compact_conversation tool should be present (enabled by default) + expect(callOptions.tools).toBeDefined(); + expect(callOptions.tools).toHaveLength(1); + expect(callOptions.tools?.[0]?.name).toBe("compact_conversation"); }, }, { @@ -289,7 +324,13 @@ describe("config", async () => { slack: { botToken: "test", signingSecret: "set" }, }, assertion: ({ callOptions }) => { - expect(callOptions.tools).toBeUndefined(); + // Only the compact_conversation tool should be present (no slack tools when not responding in slack) + expect(callOptions.tools).toBeDefined(); + expect(callOptions.tools).toHaveLength(1); + expect(callOptions.tools?.[0]?.name).toBe("compact_conversation"); + expect( + callOptions.tools?.find((tool) => tool.name.startsWith("slack_")) + ).toBeUndefined(); expect(JSON.stringify(callOptions.prompt)).not.toInclude( "report your Slack status" ); @@ -948,3 +989,770 @@ describe("coder integration", () => { expect(mockClient.getAppHost).toHaveBeenCalled(); }); }); + +describe("compaction", () => { + // Shared helpers for compaction tests + const CONTEXT_LENGTH_ERROR = "context_length_exceeded"; + + /** Check if a message contains the compaction marker */ + const hasCompactionMarker = (msg: UIMessage) => + msg.parts.some( + (p) => + p.type === "tool-__compaction_marker" || + ((p as { type: string; toolName?: string }).type === "dynamic-tool" && + (p as { toolName?: string }).toolName === "__compaction_marker") + ); + + /** Check if a message contains the compact_conversation tool call */ + const hasCompactTool = (msg: UIMessage) => + msg.parts.some( + (p: { type: string; toolName?: string }) => + p.type === "tool-compact_conversation" || + (p.type === "dynamic-tool" && p.toolName === "compact_conversation") + ); + + /** Create a mock response that calls the compact_conversation tool */ + const createCompactToolResponse = ( + summary: string, + toolCallId = "compact-tool-call-1" + ) => ({ + stream: simulateReadableStream({ + chunks: [ + { + type: "tool-call" as const, + toolCallType: "function" as const, + toolCallId, + toolName: "compact_conversation", + input: JSON.stringify({ + model_intent: "Compacting conversation history", + properties: { summary }, + }), + }, + { + type: "finish" as const, + finishReason: "tool-calls" as const, + logprobs: undefined, + usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 }, + }, + ], + }), + }); + + /** Create a mock response with success text */ + const createTextResponse = (text: string) => ({ + stream: simulateReadableStream({ + chunks: [ + { type: "text-start" as const, id: "text-1" }, + { type: "text-delta" as const, id: "text-1", delta: text }, + { type: "text-end" as const, id: "text-1" }, + { + type: "finish" as const, + finishReason: "stop" as const, + logprobs: undefined, + usage: { inputTokens: 50, outputTokens: 20, totalTokens: 70 }, + }, + ], + }), + }); + + /** Set up agent with scout and chat handler for a given model */ + const setupCompactionTest = ( + model: MockLanguageModelV2, + chatID = "test-chat-id" as blink.ID + ) => { + const agent = new blink.Agent(); + const scout = new Scout({ agent, logger: noopLogger }); + agent.on("chat", async ({ messages }) => { + const params = await 
scout.buildStreamTextParams({ + chatID, + messages, + model, + }); + return scout.processStreamTextOutput( + streamText({ + ...params, + // by default, streamText prints all errors to console.error, which is noisy in tests + onError: () => {}, + }) + ); + }); + return { agent, scout, chatID }; + }; + + /** Extract all text content from buildStreamTextParams result */ + const extractAllContent = (params: { + messages: Array<{ + content: string | Array<{ type: string; text?: string }>; + }>; + }) => + params.messages + .map((m) => { + if (typeof m.content === "string") return m.content; + if (Array.isArray(m.content)) { + return m.content + .map((p) => (p.type === "text" ? p.text : "")) + .join(""); + } + return ""; + }) + .join(" "); + + /** Get the text from a text part in a message */ + const getTextFromMessage = (msg: UIMessage): string | undefined => { + const textPart = msg.parts.find((p: { type: string }) => p.type === "text"); + return textPart ? (textPart as { text: string }).text : undefined; + }; + + test("buildStreamTextParams always includes compact_conversation tool by default", async () => { + const agent = new blink.Agent(); + const scout = new Scout({ + agent, + logger: noopLogger, + }); + + const params = await scout.buildStreamTextParams({ + chatID: "test-chat-id" as blink.ID, + messages: [], + model: newMockModel({ textResponse: "test" }), + }); + + // Check compact_conversation tool is included + expect(params.tools.compact_conversation).toBeDefined(); + }); + + test("buildStreamTextParams excludes compact_conversation tool when compaction disabled", async () => { + const agent = new blink.Agent(); + const scout = new Scout({ + agent, + logger: noopLogger, + }); + + const params = await scout.buildStreamTextParams({ + chatID: "test-chat-id" as blink.ID, + messages: [], + model: newMockModel({ textResponse: "test" }), + compaction: false, + }); + + // Check compact_conversation tool is NOT included + expect(params.tools.compact_conversation).toBeUndefined(); + }); + + test("buildStreamTextParams throws when exclusion would leave insufficient messages", async () => { + const agent = new blink.Agent(); + const scout = new Scout({ + agent, + logger: noopLogger, + }); + + // Create messages with insufficient content to summarize after exclusion + // With 1 marker, retryCount=0, so 1 message will be excluded, leaving 0 messages + const messages: Message[] = [ + { + id: "user-1", + role: "user", + parts: [{ type: "text", text: "Only message" }], + }, + { + id: "marker-msg", + role: "assistant", + parts: [ + { + type: "dynamic-tool", + toolName: "__compaction_marker", + toolCallId: "marker-1", + state: "output-available", + input: { + model_intent: "Out of context, compaction in progress...", + }, + output: "marker", + } as Message["parts"][number], + ], + }, + ]; + + await expect( + scout.buildStreamTextParams({ + chatID: "test-chat-id" as blink.ID, + messages, + model: newMockModel({ textResponse: "test" }), + }) + ).rejects.toThrow(/Cannot compact/); + }); + + test("processStreamTextOutput passes through normal stream unchanged", async () => { + const agent = new blink.Agent(); + const scout = new Scout({ + agent, + logger: noopLogger, + }); + + const params = await scout.buildStreamTextParams({ + chatID: "test-chat-id" as blink.ID, + messages: [ + { + id: "user-1", + role: "user", + parts: [{ type: "text", text: "Hello" }], + }, + ], + model: newMockModel({ textResponse: "Hello World" }), + compaction: false, + }); + + const stream = streamText(params); + const 
processedStream = scout.processStreamTextOutput(stream); + + const collectedChunks: { type: string }[] = []; + for await (const chunk of processedStream.fullStream) { + collectedChunks.push(chunk as { type: string }); + } + + // Should have text chunks and finish + expect(collectedChunks.some((c) => c.type === "text-delta")).toBe(true); + expect(collectedChunks.some((c) => c.type === "finish")).toBe(true); + // Should NOT have any compaction markers + expect(collectedChunks.some((c) => c.type === "tool-result")).toBe(false); + }); + + test("e2e: complete compaction flow using scout methods directly", async () => { + let modelCallCount = 0; + + // Call 1: context error, Call 2: compact_conversation, Call 3: success + const model = new MockLanguageModelV2({ + doStream: async () => { + modelCallCount++; + if (modelCallCount === 1) throw new Error(CONTEXT_LENGTH_ERROR); + if (modelCallCount === 2) + return createCompactToolResponse( + "Previous conversation summary from model." + ); + return createTextResponse("Success after compaction"); + }, + }); + + const { agent, scout, chatID } = setupCompactionTest(model); + + await using helper = createAgentTestHelper(agent, { + initialMessages: [ + { + id: "msg-1", + role: "user", + parts: [{ type: "text", text: "Old message 1" }], + }, + { + id: "msg-2", + role: "assistant", + parts: [{ type: "text", text: "Old response 1" }], + }, + { + id: "msg-3", + role: "user", + parts: [{ type: "text", text: "New question" }], + }, + ], + }); + + // Step 1: context error → compaction marker + const result1 = await helper.runChatTurn(); + expect(hasCompactionMarker(result1.assistantMessage)).toBe(true); + expect(modelCallCount).toBe(1); + + // Step 2: model calls compact_conversation + const result2 = await helper.runChatTurn(); + expect(hasCompactTool(result2.assistantMessage)).toBe(true); + expect(modelCallCount).toBe(2); + + // Step 3: success + helper.addUserMessage("Follow-up question"); + const result3 = await helper.runChatTurn(); + expect(getTextFromMessage(result3.assistantMessage)).toBe( + "Success after compaction" + ); + expect(modelCallCount).toBe(3); + + // Verify compaction: old messages removed, summary + excluded restored + const params = await scout.buildStreamTextParams({ + chatID, + messages: helper.messages as Message[], + model, + }); + const allContent = extractAllContent(params); + + expect(allContent).toContain("CONVERSATION SUMMARY"); + expect(allContent).toContain("Previous conversation summary from model"); + expect(allContent).toContain("New question"); // excluded and restored + expect(allContent).not.toContain("Old message 1"); // summarized + expect(allContent).not.toContain("Old response 1"); // summarized + expect(allContent).toContain("Follow-up question"); // added after + }); + + test("e2e: user message submitted during compaction appears after excluded messages", async () => { + let modelCallCount = 0; + + // Calls 1-2: context errors, Call 3: compact, Call 4: success + const model = new MockLanguageModelV2({ + doStream: async () => { + modelCallCount++; + if (modelCallCount <= 2) throw new Error(CONTEXT_LENGTH_ERROR); + if (modelCallCount === 3) + return createCompactToolResponse("Summary of the old conversation."); + return createTextResponse("Response after compaction"); + }, + }); + + const { agent, scout, chatID } = setupCompactionTest(model); + + await using helper = createAgentTestHelper(agent, { + initialMessages: [ + { + id: "msg-1", + role: "user", + parts: [{ type: "text", text: "First message to summarize" }], + 
}, + { + id: "msg-2", + role: "assistant", + parts: [{ type: "text", text: "First response to summarize" }], + }, + { + id: "msg-3", + role: "user", + parts: [{ type: "text", text: "Second message to summarize" }], + }, + { + id: "msg-4", + role: "assistant", + parts: [{ type: "text", text: "Second response - will be excluded" }], + }, + { + id: "msg-5", + role: "user", + parts: [{ type: "text", text: "Third message - will be excluded" }], + }, + ], + }); + + // Steps 1-2: context errors → markers + const result1 = await helper.runChatTurn(); + expect(hasCompactionMarker(result1.assistantMessage)).toBe(true); + const result2 = await helper.runChatTurn(); + expect(hasCompactionMarker(result2.assistantMessage)).toBe(true); + + // User submits message during compaction + helper.addUserMessage("User message submitted during compaction"); + + // Step 3: compact_conversation, Step 4: success + await helper.runChatTurn(); + const result4 = await helper.runChatTurn(); + expect(getTextFromMessage(result4.assistantMessage)).toBe( + "Response after compaction" + ); + + // Verify message structure + const params = await scout.buildStreamTextParams({ + chatID, + messages: helper.messages as Message[], + model, + }); + const allContent = extractAllContent(params); + + expect(allContent).toContain("CONVERSATION SUMMARY"); + expect(allContent).toContain("Summary of the old conversation"); + expect(allContent).toContain("Third message - will be excluded"); + expect(allContent).not.toContain("First message to summarize"); + expect(allContent).not.toContain("First response to summarize"); + expect(allContent).not.toContain("Second message to summarize"); + expect(allContent).toContain("User message submitted during compaction"); + + // Verify order: summary < excluded < user's new message + const summaryIndex = allContent.indexOf("CONVERSATION SUMMARY"); + const excludedIndex = allContent.indexOf( + "Third message - will be excluded" + ); + const userMsgIndex = allContent.indexOf( + "User message submitted during compaction" + ); + expect(summaryIndex).toBeLessThan(excludedIndex); + expect(excludedIndex).toBeLessThan(userMsgIndex); + }); + + test("e2e: non-context error during compaction does not increase exclusion count", async () => { + let modelCallCount = 0; + + // Call 1: context error, Call 2: network error, Call 3: compact, Call 4: success + const model = new MockLanguageModelV2({ + doStream: async () => { + modelCallCount++; + if (modelCallCount === 1) throw new Error(CONTEXT_LENGTH_ERROR); + if (modelCallCount === 2) + throw new Error("network_error: connection refused"); + if (modelCallCount === 3) + return createCompactToolResponse("Summary after non-context error."); + return createTextResponse("Success after compaction"); + }, + }); + + const { agent, scout, chatID } = setupCompactionTest(model); + + await using helper = createAgentTestHelper(agent, { + initialMessages: [ + { + id: "msg-1", + role: "user", + parts: [{ type: "text", text: "First message to summarize" }], + }, + { + id: "msg-2", + role: "assistant", + parts: [{ type: "text", text: "First response to summarize" }], + }, + { + id: "msg-3", + role: "user", + parts: [{ type: "text", text: "Second message - will be excluded" }], + }, + ], + }); + + // Step 1: context error → marker + const result1 = await helper.runChatTurn(); + expect(hasCompactionMarker(result1.assistantMessage)).toBe(true); + + // Step 2: non-context error → should propagate (not produce marker) + await expect(helper.runChatTurn()).rejects.toThrow(); + + // User 
retries, then compact and success
+    helper.addMessage("assistant", "Retry after network error");
+    await helper.runChatTurn();
+    const result4 = await helper.runChatTurn();
+    expect(getTextFromMessage(result4.assistantMessage)).toBe(
+      "Success after compaction"
+    );
+
+    // Verify: only 1 marker, so excludeCount=1
+    const params = await scout.buildStreamTextParams({
+      chatID,
+      messages: helper.messages as Message[],
+      model,
+    });
+    const allContent = extractAllContent(params);
+
+    expect(allContent).toContain("CONVERSATION SUMMARY");
+    expect(allContent).toContain("Summary after non-context error");
+    expect(allContent).toContain("Second message - will be excluded"); // restored
+    expect(allContent).not.toContain("First message to summarize"); // summarized
+    expect(allContent).not.toContain("First response to summarize"); // summarized
+    expect(allContent).toContain("Retry after network error"); // added after
+  });
+
+  test("e2e: error before streaming triggers compaction marker", async () => {
+    let modelCallCount = 0;
+
+    // Call 1: context error, Call 2: compact, Call 3: success
+    const model = new MockLanguageModelV2({
+      doStream: async () => {
+        modelCallCount++;
+        if (modelCallCount === 1) throw new Error(CONTEXT_LENGTH_ERROR);
+        if (modelCallCount === 2)
+          return createCompactToolResponse("Error recovery summary.");
+        return createTextResponse("Success after error recovery");
+      },
+    });
+
+    const { agent } = setupCompactionTest(model);
+
+    await using helper = createAgentTestHelper(agent, {
+      initialMessages: [
+        {
+          id: "msg-1",
+          role: "user",
+          parts: [{ type: "text", text: "First message" }],
+        },
+        {
+          id: "msg-2",
+          role: "assistant",
+          parts: [{ type: "text", text: "First response" }],
+        },
+        {
+          id: "msg-3",
+          role: "user",
+          parts: [{ type: "text", text: "Second message" }],
+        },
+      ],
+    });
+
+    // Step 1: error → marker
+    const result1 = await helper.runChatTurn();
+    expect(hasCompactionMarker(result1.assistantMessage)).toBe(true);
+
+    // Step 2: compact_conversation
+    const result2 = await helper.runChatTurn();
+    expect(hasCompactTool(result2.assistantMessage)).toBe(true);
+
+    // Step 3: success
+    helper.addUserMessage("Follow-up after error");
+    const result3 = await helper.runChatTurn();
+    expect(getTextFromMessage(result3.assistantMessage)).toBe(
+      "Success after error recovery"
+    );
+  });
+
+  test("e2e: mid-stream error via controller.error() triggers compaction marker", async () => {
+    let modelCallCount = 0;
+
+    // Call 1: mid-stream error via controller.error(), Call 2: compact, Call 3: success
+    const model = new MockLanguageModelV2({
+      doStream: async () => {
+        modelCallCount++;
+        if (modelCallCount === 1) {
+          // Stream that emits some chunks, then errors mid-stream
+          return {
+            stream: new ReadableStream({
+              start(controller) {
+                controller.enqueue({ type: "text-start", id: "text-1" });
+                controller.enqueue({
+                  type: "text-delta",
+                  id: "text-1",
+                  delta: "Starting to respond...",
+                });
+                controller.error(new Error(CONTEXT_LENGTH_ERROR));
+              },
+            }),
+          };
+        }
+        if (modelCallCount === 2)
+          return createCompactToolResponse(
+            "Mid-stream error recovery summary."
+          );
+        return createTextResponse("Success after mid-stream error recovery");
+      },
+    });
+
+    const { agent } = setupCompactionTest(model);
+
+    await using helper = createAgentTestHelper(agent, {
+      initialMessages: [
+        {
+          id: "msg-1",
+          role: "user",
+          parts: [{ type: "text", text: "First message" }],
+        },
+        {
+          id: "msg-2",
+          role: "assistant",
+          parts: [{ type: "text", text: "First response" }],
+        },
+        {
+          id: "msg-3",
+          role: "user",
+          parts: [{ type: "text", text: "Second message" }],
+        },
+      ],
+    });
+
+    // Step 1: mid-stream error → marker
+    const result1 = await helper.runChatTurn();
+    expect(hasCompactionMarker(result1.assistantMessage)).toBe(true);
+
+    // Step 2: compact_conversation
+    const result2 = await helper.runChatTurn();
+    expect(hasCompactTool(result2.assistantMessage)).toBe(true);
+
+    // Step 3: success
+    helper.addUserMessage("Follow-up after mid-stream error");
+    const result3 = await helper.runChatTurn();
+    expect(getTextFromMessage(result3.assistantMessage)).toBe(
+      "Success after mid-stream error recovery"
+    );
+  });
+
+  test("e2e: handles multiple compaction cycles in long conversation", async () => {
+    let modelCallCount = 0;
+    const capturedMessages: string[][] = []; // capture messages for each call
+
+    // Model that goes through two complete compaction cycles
+    const model = new MockLanguageModelV2({
+      doStream: async (options) => {
+        modelCallCount++;
+        // Capture message content for verification
+        const messageContents = options.prompt.map((m) => {
+          if (typeof m.content === "string") return m.content;
+          if (Array.isArray(m.content))
+            return m.content.map((p) => ("text" in p ? p.text : "")).join("");
+          return "";
+        });
+        capturedMessages.push(messageContents);
+
+        // Cycle 1: calls 1-3
+        if (modelCallCount === 1) throw new Error(CONTEXT_LENGTH_ERROR);
+        if (modelCallCount === 2)
+          return createCompactToolResponse(
+            "First compaction summary from cycle 1."
+          );
+        if (modelCallCount === 3)
+          return createTextResponse("First cycle complete");
+        // Cycle 2: calls 4-6
+        if (modelCallCount === 4) throw new Error(CONTEXT_LENGTH_ERROR);
+        if (modelCallCount === 5)
+          return createCompactToolResponse(
+            "Second compaction summary from cycle 2.",
+            "compact-tool-call-2"
+          );
+        if (modelCallCount === 6)
+          return createTextResponse("Second cycle complete");
+        return createTextResponse("Unexpected call");
+      },
+    });
+
+    const { agent, scout, chatID } = setupCompactionTest(model);
+
+    await using helper = createAgentTestHelper(agent, {
+      initialMessages: [
+        {
+          id: "msg-1",
+          role: "user",
+          parts: [{ type: "text", text: "Initial message 1" }],
+        },
+        {
+          id: "msg-2",
+          role: "assistant",
+          parts: [{ type: "text", text: "Initial response 1" }],
+        },
+        {
+          id: "msg-3",
+          role: "user",
+          parts: [{ type: "text", text: "Initial message 2" }],
+        },
+      ],
+    });
+
+    // Cycle 1: error → compact → success
+    const result1 = await helper.runChatTurn();
+    expect(hasCompactionMarker(result1.assistantMessage)).toBe(true);
+    await helper.runChatTurn(); // compact_conversation
+    helper.addUserMessage("Question after first compaction");
+    await helper.runChatTurn(); // success
+
+    // Verify compaction instruction was injected for cycle 1 (call 2)
+    const call2Content = capturedMessages[1]?.join(" ") ?? "";
+    expect(call2Content).toContain("SYSTEM NOTICE - CONTEXT LIMIT");
+    expect(call2Content).toContain("compact_conversation");
+
+    // Verify first cycle summary
+    let params = await scout.buildStreamTextParams({
+      chatID,
+      messages: helper.messages as Message[],
+      model,
+    });
+    expect(extractAllContent(params)).toContain(
+      "First compaction summary from cycle 1"
+    );
+
+    // Build up context again
+    helper.addUserMessage("Building up context - message 1");
+    helper.addUserMessage("Building up context - message 2");
+    helper.addUserMessage("Building up context - message 3");
+
+    // Cycle 2: error → compact → success
+    const result4 = await helper.runChatTurn();
+    expect(hasCompactionMarker(result4.assistantMessage)).toBe(true);
+    await helper.runChatTurn(); // compact_conversation
+    helper.addUserMessage("Question after second compaction");
+    await helper.runChatTurn(); // success
+
+    // Verify compaction instruction was injected for cycle 2 (call 5)
+    const call5Content = capturedMessages[4]?.join(" ") ?? "";
+    expect(call5Content).toContain("SYSTEM NOTICE - CONTEXT LIMIT");
+    expect(call5Content).toContain("compact_conversation");
+    // Should also contain the previous summary as context
+    expect(call5Content).toContain("First compaction summary from cycle 1");
+
+    // Verify final state: second summary present, first gone
+    params = await scout.buildStreamTextParams({
+      chatID,
+      messages: helper.messages as Message[],
+      model,
+    });
+    const allContent = extractAllContent(params);
+    expect(allContent).toContain("Second compaction summary from cycle 2");
+    expect(allContent).not.toContain("First compaction summary from cycle 1");
+  });
+
"This message will be excluded during compaction" + ); + expect(allContent).toContain("Follow-up question"); + + // Verify order: summary comes before excluded message + const summaryIndex = allContent.indexOf("CONVERSATION SUMMARY"); + const excludedIndex = allContent.indexOf( + "This message will be excluded during compaction" + ); + expect(summaryIndex).toBeLessThan(excludedIndex); + }); +}); diff --git a/packages/scout-agent/lib/core.ts b/packages/scout-agent/lib/core.ts index 9e334c7..d2ed4d0 100644 --- a/packages/scout-agent/lib/core.ts +++ b/packages/scout-agent/lib/core.ts @@ -6,6 +6,12 @@ import * as slack from "@blink-sdk/slack"; import type { App } from "@slack/bolt"; import { convertToModelMessages, type LanguageModel, type Tool } from "ai"; import type * as blink from "blink"; +import { + applyCompactionToMessages, + createCompactionMarkerPart, + createCompactionTool, + isOutOfContextError, +} from "./compaction"; import { type CoderApiClient, type CoderWorkspaceInfo, @@ -54,6 +60,12 @@ export interface BuildStreamTextParamsOptions { * If not provided, the GitHub auth context will be created using the app ID and private key from the GitHub config. */ getGithubAppContext?: () => Promise; + /** + * Whether to enable conversation compaction. When enabled, the compact_conversation tool + * will be included and compaction state in messages will be handled automatically. + * Default: true + */ + compaction?: boolean; } interface Logger { @@ -326,6 +338,7 @@ export class Scout { tools: providedTools, getGithubAppContext, systemPrompt = defaultSystemPrompt, + compaction = true, }: BuildStreamTextParamsOptions): Promise<{ model: LanguageModel; messages: ModelMessage[]; @@ -346,6 +359,9 @@ export class Scout { )() : undefined; + // it's important to look in the original messages, not the processed messages + // the processed ones may have been compacted and not include slack metadata + // anymore const slackMetadata = getSlackMetadata(messages); const respondingInSlack = this.slack.app !== undefined && slackMetadata !== undefined; @@ -461,6 +477,8 @@ export class Scout { }) : undefined), ...computeTools, + // Always include compaction tool when compaction is enabled (for caching purposes) + ...(compaction ? createCompactionTool() : {}), ...providedTools, }; @@ -473,7 +491,11 @@ ${slack.formattingRules} `; } - const converted = convertToModelMessages(messages, { + const messagesToConvert = compaction + ? applyCompactionToMessages(messages) + : messages; + + const converted = convertToModelMessages(messagesToConvert, { ignoreIncompleteToolCalls: true, tools, }); @@ -498,4 +520,107 @@ ${slack.formattingRules} tools: withModelIntent(tools), }; } + + /** + * Process the output from streamText, intercepting out-of-context errors + * and replacing them with compaction markers. 
diff --git a/packages/scout-agent/lib/core.ts b/packages/scout-agent/lib/core.ts
index 9e334c7..d2ed4d0 100644
--- a/packages/scout-agent/lib/core.ts
+++ b/packages/scout-agent/lib/core.ts
@@ -6,6 +6,12 @@ import * as slack from "@blink-sdk/slack";
 import type { App } from "@slack/bolt";
 import { convertToModelMessages, type LanguageModel, type Tool } from "ai";
 import type * as blink from "blink";
+import {
+  applyCompactionToMessages,
+  createCompactionMarkerPart,
+  createCompactionTool,
+  isOutOfContextError,
+} from "./compaction";
 import {
   type CoderApiClient,
   type CoderWorkspaceInfo,
@@ -54,6 +60,12 @@ export interface BuildStreamTextParamsOptions {
    * If not provided, the GitHub auth context will be created using the app ID and private key from the GitHub config.
    */
   getGithubAppContext?: () => Promise;
+  /**
+   * Whether to enable conversation compaction. When enabled, the compact_conversation tool
+   * will be included and compaction state in messages will be handled automatically.
+   * Default: true
+   */
+  compaction?: boolean;
 }
 
 interface Logger {
@@ -326,6 +338,7 @@ export class Scout {
     tools: providedTools,
     getGithubAppContext,
     systemPrompt = defaultSystemPrompt,
+    compaction = true,
   }: BuildStreamTextParamsOptions): Promise<{
     model: LanguageModel;
     messages: ModelMessage[];
@@ -346,6 +359,9 @@ export class Scout {
         )()
       : undefined;
 
+    // it's important to look in the original messages, not the processed messages
+    // the processed ones may have been compacted and not include slack metadata
+    // anymore
     const slackMetadata = getSlackMetadata(messages);
     const respondingInSlack =
       this.slack.app !== undefined && slackMetadata !== undefined;
@@ -461,6 +477,8 @@ export class Scout {
         })
       : undefined),
       ...computeTools,
+      // Always include compaction tool when compaction is enabled (for caching purposes)
+      ...(compaction ? createCompactionTool() : {}),
       ...providedTools,
     };
 
@@ -473,7 +491,11 @@ ${slack.formattingRules}
 `;
     }
 
-    const converted = convertToModelMessages(messages, {
+    const messagesToConvert = compaction
+      ? applyCompactionToMessages(messages)
+      : messages;
+
+    const converted = convertToModelMessages(messagesToConvert, {
       ignoreIncompleteToolCalls: true,
       tools,
     });
@@ -498,4 +520,107 @@ ${slack.formattingRules}
       tools: withModelIntent(tools),
     };
   }
+
+  /**
+   * Process the output from streamText, intercepting out-of-context errors
+   * and replacing them with compaction markers.
+   *
+   * @param stream - The StreamTextResult from the AI SDK's streamText()
+   * @param options - Optional callbacks
+   * @returns The same stream, but with toUIMessageStream wrapped to handle errors
+   */
+  processStreamTextOutput<
+    // biome-ignore lint/suspicious/noExplicitAny: toUIMessageStream has complex overloaded signature
+    T extends { toUIMessageStream: (...args: any[]) => any },
+  >(
+    stream: T,
+    options?: {
+      onCompactionTriggered?: () => void;
+    }
+  ): T {
+    // Use a Proxy to wrap toUIMessageStream
+    return new Proxy(stream, {
+      get(target, prop) {
+        // Wrap toUIMessageStream to intercept out-of-context errors
+        if (prop === "toUIMessageStream") {
+          const originalMethod = target.toUIMessageStream;
+          return (...args: unknown[]) => {
+            const uiStream = originalMethod.apply(target, args);
+
+            // Helper to emit compaction marker chunks
+            const emitCompactionMarker = (
+              controller: ReadableStreamDefaultController
+            ) => {
+              options?.onCompactionTriggered?.();
+              const markerPart = createCompactionMarkerPart();
+              controller.enqueue({
+                type: "tool-input-start",
+                toolCallId: markerPart.toolCallId,
+                toolName: markerPart.toolName,
+              });
+              controller.enqueue({
+                type: "tool-input-available",
+                toolCallId: markerPart.toolCallId,
+                toolName: markerPart.toolName,
+                input: markerPart.input,
+              });
+              controller.enqueue({
+                type: "tool-output-available",
+                toolCallId: markerPart.toolCallId,
+                output: markerPart.output,
+                preliminary: false,
+              });
+            };
+
+            // Use a custom ReadableStream to handle both error chunks and mid-stream errors
+            // This approach catches errors from controller.error() which TransformStream doesn't handle
+            return new ReadableStream({
+              async start(controller) {
+                const reader = uiStream.getReader();
+                try {
+                  while (true) {
+                    const { done, value: chunk } = await reader.read();
+                    if (done) break;
+
+                    // Check if this is an error chunk in UI format
+                    if (
+                      chunk &&
+                      typeof chunk === "object" &&
+                      "type" in chunk &&
+                      chunk.type === "error" &&
+                      "errorText" in chunk &&
+                      typeof chunk.errorText === "string" &&
+                      isOutOfContextError(new Error(chunk.errorText))
+                    ) {
+                      emitCompactionMarker(controller);
+                      continue;
+                    }
+                    controller.enqueue(chunk);
+                  }
+                  controller.close();
+                } catch (error) {
+                  // Mid-stream error via controller.error() - check if it's out of context
+                  if (isOutOfContextError(error)) {
+                    emitCompactionMarker(controller);
+                    controller.close();
+                  } else {
+                    controller.error(error);
+                  }
+                } finally {
+                  reader.releaseLock();
+                }
+              },
+            });
+          };
+        }
+
+        const value = target[prop as keyof T];
+        // Bind functions to the original target to preserve 'this' context
+        if (typeof value === "function") {
+          return value.bind(target);
+        }
+        return value;
+      },
+    }) as T;
+  }
 }
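For context on how the two additions to Scout are meant to be used together: buildStreamTextParams prepares the model call (with compaction enabled by default) and processStreamTextOutput wraps the streamText result so out-of-context failures, including mid-stream ones, surface as compaction marker tool parts rather than raw errors. A minimal consumer sketch, assuming BuildStreamTextParamsOptions carries the chatID, messages, and model fields the tests above pass, and that scout is an already-configured Scout instance; the handler shape itself is illustrative, not part of this patch:

```ts
import { streamText } from "ai";
import type { Scout } from "@blink-sdk/scout-agent";

// Assumed to be constructed elsewhere with the usual Scout configuration.
declare const scout: Scout;

type StreamParams = Parameters<Scout["buildStreamTextParams"]>[0];

export async function handleChat(
  options: Pick<StreamParams, "chatID" | "messages" | "model">
) {
  // Compaction defaults to true, so the compact_conversation tool is included
  // and any compaction markers already present in the messages are applied here.
  const params = await scout.buildStreamTextParams(options);

  // Wrap the result: out-of-context errors (error chunks or controller.error())
  // become compaction marker parts instead of failing the stream.
  const stream = scout.processStreamTextOutput(streamText(params), {
    onCompactionTriggered: () => {
      console.warn("context limit reached; compaction marker emitted");
    },
  });

  return stream.toUIMessageStream();
}
```

On the following turn the model then sees the injected context-limit notice and calls compact_conversation, which is the flow the e2e tests above exercise.
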
diff --git a/packages/scout-agent/lib/index.ts b/packages/scout-agent/lib/index.ts
index 2b6d5a5..d60f62f 100644
--- a/packages/scout-agent/lib/index.ts
+++ b/packages/scout-agent/lib/index.ts
@@ -1,3 +1,4 @@
+export * from "./compaction";
 export type { CoderApiClient, CoderWorkspaceInfo } from "./compute/coder/index";
 export type { DaytonaClient, DaytonaSandbox } from "./compute/daytona/index";
 export * from "./core";
diff --git a/packages/scout-agent/lib/test-helpers.ts b/packages/scout-agent/lib/test-helpers.ts
index 27dd1f0..8ba814c 100644
--- a/packages/scout-agent/lib/test-helpers.ts
+++ b/packages/scout-agent/lib/test-helpers.ts
@@ -1,6 +1,8 @@
 import * as http from "node:http";
 import { createServerAdapter } from "@whatwg-node/server";
+import { readUIMessageStream, type UIMessage, type UIMessageChunk } from "ai";
 import type * as blink from "blink";
+import { Client } from "blink/client";
 import { api as controlApi } from "blink/control";
 
 /**
@@ -131,3 +133,122 @@ export const noopLogger = {
   warn: () => {},
   error: () => {},
 };
+
+// Port counter to avoid port collisions between tests
+let testPortCounter = 35000;
+
+export interface RunChatTurnResult {
+  chunks: UIMessageChunk[];
+  assistantMessage: UIMessage;
+}
+
+export interface AgentTestHelper extends AsyncDisposable {
+  client: Client;
+  /** Current message history */
+  readonly messages: UIMessage[];
+  /**
+   * Adds a message to the history.
+   */
+  addMessage: (role: "user" | "assistant", text: string) => void;
+  /**
+   * Adds a user message to the history.
+   */
+  addUserMessage: (text: string) => void;
+  /**
+   * Runs a chat turn with the current message history.
+   * Automatically appends the assistant response to the history.
+   * Returns the result including chunks and the assistant message.
+   */
+  runChatTurn: () => Promise<RunChatTurnResult>;
+}
+
+export interface CreateAgentTestHelperOptions {
+  /** Initial messages to seed the conversation */
+  initialMessages?: UIMessage[];
+}
+
+/**
+ * Creates a test helper for a blink agent.
+ * Starts an HTTP server for the agent and provides methods to interact with it.
+ * Manages message history automatically.
+ *
+ * Usage:
+ * ```ts
+ * await using helper = createAgentTestHelper(agent, {
+ *   initialMessages: [{ id: "1", role: "user", parts: [{ type: "text", text: "Hello" }] }]
+ * });
+ * const result = await helper.runChatTurn();
+ * // Assistant message is automatically added to helper.messages
+ * ```
+ */
+export function createAgentTestHelper(
+  agent: blink.Agent,
+  options?: CreateAgentTestHelperOptions
+): AgentTestHelper {
+  const port = testPortCounter++;
+  const server = agent.serve({ port });
+  const client = new Client({
+    baseUrl: `http://localhost:${port}`,
+  });
+
+  const messages: UIMessage[] = options?.initialMessages
+    ? [...options.initialMessages]
+    : [];
+
+  const addMessage = (role: "user" | "assistant", text: string) => {
+    messages.push({
+      id: crypto.randomUUID(),
+      role,
+      parts: [{ type: "text", text }],
+    });
+  };
+
+  const runChatTurn = async (): Promise<RunChatTurnResult> => {
+    const stream = await client.chat({
+      id: crypto.randomUUID() as blink.ID,
+      messages,
+    });
+
+    const chunks: UIMessageChunk[] = [];
+    let assistantMessage: UIMessage | null = null;
+
+    const messageStream = readUIMessageStream({
+      stream: stream.pipeThrough(
+        new TransformStream({
+          transform(chunk, controller) {
+            chunks.push(chunk);
+            controller.enqueue(chunk);
+          },
+        })
+      ),
+    });
+
+    for await (const message of messageStream) {
+      assistantMessage = message;
+    }
+
+    if (!assistantMessage) {
+      throw new Error("No assistant message received from stream");
+    }
+
+    // Automatically append assistant message to history
+    messages.push(assistantMessage);
+
+    return { chunks, assistantMessage };
+  };
+
+  return {
+    client,
+    get messages() {
+      return messages;
+    },
+    addMessage,
+    addUserMessage: (text: string) => addMessage("user", text),
+    runChatTurn,
+    [Symbol.asyncDispose]: async () => {
+      const closed = server[Symbol.asyncDispose]();
+      server.closeAllConnections();
+      await closed;
+    },
+  };
+}
diff --git a/packages/scout-agent/package.json b/packages/scout-agent/package.json
index 67b087f..1c4ff8d 100644
--- a/packages/scout-agent/package.json
+++ b/packages/scout-agent/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@blink-sdk/scout-agent",
   "description": "A general-purpose AI agent with GitHub, Slack, web search, and compute capabilities built on Blink SDK.",
-  "version": "0.0.9",
+  "version": "0.0.11",
   "type": "module",
   "keywords": [
     "blink",