fix: finalize agent tasks when report enforcement fails

ThomasK33 · ThomasK33 · commit ec63ecb92617 · 2025-12-18T18:06:42.000+01:00
Change-Id: I994b83bb32473ca8cc04f0a165292848d2b15bc1
Signed-off-by: Thomas Kosiewski <tk@coder.com>
diff --git a/src/node/services/taskService.test.ts b/src/node/services/taskService.test.ts
@@ -1,7 +1,7 @@
 import { describe, it, expect } from "bun:test";
 
 import type { MuxMessage, MuxToolPart } from "@/common/types/message";
-import { Ok } from "@/common/types/result";
+import { Err, Ok } from "@/common/types/result";
 import { TaskService } from "./taskService";
 
 function createTaskToolPart(params: {
@@ -257,4 +257,123 @@ describe("TaskService", () => {
       });
     });
   });
+
+  describe("onStreamEnd", () => {
+    it("should finalize tasks when report enforcement resume fails", async () => {
+      const parentWorkspaceId = "parent";
+      const childWorkspaceId = "child";
+
+      const workspace = {
+        id: childWorkspaceId,
+        path: "/tmp/agent",
+        name: "agent",
+        projectName: "proj",
+        projectPath: "/proj",
+        createdAt: "2025-01-01T00:00:00.000Z",
+        parentWorkspaceId,
+        agentType: "research",
+        taskStatus: "running",
+        taskModel: "openai:gpt-5-codex",
+      };
+
+      const projects = new Map([
+        [
+          "/proj",
+          {
+            workspaces: [workspace],
+          },
+        ],
+      ]);
+
+      let idCounter = 0;
+      const config = {
+        generateStableId: () => `id-${idCounter++}`,
+        getTaskSettings: () => ({
+          maxParallelAgentTasks: 3,
+          maxTaskNestingDepth: 3,
+        }),
+        listWorkspaceConfigs: () => [{ projectPath: "/proj", workspace }],
+        getWorkspaceConfig: (id: string) => {
+          if (id !== childWorkspaceId) {
+            return null;
+          }
+
+          return { projectPath: "/proj", workspace };
+        },
+        editConfig: (edit: (cfg: unknown) => unknown) => {
+          edit({ projects });
+        },
+      };
+
+      const histories = new Map<string, MuxMessage[]>([
+        [
+          childWorkspaceId,
+          [
+            {
+              id: "assistant-1",
+              role: "assistant",
+              parts: [{ type: "text", text: "partial output" }],
+              metadata: {
+                historySequence: 1,
+              },
+            },
+          ],
+        ],
+        [parentWorkspaceId, []],
+      ]);
+
+      const historyService = {
+        getHistory: (workspaceId: string) => Ok(histories.get(workspaceId) ?? []),
+        appendToHistory: (workspaceId: string, message: MuxMessage) => {
+          const list = histories.get(workspaceId) ?? [];
+          list.push(message);
+          histories.set(workspaceId, list);
+          return Ok(undefined);
+        },
+      };
+
+      const partialService = {
+        readPartial: () => null,
+        writePartial: () => Ok(undefined),
+      };
+
+      const removed: string[] = [];
+      const workspaceService = {
+        emitChatEvent: (_workspaceId: string, _event: unknown) => undefined,
+        emitWorkspaceMetadata: (_workspaceId: string) => undefined,
+        resumeStream: () => Err({ type: "api_key_not_found", provider: "openai" }),
+        remove: (workspaceId: string, _force?: boolean) => {
+          removed.push(workspaceId);
+          return Ok(undefined);
+        },
+      };
+
+      const aiService = {
+        on: () => undefined,
+      };
+
+      const service = new TaskService(
+        config as never,
+        historyService as never,
+        partialService as never,
+        workspaceService as never,
+        aiService as never
+      );
+
+      await (service as unknown as { onStreamEnd: (id: string) => Promise<void> }).onStreamEnd(
+        childWorkspaceId
+      );
+
+      expect(workspace.taskStatus).toBe("reported");
+      expect(removed).toEqual([childWorkspaceId]);
+
+      const parentHistory = histories.get(parentWorkspaceId) ?? [];
+      expect(parentHistory).toHaveLength(1);
+
+      const reportText = parentHistory[0].parts.find((p) => p.type === "text")?.text;
+      expect(reportText).toBeDefined();
+      expect(reportText).toContain("Mux was unable to resume this agent task");
+      expect(reportText).toContain("partial output");
+    });
+  });
 });
diff --git a/src/node/services/taskService.ts b/src/node/services/taskService.ts
@@ -189,6 +189,42 @@ export class TaskService {
     }
   }
 
+  private async finalizeAgentTaskWithoutReport(
+    workspaceId: string,
+    reportMarkdown: string
+  ): Promise<void> {
+    const workspaceConfig = this.config.getWorkspaceConfig(workspaceId);
+    if (!workspaceConfig) {
+      log.error("Failed to finalize agent task without report: unknown workspace", {
+        workspaceId,
+      });
+      return;
+    }
+
+    // If this isn't a properly-parented task workspace, at least mark it complete so it doesn't
+    // consume a scheduler slot indefinitely.
+    if (!workspaceConfig.workspace.parentWorkspaceId) {
+      await this.updateTaskWorkspace(workspaceId, { taskStatus: "reported" });
+      await this.maybeCleanupReportedWorkspace(workspaceId);
+      await this.queueScheduling();
+      return;
+    }
+
+    try {
+      await this.handleAgentReport(workspaceId, { reportMarkdown });
+    } catch (error: unknown) {
+      // Ensure a failed report doesn't leave the queue stuck.
+      log.error("Failed to finalize agent task without agent_report", {
+        workspaceId,
+        error,
+      });
+
+      await this.updateTaskWorkspace(workspaceId, { taskStatus: "reported" });
+      await this.maybeCleanupReportedWorkspace(workspaceId);
+      await this.queueScheduling();
+    }
+  }
+
   async createAgentTask(params: CreateAgentTaskParams): Promise<{ childWorkspaceId: string }> {
     const preset = getAgentPreset(params.agentType);
 
@@ -437,53 +473,80 @@ export class TaskService {
       await this.updateTaskWorkspace(workspaceId, { taskStatus: "awaiting_report" });
 
       const preset = getAgentPreset(agentType);
-      if (!preset) {
-        return;
-      }
-
-      // Force a report-only follow-up.
-      const requirePolicy: ToolPolicy = [{ action: "require", regex_match: "^agent_report$" }];
-
-      const nudgeMessage = createMuxMessage(
-        this.config.generateStableId(),
-        "user",
-        "You must now call agent_report with your final reportMarkdown. Do not do anything else.",
-        { synthetic: true }
-      );
+      if (preset) {
+        // Force a report-only follow-up.
+        const requirePolicy: ToolPolicy = [{ action: "require", regex_match: "^agent_report$" }];
+
+        const nudgeMessage = createMuxMessage(
+          this.config.generateStableId(),
+          "user",
+          "You must now call agent_report with your final reportMarkdown. Do not do anything else.",
+          { synthetic: true }
+        );
 
-      const appendResult = await this.historyService.appendToHistory(workspaceId, nudgeMessage);
-      if (!appendResult.success) {
-        throw new Error(appendResult.error);
-      }
+        const appendResult = await this.historyService.appendToHistory(workspaceId, nudgeMessage);
+        if (!appendResult.success) {
+          log.error("Failed to append agent_report enforcement message", {
+            workspaceId,
+            error: appendResult.error,
+          });
+        } else {
+          this.workspaceService.emitChatEvent(workspaceId, {
+            ...nudgeMessage,
+            type: "message",
+          } satisfies WorkspaceChatMessage);
+        }
 
-      this.workspaceService.emitChatEvent(workspaceId, {
-        ...nudgeMessage,
-        type: "message",
-      } satisfies WorkspaceChatMessage);
+        const model = config.workspace.taskModel ?? DEFAULT_MODEL;
+        const resumeResult = await this.workspaceService.resumeStream(workspaceId, {
+          model,
+          mode: "agent",
+          additionalSystemInstructions: preset.systemPrompt,
+          toolPolicy: requirePolicy,
+        });
+        if (resumeResult.success) {
+          return;
+        }
 
-      const model = config.workspace.taskModel ?? DEFAULT_MODEL;
-      const resumeResult = await this.workspaceService.resumeStream(workspaceId, {
-        model,
-        mode: "agent",
-        additionalSystemInstructions: preset.systemPrompt,
-        toolPolicy: requirePolicy,
-      });
-      if (!resumeResult.success) {
         log.error("Failed to resume agent task for report enforcement", {
           workspaceId,
           error: resumeResult.error,
         });
+
+        const fallbackReport = await this.buildFallbackReportFromHistory(workspaceId);
+        const reportMarkdown = [
+          "Mux was unable to resume this agent task to collect a final agent_report.",
+          "",
+          "Resume error:",
+          "```",
+          this.formatErrorForReport(resumeResult.error),
+          "```",
+          ...(fallbackReport
+            ? ["", "Best-effort output extracted from the task history:", "", fallbackReport]
+            : [
+                "",
+                "Mux could not extract any assistant text from the task history (best-effort fallback).",
+              ]),
+        ].join("\n");
+
+        await this.finalizeAgentTaskWithoutReport(workspaceId, reportMarkdown);
+        return;
       }
-      return;
+
+      log.error("Agent task ended without agent_report, but no preset exists for enforcement", {
+        workspaceId,
+        agentType,
+      });
+      // Fall through to best-effort extraction.
     }
 
     // Second failure: fall back to best-effort report extraction.
     const fallbackReport = await this.buildFallbackReportFromHistory(workspaceId);
-    if (!fallbackReport) {
-      return;
-    }
+    const reportMarkdown =
+      fallbackReport ??
+      "Mux did not receive an agent_report for this task and could not extract any assistant text from the task history.";
 
-    await this.handleAgentReport(workspaceId, { reportMarkdown: fallbackReport });
+    await this.finalizeAgentTaskWithoutReport(workspaceId, reportMarkdown);
   }
 
   private async tryResolveParentTaskToolCall(params: {