From bfce4dc2c80d194d6a5e9e0cdd31dbfa8fb09753 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 29 Dec 2025 17:28:21 +0000 Subject: [PATCH 01/13] Run all non-integration tests in CI - Add pytest markers for integration and slow tests - Update CI to run all tests except those marked as integration - Previously only ran test_models.py --- .github/workflows/test.yml | 2 +- pyproject.toml | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 53db06d..42981b0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -22,4 +22,4 @@ jobs: run: uv sync --extra dev - name: Run tests - run: uv run pytest tests/test_models.py -v + run: uv run pytest -v -m "not integration" diff --git a/pyproject.toml b/pyproject.toml index 175a3e5..27eb310 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,10 @@ ignore = ["E501"] # Tool descriptions need to be long testpaths = ["tests"] pythonpath = ["."] asyncio_mode = "auto" +markers = [ + "integration: tests that require external services (API keys, databases)", + "slow: tests that take a long time to run", +] [dependency-groups] dev = [ From 4289be2e45a83120a50984517c805f4d93f75183 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 29 Dec 2025 17:32:59 +0000 Subject: [PATCH 02/13] Redesign policy chat UI with structured tool display MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Show tool calls as collapsible cards with parameters - Add progress indicator (Search → Create policy → Analysis → Complete) - Display API requests with method badges (GET/POST) - Make tool results expandable - Show assistant reasoning in muted italic - Parse log messages into structured UI elements - Professional green header with status indicator - Improved example questions with hover effects --- docs/src/components/policy-chat.tsx | 673 ++++++++++++++++++-------- src/policyengine_api/agent_sandbox.py | 20 + 2 files changed, 495 insertions(+), 198 deletions(-) diff --git a/docs/src/components/policy-chat.tsx b/docs/src/components/policy-chat.tsx index 2a9865b..0147753 100644 --- a/docs/src/components/policy-chat.tsx +++ b/docs/src/components/policy-chat.tsx @@ -1,6 +1,6 @@ "use client"; -import { useState, useRef, useEffect } from "react"; +import { useState, useRef, useEffect, useMemo } from "react"; import ReactMarkdown from "react-markdown"; import remarkBreaks from "remark-breaks"; import { useApi } from "./api-context"; @@ -16,6 +16,325 @@ interface LogEntry { message: string; } +interface ParsedStep { + type: "agent" | "tool_use" | "api_call" | "api_response" | "tool_result" | "assistant" | "unknown"; + title: string; + content: string; + method?: string; + url?: string; + statusCode?: number; + toolName?: string; + params?: Record; + isExpanded?: boolean; +} + +function parseLogEntry(message: string): ParsedStep { + // [AGENT] messages + if (message.startsWith("[AGENT]")) { + const content = message.replace("[AGENT] ", ""); + return { + type: "agent", + title: "Agent", + content: content, + }; + } + + // [TOOL_USE] tool_name: {...} + if (message.startsWith("[TOOL_USE]")) { + const content = message.replace("[TOOL_USE] ", ""); + const colonIndex = content.indexOf(":"); + if (colonIndex > -1) { + const toolName = content.slice(0, colonIndex).trim(); + const paramsStr = content.slice(colonIndex + 1).trim(); + let params: Record = {}; + try { + params = JSON.parse(paramsStr); + } catch { + // Not valid JSON + } + // Clean up tool name for display + const displayName = toolName + .replace(/_/g, " ") + .replace(/parameters get$/, "") + .replace(/parameters post$/, "") + .replace(/household calculate post$/, "Calculate household") + .replace(/list /g, "Search "); + return { + type: "tool_use", + title: displayName, + content: paramsStr, + toolName, + params, + }; + } + } + + // [API] GET/POST url + if (message.startsWith("[API]")) { + const content = message.replace("[API] ", ""); + + // Check if it's a response + if (content.startsWith("Response:")) { + const statusCode = parseInt(content.replace("Response: ", ""), 10); + return { + type: "api_response", + title: "Response", + content: content, + statusCode, + }; + } + + // Check if it's a request with method + const methodMatch = content.match(/^(GET|POST|PUT|PATCH|DELETE)\s+(.+)$/); + if (methodMatch) { + return { + type: "api_call", + title: "API Request", + content: content, + method: methodMatch[1], + url: methodMatch[2], + }; + } + + // Query or Body + if (content.startsWith("Query:") || content.startsWith("Body:")) { + return { + type: "api_call", + title: content.startsWith("Query:") ? "Query params" : "Request body", + content: content.replace(/^(Query|Body):\s*/, ""), + }; + } + } + + // [TOOL_RESULT] ... + if (message.startsWith("[TOOL_RESULT]")) { + const content = message.replace("[TOOL_RESULT] ", ""); + return { + type: "tool_result", + title: "Result", + content: content, + }; + } + + // [ASSISTANT] ... + if (message.startsWith("[ASSISTANT]")) { + const content = message.replace("[ASSISTANT] ", ""); + return { + type: "assistant", + title: "Thinking", + content: content, + }; + } + + return { + type: "unknown", + title: "Log", + content: message, + }; +} + +function ToolCard({ step, isLast }: { step: ParsedStep; isLast: boolean }) { + const [isExpanded, setIsExpanded] = useState(false); + + if (step.type === "agent") { + return ( +
+
+ {step.content} +
+ ); + } + + if (step.type === "tool_use") { + return ( +
+
+
+ + {isExpanded && step.params && Object.keys(step.params).length > 0 && ( +
+
+ {Object.entries(step.params).map(([key, value]) => ( +
+ {key}: + + {typeof value === "string" ? `"${value}"` : JSON.stringify(value)} + +
+ ))} +
+
+ )} +
+ {isLast && ( +
+ )} +
+ ); + } + + if (step.type === "api_call" && step.method) { + const methodColors: Record = { + GET: "bg-blue-100 text-blue-700", + POST: "bg-green-100 text-green-700", + PUT: "bg-amber-100 text-amber-700", + DELETE: "bg-red-100 text-red-700", + }; + return ( +
+
+ + {step.method} + + + {step.url?.replace("https://v2.api.policyengine.org", "")} + +
+
+ ); + } + + if (step.type === "api_response") { + const isSuccess = step.statusCode && step.statusCode < 400; + return ( +
+
+ + {step.statusCode} + + + {isSuccess ? "Success" : "Error"} + +
+
+ ); + } + + if (step.type === "tool_result") { + return ( +
+ + {isExpanded && ( +
+
{step.content.slice(0, 2000)}{step.content.length > 2000 ? "\n..." : ""}
+
+ )} +
+ ); + } + + if (step.type === "assistant") { + return ( +
+
+ {step.content} +
+ ); + } + + return null; +} + +function ProgressIndicator({ logs }: { logs: LogEntry[] }) { + const stages = useMemo(() => { + const hasSearch = logs.some(l => l.message.includes("parameters")); + const hasPolicy = logs.some(l => l.message.includes("policies")); + const hasAnalysis = logs.some(l => l.message.includes("analysis") || l.message.includes("economic")); + const hasHousehold = logs.some(l => l.message.includes("household")); + const isComplete = logs.some(l => l.message.includes("Completed")); + + if (hasAnalysis) { + return [ + { label: "Search", done: hasSearch }, + { label: "Create policy", done: hasPolicy }, + { label: "Run analysis", done: isComplete, active: !isComplete }, + { label: "Complete", done: isComplete }, + ]; + } + + if (hasHousehold) { + return [ + { label: "Build household", done: true }, + { label: "Calculate", done: isComplete, active: !isComplete }, + { label: "Complete", done: isComplete }, + ]; + } + + return [ + { label: "Search", done: hasSearch, active: !hasSearch && logs.length > 0 }, + { label: "Retrieve", done: logs.length > 3, active: hasSearch && logs.length <= 3 }, + { label: "Complete", done: isComplete }, + ]; + }, [logs]); + + if (logs.length === 0) return null; + + return ( +
+ {stages.map((stage, i) => ( +
+
+
+ + {stage.label} + +
+ {i < stages.length - 1 && ( +
+ )} +
+ ))} +
+ ); +} + export function PolicyChat() { const { baseUrl } = useApi(); const [messages, setMessages] = useState([]); @@ -34,7 +353,6 @@ export function PolicyChat() { scrollToBottom(); }, [messages, logs]); - // Cleanup polling on unmount useEffect(() => { return () => { if (pollIntervalRef.current) { @@ -43,20 +361,21 @@ export function PolicyChat() { }; }, []); + const parsedSteps = useMemo(() => { + return logs + .map(log => parseLogEntry(log.message)) + .filter(step => step.type !== "unknown"); + }, [logs]); + const pollLogs = async (id: string) => { try { const res = await fetch(`${baseUrl}/agent/logs/${id}`); - if (!res.ok) { - console.error("Failed to fetch logs:", res.status); - return; - } + if (!res.ok) return; const data = await res.json(); setLogs(data.logs || []); - // Check if completed or failed if (data.status === "completed" || data.status === "failed") { - // Stop polling if (pollIntervalRef.current) { clearInterval(pollIntervalRef.current); pollIntervalRef.current = null; @@ -65,39 +384,16 @@ export function PolicyChat() { setIsLoading(false); setCallId(null); - // Extract final result from logs or result field let finalContent = ""; if (data.result?.result) { finalContent = data.result.result; } else { - // Try to extract from logs - look for [CLAUDE] lines with result - const claudeLogs = data.logs - .map((l: LogEntry) => l.message) - .filter((m: string) => m.startsWith("[CLAUDE]")) - .map((m: string) => m.replace("[CLAUDE] ", "")); - - // Try to parse the last few lines for result - for (const log of claudeLogs.reverse()) { - try { - const event = JSON.parse(log); - if (event.type === "result" && event.result) { - finalContent = event.result; - break; - } - } catch { - // Not JSON, skip - } - } - - if (!finalContent) { - finalContent = - data.status === "completed" - ? "Analysis completed. Check logs for details." - : "Analysis failed. Check logs for errors."; - } + finalContent = + data.status === "completed" + ? "Analysis completed. Check the steps above for details." + : "Analysis failed. Please try again."; } - // Update assistant message with final content setMessages((prev) => { const newMessages = [...prev]; const lastIndex = newMessages.length - 1; @@ -126,38 +422,30 @@ export function PolicyChat() { setLogs([]); setCallId(null); - // Stop any existing polling if (pollIntervalRef.current) { clearInterval(pollIntervalRef.current); pollIntervalRef.current = null; } - // Add user message setMessages((prev) => [...prev, { role: "user", content: userMessage }]); - - // Add pending assistant message setMessages((prev) => [ ...prev, { role: "assistant", content: "", status: "pending" }, ]); try { - // Start the agent const res = await fetch(`${baseUrl}/agent/run`, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ question: userMessage }), }); - if (!res.ok) { - throw new Error(`HTTP ${res.status}`); - } + if (!res.ok) throw new Error(`HTTP ${res.status}`); const data = await res.json(); const newCallId = data.call_id; setCallId(newCallId); - // Update to running status setMessages((prev) => { const newMessages = [...prev]; const lastIndex = newMessages.length - 1; @@ -170,12 +458,10 @@ export function PolicyChat() { return newMessages; }); - // Start polling for logs pollIntervalRef.current = setInterval(() => { pollLogs(newCallId); }, 1000); - // Initial poll pollLogs(newCallId); } catch (err) { setMessages((prev) => { @@ -194,195 +480,186 @@ export function PolicyChat() { } }; - // Parse log message to extract useful info - const parseLogMessage = (message: string): { type: string; content: string } => { - if (message.startsWith("[AGENT]")) { - return { type: "agent", content: message.replace("[AGENT] ", "") }; - } - if (message.startsWith("[CLAUDE]")) { - const claudeContent = message.replace("[CLAUDE] ", ""); - // Try to parse as JSON - try { - const event = JSON.parse(claudeContent); - if (event.type === "assistant" && event.message?.content) { - const textParts = event.message.content - .filter((c: { type: string }) => c.type === "text") - .map((c: { text: string }) => c.text) - .join(""); - if (textParts) { - return { type: "text", content: textParts }; - } - const toolParts = event.message.content - .filter((c: { type: string }) => c.type === "tool_use") - .map((c: { name: string }) => c.name); - if (toolParts.length > 0) { - return { type: "tool", content: `Using: ${toolParts.join(", ")}` }; - } - } - if (event.type === "system" && event.subtype === "init") { - const mcpStatus = event.mcp_servers?.find( - (s: { name: string }) => s.name === "policyengine" - ); - return { - type: "system", - content: mcpStatus?.status === "connected" ? "MCP connected" : "Starting...", - }; - } - if (event.type === "result") { - return { type: "result", content: "Analysis complete" }; - } - return { type: "claude", content: `[${event.type || "event"}]` }; - } catch { - return { type: "claude", content: claudeContent.slice(0, 100) }; - } - } - return { type: "log", content: message.slice(0, 100) }; - }; - const exampleQuestions = [ - "How much would it cost to set the UK basic income tax rate to 19p?", - "What would happen if we doubled child benefit?", - "Calculate tax for a UK household earning 50,000", - "What is the budgetary impact of abolishing the higher rate of income tax?", - "What benefits would a single parent with two children receive in California?", + "What is the UK personal allowance for 2026?", + "Calculate tax for someone earning £50,000 in the UK", + "What would happen if we increased child benefit by 10%?", + "What benefits would a single parent with two children receive?", ]; return ( -
+
{/* Header */} -
-
-
- - Policy analyst - - - Powered by Claude Code + MCP - +
+
+
+
+ + + +
+
+

Policy analyst

+

Ask questions about UK and US tax-benefit policy

+
+
+
+
+ + {isLoading ? "Working..." : "Ready"} + +
-

- Ask natural language questions about UK or US tax and benefit policy -

{/* Messages */} -
- {messages.length === 0 && ( -
-

- Try asking a question like: -

-
+
+ {messages.length === 0 ? ( +
+
+

+ What would you like to know? +

+

+ Ask about tax rates, benefits, or policy impacts +

+
+
{exampleQuestions.map((q, i) => ( ))}
- )} - - {messages.map((message, i) => ( -
-
- {message.role === "assistant" && - (message.status === "pending" || message.status === "running") ? ( -
-
- - {message.status === "pending" ? "Starting..." : "Analysing..."} - -
- ) : message.status === "completed" || message.status === "failed" ? ( -
- - {message.content} - -
- ) : ( -
{message.content}
- )} -
-
- ))} - - {/* Live logs */} - {isLoading && logs.length > 0 && ( -
-
- Live output ({logs.length} entries) -
- {logs.slice(-30).map((log, i) => { - const parsed = parseLogMessage(log.message); - return ( -
- - {">"} - - {parsed.content} -
- ); - })} -
- {">"} - -
+ ) : ( +
+ {messages.map((message, i) => ( +
+ {message.role === "user" ? ( +
+
+

{message.content}

+
+
+ ) : ( +
+ {/* Running state with live steps */} + {(message.status === "pending" || message.status === "running") && ( +
+ + + {message.status === "pending" ? ( +
+
+ Starting analysis... +
+ ) : ( +
+ {parsedSteps.slice(-15).map((step, j) => ( + + ))} +
+
+
+
+ )} +
+ )} + + {/* Completed/failed state */} + {(message.status === "completed" || message.status === "failed") && ( +
+ {/* Collapsible steps summary */} + {parsedSteps.length > 0 && ( +
+ + + + + {parsedSteps.filter(s => s.type === "tool_use").length} tool calls executed + +
+ {parsedSteps.map((step, j) => ( + + ))} +
+
+ )} + + {/* Final response */} +
+
+ + {message.content} + +
+
+
+ )} +
+ )} +
+ ))} +
)} - -
{/* Input */} -
-
+ +
setInput(e.target.value)} placeholder="Ask a policy question..." disabled={isLoading} - className="flex-1 px-4 py-2 text-sm border border-[var(--color-border)] rounded-lg focus:outline-none focus:ring-2 focus:ring-[var(--color-pe-green)] disabled:opacity-50 font-mono" + className="flex-1 px-4 py-3 text-sm border border-[var(--color-border)] rounded-xl bg-white focus:outline-none focus:ring-2 focus:ring-[var(--color-pe-green)] focus:border-transparent disabled:opacity-50 placeholder:text-[var(--color-text-muted)]" />
+ +
); } diff --git a/src/policyengine_api/agent_sandbox.py b/src/policyengine_api/agent_sandbox.py index 01ac0fe..7560ee7 100644 --- a/src/policyengine_api/agent_sandbox.py +++ b/src/policyengine_api/agent_sandbox.py @@ -2,6 +2,7 @@ import json import re +import time from typing import Any, Callable import anthropic @@ -34,8 +35,27 @@ 3. Be concise but thorough 4. For UK, amounts are in GBP. For US, amounts are in USD. 5. Poll async endpoints until status is "completed" + +IMPORTANT: When polling async endpoints, ALWAYS use the sleep tool to wait 5-10 seconds between requests. +Do not poll in a tight loop - this wastes resources and may hit rate limits. """ +# Sleep tool for polling delays +SLEEP_TOOL = { + "name": "sleep", + "description": "Wait for a specified number of seconds. Use this between polling requests to avoid hammering the API.", + "input_schema": { + "type": "object", + "properties": { + "seconds": { + "type": "number", + "description": "Number of seconds to sleep (1-60)", + } + }, + "required": ["seconds"], + }, +} + def fetch_openapi_spec(api_base_url: str) -> dict: """Fetch and cache OpenAPI spec.""" From e09bba870264201baa24f7c7ca14f360e4ea21fd Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 29 Dec 2025 17:33:47 +0000 Subject: [PATCH 03/13] Add sleep tool to agent for polling delays --- src/policyengine_api/agent_sandbox.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/policyengine_api/agent_sandbox.py b/src/policyengine_api/agent_sandbox.py index 7560ee7..1e40220 100644 --- a/src/policyengine_api/agent_sandbox.py +++ b/src/policyengine_api/agent_sandbox.py @@ -336,6 +336,8 @@ def log(msg: str) -> None: claude_tools = [ {k: v for k, v in t.items() if k != "_meta"} for t in tools ] + # Add the sleep tool + claude_tools.append(SLEEP_TOOL) client = anthropic.Anthropic() messages = [{"role": "user", "content": question}] @@ -370,11 +372,18 @@ def log(msg: str) -> None: assistant_content.append(block) # Execute tool - tool = tool_lookup.get(block.name) - if tool: - result = execute_api_tool(tool, block.input, api_base_url, log) + if block.name == "sleep": + # Handle sleep tool specially + seconds = min(max(block.input.get("seconds", 5), 1), 60) + log(f"[SLEEP] Waiting {seconds} seconds...") + time.sleep(seconds) + result = f"Slept for {seconds} seconds" else: - result = f"Unknown tool: {block.name}" + tool = tool_lookup.get(block.name) + if tool: + result = execute_api_tool(tool, block.input, api_base_url, log) + else: + result = f"Unknown tool: {block.name}" log(f"[TOOL_RESULT] {result[:300]}") From 6bb314ea06de7292a267091151df9e6d51cd6dff Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 29 Dec 2025 17:40:23 +0000 Subject: [PATCH 04/13] Simplify chat UI: cleaner tool display, better text spacing - Simplified progress indicator to single status line - Removed noisy API call/response details - Cleaner tool cards without timeline borders - Better paragraph/list spacing in markdown - Hide redundant agent messages --- docker-compose.yml | 2 +- docs/src/components/policy-chat.tsx | 204 +++++++--------------------- src/policyengine_api/api/agent.py | 69 +++++++--- 3 files changed, 100 insertions(+), 175 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index b05c701..60e8645 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,7 +14,7 @@ services: API_PORT: ${API_PORT:-8000} ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY} POLICYENGINE_API_URL: http://localhost:${API_PORT:-8000} - AGENT_USE_MODAL: "false" + AGENT_USE_MODAL: ${AGENT_USE_MODAL:-false} volumes: - ./src:/app/src - ./docs/out:/app/docs/out diff --git a/docs/src/components/policy-chat.tsx b/docs/src/components/policy-chat.tsx index 0147753..53f9890 100644 --- a/docs/src/components/policy-chat.tsx +++ b/docs/src/components/policy-chat.tsx @@ -133,119 +133,68 @@ function parseLogEntry(message: string): ParsedStep { }; } -function ToolCard({ step, isLast }: { step: ParsedStep; isLast: boolean }) { +function ToolCard({ step }: { step: ParsedStep }) { const [isExpanded, setIsExpanded] = useState(false); if (step.type === "agent") { - return ( -
-
- {step.content} -
- ); + return null; // Hide agent messages, they're redundant with progress indicator } if (step.type === "tool_use") { return ( -
-
-
- - {isExpanded && step.params && Object.keys(step.params).length > 0 && ( -
-
- {Object.entries(step.params).map(([key, value]) => ( -
- {key}: - - {typeof value === "string" ? `"${value}"` : JSON.stringify(value)} - -
- ))} -
-
)} -
- {isLast && ( -
+ + {isExpanded && step.params && Object.keys(step.params).length > 0 && ( +
+ {Object.entries(step.params).map(([key, value]) => ( +
+ {key} + : + {typeof value === "string" ? `"${value}"` : JSON.stringify(value)} +
+ ))} +
)}
); } - if (step.type === "api_call" && step.method) { - const methodColors: Record = { - GET: "bg-blue-100 text-blue-700", - POST: "bg-green-100 text-green-700", - PUT: "bg-amber-100 text-amber-700", - DELETE: "bg-red-100 text-red-700", - }; - return ( -
-
- - {step.method} - - - {step.url?.replace("https://v2.api.policyengine.org", "")} - -
-
- ); - } - - if (step.type === "api_response") { - const isSuccess = step.statusCode && step.statusCode < 400; - return ( -
-
- - {step.statusCode} - - - {isSuccess ? "Success" : "Error"} - -
-
- ); + // Hide API details - too noisy + if (step.type === "api_call" || step.type === "api_response") { + return null; } if (step.type === "tool_result") { return ( -
+
{isExpanded && ( -
-
{step.content.slice(0, 2000)}{step.content.length > 2000 ? "\n..." : ""}
+
+
{step.content.slice(0, 1500)}{step.content.length > 1500 ? "\n..." : ""}
)}
@@ -254,9 +203,8 @@ function ToolCard({ step, isLast }: { step: ParsedStep; isLast: boolean }) { if (step.type === "assistant") { return ( -
-
- {step.content} +
+

{step.content}

); } @@ -265,72 +213,29 @@ function ToolCard({ step, isLast }: { step: ParsedStep; isLast: boolean }) { } function ProgressIndicator({ logs }: { logs: LogEntry[] }) { - const stages = useMemo(() => { + const stage = useMemo(() => { const hasSearch = logs.some(l => l.message.includes("parameters")); const hasPolicy = logs.some(l => l.message.includes("policies")); const hasAnalysis = logs.some(l => l.message.includes("analysis") || l.message.includes("economic")); const hasHousehold = logs.some(l => l.message.includes("household")); const isComplete = logs.some(l => l.message.includes("Completed")); - if (hasAnalysis) { - return [ - { label: "Search", done: hasSearch }, - { label: "Create policy", done: hasPolicy }, - { label: "Run analysis", done: isComplete, active: !isComplete }, - { label: "Complete", done: isComplete }, - ]; - } - - if (hasHousehold) { - return [ - { label: "Build household", done: true }, - { label: "Calculate", done: isComplete, active: !isComplete }, - { label: "Complete", done: isComplete }, - ]; - } - - return [ - { label: "Search", done: hasSearch, active: !hasSearch && logs.length > 0 }, - { label: "Retrieve", done: logs.length > 3, active: hasSearch && logs.length <= 3 }, - { label: "Complete", done: isComplete }, - ]; + if (isComplete) return "Complete"; + if (hasAnalysis) return "Running analysis..."; + if (hasPolicy) return "Creating policy..."; + if (hasHousehold) return "Calculating..."; + if (hasSearch) return "Searching parameters..."; + return "Starting..."; }, [logs]); if (logs.length === 0) return null; return ( -
- {stages.map((stage, i) => ( -
-
-
- - {stage.label} - -
- {i < stages.length - 1 && ( -
- )} -
- ))} +
+ {stage !== "Complete" && ( +
+ )} + {stage}
); } @@ -559,17 +464,10 @@ export function PolicyChat() { Starting analysis...
) : ( -
- {parsedSteps.slice(-15).map((step, j) => ( - +
+ {parsedSteps.slice(-10).map((step, j) => ( + ))} -
-
-
)}
@@ -587,9 +485,9 @@ export function PolicyChat() { {parsedSteps.filter(s => s.type === "tool_use").length} tool calls executed -
+
{parsedSteps.map((step, j) => ( - + ))}
@@ -601,7 +499,7 @@ export function PolicyChat() { ? "bg-red-50 border border-red-200" : "bg-white border border-[var(--color-border)]" }`}> -
+
{message.content} diff --git a/src/policyengine_api/api/agent.py b/src/policyengine_api/api/agent.py index 33a4f21..7389211 100644 --- a/src/policyengine_api/api/agent.py +++ b/src/policyengine_api/api/agent.py @@ -3,14 +3,14 @@ This endpoint lets users ask natural language questions about tax/benefit policy and get AI-generated reports using Claude Code connected to the PolicyEngine MCP server. -The agent runs in a Modal sandbox and logs are fetched via Modal SDK. +The agent runs in a Modal sandbox (production) or locally (development). """ +import asyncio import uuid from datetime import datetime import logfire -import modal from fastapi import APIRouter, HTTPException from pydantic import BaseModel @@ -67,6 +67,19 @@ class StatusResponse(BaseModel): _logs: dict[str, list[LogEntry]] = {} +def _run_local_agent(call_id: str, question: str, api_base_url: str) -> None: + """Run agent locally in a background thread.""" + from policyengine_api.agent_sandbox import _run_agent_impl + + try: + result = _run_agent_impl(question, api_base_url, call_id) + _calls[call_id]["status"] = result.get("status", "completed") + _calls[call_id]["result"] = result + except Exception as e: + _calls[call_id]["status"] = "failed" + _calls[call_id]["result"] = {"status": "failed", "error": str(e)} + + @router.post("/run", response_model=RunResponse) async def run_agent(request: RunRequest) -> RunResponse: """Start the agent to answer a policy question. @@ -90,30 +103,44 @@ async def run_agent(request: RunRequest) -> RunResponse: logfire.info("agent_run", question=request.question[:100]) api_base_url = settings.policyengine_api_url - - # Look up the deployed function - run_fn = modal.Function.from_name("policyengine-sandbox", "run_agent") - - # Generate a call_id before spawning so we can pass it to the function call_id = f"fc-{uuid.uuid4().hex[:24]}" # Initialize logs storage _logs[call_id] = [] - # Spawn the function (non-blocking) - pass call_id so it can POST logs back - call = run_fn.spawn(request.question, api_base_url, call_id) - - # Store call info - _calls[call_id] = { - "call": call, - "modal_call_id": call.object_id, - "question": request.question, - "started_at": datetime.utcnow().isoformat(), - "status": "running", - "result": None, - } - - logfire.info("agent_spawned", call_id=call_id, modal_call_id=call.object_id) + if settings.agent_use_modal: + # Production: use Modal + import modal + + run_fn = modal.Function.from_name("policyengine-sandbox", "run_agent") + call = run_fn.spawn(request.question, api_base_url, call_id) + + _calls[call_id] = { + "call": call, + "modal_call_id": call.object_id, + "question": request.question, + "started_at": datetime.utcnow().isoformat(), + "status": "running", + "result": None, + } + logfire.info("agent_spawned", call_id=call_id, modal_call_id=call.object_id) + else: + # Local development: run in background thread + _calls[call_id] = { + "call": None, + "modal_call_id": None, + "question": request.question, + "started_at": datetime.utcnow().isoformat(), + "status": "running", + "result": None, + } + logfire.info("agent_spawned_local", call_id=call_id) + + # Run in background using asyncio + loop = asyncio.get_event_loop() + loop.run_in_executor( + None, _run_local_agent, call_id, request.question, api_base_url + ) return RunResponse(call_id=call_id, status="running") From d8d36a2703ff4451548c289daa9495ac71935478 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 29 Dec 2025 17:40:45 +0000 Subject: [PATCH 05/13] Filter out internal debug messages from chat UI --- docs/src/components/policy-chat.tsx | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/src/components/policy-chat.tsx b/docs/src/components/policy-chat.tsx index 53f9890..8416bdb 100644 --- a/docs/src/components/policy-chat.tsx +++ b/docs/src/components/policy-chat.tsx @@ -29,9 +29,17 @@ interface ParsedStep { } function parseLogEntry(message: string): ParsedStep { - // [AGENT] messages + // [AGENT] messages - filter out internal debug info if (message.startsWith("[AGENT]")) { const content = message.replace("[AGENT] ", ""); + // Skip internal debug messages + if (content.startsWith("Stop reason:") || + content.startsWith("Turn ") || + content.startsWith("Loaded ") || + content.startsWith("Fetching ") || + content.startsWith("Completed")) { + return { type: "unknown", title: "", content: "" }; + } return { type: "agent", title: "Agent", From 2c1ce5abccc02d418de1e6b5a0d0f559431b06c1 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 29 Dec 2025 17:44:11 +0000 Subject: [PATCH 06/13] fix: standardise typography in chat UI to text-sm --- docs/src/components/policy-chat.tsx | 33 +++++++++++++++-------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/docs/src/components/policy-chat.tsx b/docs/src/components/policy-chat.tsx index 8416bdb..e465947 100644 --- a/docs/src/components/policy-chat.tsx +++ b/docs/src/components/policy-chat.tsx @@ -150,16 +150,16 @@ function ToolCard({ step }: { step: ParsedStep }) { if (step.type === "tool_use") { return ( -
+
{isExpanded && step.params && Object.keys(step.params).length > 0 && ( -
+
{Object.entries(step.params).map(([key, value]) => (
- {key}: + {key}: {typeof value === "string" ? value : JSON.stringify(value)} @@ -202,8 +202,8 @@ function ToolCard({ step }: { step: ParsedStep }) { Result {isExpanded && ( -
-
{step.content.slice(0, 1500)}{step.content.length > 1500 ? "\n..." : ""}
+
+
{step.content}
)}
@@ -431,7 +431,7 @@ export function PolicyChat() { {messages.length === 0 ? (
-

+

What would you like to know?

@@ -563,9 +563,16 @@ export function PolicyChat() { from { opacity: 0; transform: translateY(4px); } to { opacity: 1; transform: translateY(0); } } + @keyframes slideDown { + from { opacity: 0; max-height: 0; } + to { opacity: 1; max-height: 500px; } + } .animate-fadeIn { animation: fadeIn 0.2s ease-out forwards; } + .animate-slideDown { + animation: slideDown 0.2s ease-out forwards; + } `}

); From f815e22f6b0c4e53af3eea49acc91008808ff00a Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Mon, 29 Dec 2025 17:49:58 +0000 Subject: [PATCH 08/13] style: add monospace font throughout for code-like aesthetic --- docs/src/components/policy-chat.tsx | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/src/components/policy-chat.tsx b/docs/src/components/policy-chat.tsx index 77b5e8e..5a5b101 100644 --- a/docs/src/components/policy-chat.tsx +++ b/docs/src/components/policy-chat.tsx @@ -153,7 +153,7 @@ function ToolCard({ step }: { step: ParsedStep }) {
@@ -457,7 +457,7 @@ export function PolicyChat() { {message.role === "user" ? (
-

{message.content}

+

{message.content}

) : ( @@ -470,7 +470,7 @@ export function PolicyChat() { {message.status === "pending" ? (
- Starting analysis... + Starting analysis...
) : (
@@ -488,7 +488,7 @@ export function PolicyChat() { {/* Collapsible steps summary */} {parsedSteps.length > 0 && (
- + @@ -534,7 +534,7 @@ export function PolicyChat() { onChange={(e) => setInput(e.target.value)} placeholder="Ask a policy question..." disabled={isLoading} - className="flex-1 px-4 py-3 text-sm border border-[var(--color-border)] rounded-xl bg-white focus:outline-none focus:ring-2 focus:ring-[var(--color-pe-green)] focus:border-transparent disabled:opacity-50 placeholder:text-[var(--color-text-muted)]" + className="flex-1 px-4 py-3 text-sm font-mono border border-[var(--color-border)] rounded-xl bg-white focus:outline-none focus:ring-2 focus:ring-[var(--color-pe-green)] focus:border-transparent disabled:opacity-50 placeholder:text-[var(--color-text-muted)]" />