diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 53db06d..42981b0 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -22,4 +22,4 @@ jobs:
         run: uv sync --extra dev
 
       - name: Run tests
-        run: uv run pytest tests/test_models.py -v
+        run: uv run pytest -v -m "not integration"
diff --git a/docker-compose.yml b/docker-compose.yml
index b05c701..60e8645 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -14,7 +14,7 @@ services:
       API_PORT: ${API_PORT:-8000}
       ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
       POLICYENGINE_API_URL: http://localhost:${API_PORT:-8000}
-      AGENT_USE_MODAL: "false"
+      AGENT_USE_MODAL: ${AGENT_USE_MODAL:-false}
     volumes:
       - ./src:/app/src
       - ./docs/out:/app/docs/out
diff --git a/docs/src/components/policy-chat.tsx b/docs/src/components/policy-chat.tsx
index 2a9865b..5a5b101 100644
--- a/docs/src/components/policy-chat.tsx
+++ b/docs/src/components/policy-chat.tsx
@@ -1,6 +1,6 @@
 "use client";
 
-import { useState, useRef, useEffect } from "react";
+import { useState, useRef, useEffect, useMemo } from "react";
 import ReactMarkdown from "react-markdown";
 import remarkBreaks from "remark-breaks";
 import { useApi } from "./api-context";
@@ -16,6 +16,239 @@ interface LogEntry {
   message: string;
 }
 
+interface ParsedStep { // One log line after classification by parseLogEntry.
+  type: "agent" | "tool_use" | "api_call" | "api_response" | "tool_result" | "assistant" | "unknown"; // "unknown" steps are filtered out before rendering
+  title: string; // short display label for the step
+  content: string; // text shown for the step; usually the log line with its leading tag removed
+  method?: string; // HTTP verb; set only for "[API] <METHOD> <url>" lines
+  url?: string; // request target; set together with method
+  statusCode?: number; // parsed from "[API] Response: ..." lines
+  toolName?: string; // raw tool name taken from a [TOOL_USE] line
+  params?: Record<string, unknown>; // JSON arguments of a [TOOL_USE] line; {} when they are not valid JSON
+  isExpanded?: boolean; // NOTE(review): neither set nor read in the visible code (ToolCard keeps its own state) - confirm before relying on it
+}
+
+// [AGENT] lines starting with one of these are internal debug chatter, not steps.
+const AGENT_DEBUG_PREFIXES = ["Stop reason:", "Turn ", "Loaded ", "Fetching ", "Completed"];
+
+/**
+ * Classifies one raw agent log line into a ParsedStep.
+ *
+ * The leading tag selects the step type: [AGENT], [TOOL_USE], [API],
+ * [TOOL_RESULT] or [ASSISTANT]. Internal [AGENT] debug lines come back as an
+ * empty "unknown" step; any line that matches no known shape comes back as an
+ * "unknown" step titled "Log" that carries the raw message.
+ *
+ * @param message - Raw log line, including its leading tag.
+ * @returns The parsed step. Malformed tool-call JSON yields empty `params`
+ *   rather than an exception.
+ */
+function parseLogEntry(message: string): ParsedStep {
+  // Text after the tag, minus the single separating space when one is present.
+  const afterTag = (tag: string): string => message.slice(tag.length).replace(/^ /, "");
+
+  if (message.startsWith("[AGENT]")) {
+    const content = afterTag("[AGENT]");
+    if (AGENT_DEBUG_PREFIXES.some((prefix) => content.startsWith(prefix))) {
+      return { type: "unknown", title: "", content: "" };
+    }
+    return { type: "agent", title: "Agent", content };
+  }
+
+  // [TOOL_USE] tool_name: {...}
+  if (message.startsWith("[TOOL_USE]")) {
+    const content = afterTag("[TOOL_USE]");
+    const colonIndex = content.indexOf(":");
+    if (colonIndex > -1) {
+      const toolName = content.slice(0, colonIndex).trim();
+      const paramsStr = content.slice(colonIndex + 1).trim();
+      let params: Record<string, unknown> = {};
+      try {
+        const parsed: unknown = JSON.parse(paramsStr);
+        // Keep only plain JSON objects: null, arrays and primitives are valid
+        // JSON but are not key/value params and would garble the params list.
+        if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
+          params = parsed as Record<string, unknown>;
+        }
+      } catch {
+        // Payload is not JSON; params stay empty and the raw text stays in content.
+      }
+      // Clean up tool name for display
+      const spacedName = toolName.replace(/_/g, " ");
+      const displayName = spacedName
+        .replace(/parameters get$/, "")
+        .replace(/parameters post$/, "")
+        .replace(/household calculate post$/, "Calculate household")
+        .replace(/list /g, "Search ")
+        .trim(); // suffix stripping can leave a trailing space
+      return {
+        type: "tool_use",
+        // Never render an empty label when the whole name was a stripped suffix.
+        title: displayName || spacedName,
+        content: paramsStr,
+        toolName,
+        params,
+      };
+    }
+  }
+
+  // [API] GET/POST url
+  if (message.startsWith("[API]")) {
+    const content = afterTag("[API]");
+
+    // Check if it's a response
+    if (content.startsWith("Response:")) {
+      const statusCode = parseInt(content.slice("Response:".length), 10);
+      return {
+        type: "api_response",
+        title: "Response",
+        content,
+        // parseInt gives NaN for a non-numeric status; omit the field instead.
+        ...(Number.isNaN(statusCode) ? {} : { statusCode }),
+      };
+    }
+
+    // Check if it's a request with method
+    const methodMatch = content.match(/^(GET|POST|PUT|PATCH|DELETE)\s+(.+)$/);
+    if (methodMatch) {
+      return {
+        type: "api_call",
+        title: "API Request",
+        content,
+        method: methodMatch[1],
+        url: methodMatch[2],
+      };
+    }
+
+    // Query or Body
+    if (content.startsWith("Query:") || content.startsWith("Body:")) {
+      return {
+        type: "api_call",
+        title: content.startsWith("Query:") ? "Query params" : "Request body",
+        content: content.replace(/^(Query|Body):\s*/, ""),
+      };
+    }
+  }
+
+  if (message.startsWith("[TOOL_RESULT]")) {
+    return { type: "tool_result", title: "Result", content: afterTag("[TOOL_RESULT]") };
+  }
+
+  if (message.startsWith("[ASSISTANT]")) {
+    return { type: "assistant", title: "Thinking", content: afterTag("[ASSISTANT]") };
+  }
+
+  return { type: "unknown", title: "Log", content: message };
+}
+
+// Renders one parsed agent step: tool calls and tool results are collapsible rows,
+// assistant text is a plain paragraph; agent, API and unknown steps render nothing.
+// NOTE(review): animate-fadeIn / animate-slideDown are declared in PolicyChat's <style jsx>;
+// styled-jsx scopes non-global rules to the declaring component - confirm they apply here.
+function ToolCard({ step }: { step: ParsedStep }) {
+  const [isExpanded, setIsExpanded] = useState(false);
+  const toggle = () => setIsExpanded((open) => !open);
+
+  if (step.type === "tool_use") {
+    const hasParams = !!step.params && Object.keys(step.params).length > 0;
+    return (
+      <div className="py-1 animate-fadeIn">
+        <button
+          type="button"
+          onClick={toggle}
+          aria-expanded={hasParams ? isExpanded : undefined}
+          className="flex items-center gap-2 hover:text-[var(--color-pe-green)] transition-colors font-mono"
+        >
+          <span className="w-1.5 h-1.5 rounded-full bg-[var(--color-pe-green)] shrink-0" />
+          <span className="text-sm text-[var(--color-text-secondary)]">{step.title}</span>
+          {hasParams && (
+            <svg
+              className={`w-3.5 h-3.5 text-[var(--color-text-muted)] transition-transform shrink-0 ${isExpanded ? "rotate-90" : ""}`}
+              fill="none" viewBox="0 0 24 24" stroke="currentColor" aria-hidden="true"
+            >
+              <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
+            </svg>
+          )}
+        </button>
+        {isExpanded && hasParams && (
+          <div className="ml-3.5 mt-1.5 font-mono text-xs text-[var(--color-text-muted)] bg-[var(--color-surface)] rounded-lg px-3 py-2 animate-slideDown">
+            {Object.entries(step.params ?? {}).map(([key, value]) => (
+              <div key={key} className="flex gap-1">
+                <span className="text-[var(--color-pe-green)]">{key}:</span>
+                <span className="text-[var(--color-text-secondary)]">
+                  {typeof value === "string" ? value : JSON.stringify(value)}
+                </span>
+              </div>
+            ))}
+          </div>
+        )}
+      </div>
+    );
+  }
+
+  if (step.type === "tool_result") {
+    return (
+      <div className="py-1 ml-3.5 animate-fadeIn">
+        <button
+          type="button"
+          onClick={toggle}
+          aria-expanded={isExpanded}
+          className="flex items-center gap-1.5 text-sm text-[var(--color-text-muted)] hover:text-[var(--color-text-secondary)] font-mono"
+        >
+          <svg className={`w-3.5 h-3.5 transition-transform ${isExpanded ? "rotate-90" : ""}`} fill="none" viewBox="0 0 24 24" stroke="currentColor" aria-hidden="true">
+            <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
+          </svg>
+          <span>Result</span>
+        </button>
+        {isExpanded && (
+          <div className="mt-1.5 font-mono text-xs bg-[var(--color-code-bg)] text-[var(--color-code-text)] rounded p-2 overflow-x-auto max-h-64 overflow-y-auto animate-slideDown">
+            <pre className="whitespace-pre-wrap">{step.content}</pre>
+          </div>
+        )}
+      </div>
+    );
+  }
+
+  if (step.type === "assistant") {
+    return (
+      <div className="py-1.5 animate-fadeIn">
+        <p className="text-sm text-[var(--color-text-muted)] leading-relaxed">{step.content}</p>
+      </div>
+    );
+  }
+
+  // Agent lines duplicate the progress indicator and API details are too noisy to show.
+  return null;
+}
+
+// One-line status for a running request, inferred from keywords in the raw log text.
+// Renders nothing until at least one log entry exists; the spinner hides once complete.
+function ProgressIndicator({ logs }: { logs: LogEntry[] }) {
+  const stage = useMemo(() => {
+    // Labels in priority order: the first one with a keyword found in any log message wins.
+    const stages: Array<[string, string[]]> = [
+      ["Complete", ["Completed"]],
+      ["Running analysis...", ["analysis", "economic"]],
+      ["Creating policy...", ["policies"]],
+      ["Calculating...", ["household"]],
+      ["Searching parameters...", ["parameters"]],
+    ];
+    const hit = stages.find(([, keywords]) =>
+      logs.some((entry) => keywords.some((word) => entry.message.includes(word)))
+    );
+    return hit ? hit[0] : "Starting...";
+  }, [logs]);
+
+  if (logs.length === 0) return null;
+
+  return (
+    <div className="flex items-center gap-2 mb-3 text-sm text-[var(--color-text-muted)] font-mono">
+      {stage !== "Complete" && <div className="w-3.5 h-3.5 border-2 border-[var(--color-pe-green)] border-t-transparent rounded-full animate-spin" />}
+      <span>{stage}</span>
+    </div>
+  );
+}
+
 export function PolicyChat() {
   const { baseUrl } = useApi();
   const [messages, setMessages] = useState<Message[]>([]);
@@ -34,7 +267,6 @@ export function PolicyChat() {
     scrollToBottom();
   }, [messages, logs]);
 
-  // Cleanup polling on unmount
   useEffect(() => {
     return () => {
       if (pollIntervalRef.current) {
@@ -43,20 +275,21 @@ export function PolicyChat() {
     };
   }, []);
 
+  const parsedSteps = useMemo(() => {
+    return logs
+      .map(log => parseLogEntry(log.message))
+      .filter(step => step.type !== "unknown");
+  }, [logs]);
+
   const pollLogs = async (id: string) => {
     try {
       const res = await fetch(`${baseUrl}/agent/logs/${id}`);
-      if (!res.ok) {
-        console.error("Failed to fetch logs:", res.status);
-        return;
-      }
+      if (!res.ok) return;
 
       const data = await res.json();
       setLogs(data.logs || []);
 
-      // Check if completed or failed
       if (data.status === "completed" || data.status === "failed") {
-        // Stop polling
         if (pollIntervalRef.current) {
           clearInterval(pollIntervalRef.current);
           pollIntervalRef.current = null;
@@ -65,39 +298,16 @@ export function PolicyChat() {
         setIsLoading(false);
         setCallId(null);
 
-        // Extract final result from logs or result field
         let finalContent = "";
         if (data.result?.result) {
           finalContent = data.result.result;
         } else {
-          // Try to extract from logs - look for [CLAUDE] lines with result
-          const claudeLogs = data.logs
-            .map((l: LogEntry) => l.message)
-            .filter((m: string) => m.startsWith("[CLAUDE]"))
-            .map((m: string) => m.replace("[CLAUDE] ", ""));
-
-          // Try to parse the last few lines for result
-          for (const log of claudeLogs.reverse()) {
-            try {
-              const event = JSON.parse(log);
-              if (event.type === "result" && event.result) {
-                finalContent = event.result;
-                break;
-              }
-            } catch {
-              // Not JSON, skip
-            }
-          }
-
-          if (!finalContent) {
-            finalContent =
-              data.status === "completed"
-                ? "Analysis completed. Check logs for details."
-                : "Analysis failed. Check logs for errors.";
-          }
+          finalContent =
+            data.status === "completed"
+              ? "Analysis completed. Check the steps above for details."
+              : "Analysis failed. Please try again.";
         }
 
-        // Update assistant message with final content
         setMessages((prev) => {
           const newMessages = [...prev];
           const lastIndex = newMessages.length - 1;
@@ -126,38 +336,30 @@ export function PolicyChat() {
     setLogs([]);
     setCallId(null);
 
-    // Stop any existing polling
     if (pollIntervalRef.current) {
       clearInterval(pollIntervalRef.current);
       pollIntervalRef.current = null;
     }
 
-    // Add user message
     setMessages((prev) => [...prev, { role: "user", content: userMessage }]);
-
-    // Add pending assistant message
     setMessages((prev) => [
       ...prev,
       { role: "assistant", content: "", status: "pending" },
     ]);
 
     try {
-      // Start the agent
       const res = await fetch(`${baseUrl}/agent/run`, {
         method: "POST",
         headers: { "Content-Type": "application/json" },
         body: JSON.stringify({ question: userMessage }),
       });
 
-      if (!res.ok) {
-        throw new Error(`HTTP ${res.status}`);
-      }
+      if (!res.ok) throw new Error(`HTTP ${res.status}`);
 
       const data = await res.json();
       const newCallId = data.call_id;
       setCallId(newCallId);
 
-      // Update to running status
       setMessages((prev) => {
         const newMessages = [...prev];
         const lastIndex = newMessages.length - 1;
@@ -170,12 +372,10 @@ export function PolicyChat() {
         return newMessages;
       });
 
-      // Start polling for logs
       pollIntervalRef.current = setInterval(() => {
         pollLogs(newCallId);
       }, 1000);
 
-      // Initial poll
       pollLogs(newCallId);
     } catch (err) {
       setMessages((prev) => {
@@ -194,195 +394,186 @@ export function PolicyChat() {
     }
   };
 
-  // Parse log message to extract useful info
-  const parseLogMessage = (message: string): { type: string; content: string } => {
-    if (message.startsWith("[AGENT]")) {
-      return { type: "agent", content: message.replace("[AGENT] ", "") };
-    }
-    if (message.startsWith("[CLAUDE]")) {
-      const claudeContent = message.replace("[CLAUDE] ", "");
-      // Try to parse as JSON
-      try {
-        const event = JSON.parse(claudeContent);
-        if (event.type === "assistant" && event.message?.content) {
-          const textParts = event.message.content
-            .filter((c: { type: string }) => c.type === "text")
-            .map((c: { text: string }) => c.text)
-            .join("");
-          if (textParts) {
-            return { type: "text", content: textParts };
-          }
-          const toolParts = event.message.content
-            .filter((c: { type: string }) => c.type === "tool_use")
-            .map((c: { name: string }) => c.name);
-          if (toolParts.length > 0) {
-            return { type: "tool", content: `Using: ${toolParts.join(", ")}` };
-          }
-        }
-        if (event.type === "system" && event.subtype === "init") {
-          const mcpStatus = event.mcp_servers?.find(
-            (s: { name: string }) => s.name === "policyengine"
-          );
-          return {
-            type: "system",
-            content: mcpStatus?.status === "connected" ? "MCP connected" : "Starting...",
-          };
-        }
-        if (event.type === "result") {
-          return { type: "result", content: "Analysis complete" };
-        }
-        return { type: "claude", content: `[${event.type || "event"}]` };
-      } catch {
-        return { type: "claude", content: claudeContent.slice(0, 100) };
-      }
-    }
-    return { type: "log", content: message.slice(0, 100) };
-  };
-
   const exampleQuestions = [
-    "How much would it cost to set the UK basic income tax rate to 19p?",
-    "What would happen if we doubled child benefit?",
-    "Calculate tax for a UK household earning 50,000",
-    "What is the budgetary impact of abolishing the higher rate of income tax?",
-    "What benefits would a single parent with two children receive in California?",
+    "What is the UK personal allowance for 2026?",
+    "Calculate tax for someone earning £50,000 in the UK",
+    "What would happen if we increased child benefit by 10%?",
+    "What benefits would a single parent with two children receive?",
   ];
 
   return (
-    <div className="border border-[var(--color-border)] rounded-xl overflow-hidden bg-white flex flex-col h-[600px]">
+    <div className="border border-[var(--color-border)] rounded-2xl overflow-hidden bg-white flex flex-col h-[700px] shadow-sm">
       {/* Header */}
-      <div className="p-4 border-b border-[var(--color-border)] bg-[var(--color-surface)]">
-        <div className="flex items-center gap-2">
-          <div
-            className={`w-2 h-2 rounded-full ${
-              isLoading ? "bg-amber-400 animate-pulse" : "bg-gray-300"
-            }`}
-          />
-          <span className="text-sm font-medium text-[var(--color-text-primary)] font-mono">
-            Policy analyst
-          </span>
-          <span className="text-xs text-[var(--color-text-muted)] ml-auto font-mono">
-            Powered by Claude Code + MCP
-          </span>
+      <div className="px-5 py-4 border-b border-[var(--color-border)] bg-gradient-to-r from-[var(--color-pe-green)] to-[var(--color-pe-green-dark)]">
+        <div className="flex items-center justify-between">
+          <div className="flex items-center gap-3">
+            <div className="w-8 h-8 rounded-lg bg-white/20 flex items-center justify-center">
+              <svg className="w-5 h-5 text-white" fill="none" viewBox="0 0 24 24" stroke="currentColor">
+                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d="M9.75 17L9 20l-1 1h8l-1-1-.75-3M3 13h18M5 17h14a2 2 0 002-2V5a2 2 0 00-2-2H5a2 2 0 00-2 2v10a2 2 0 002 2z" />
+              </svg>
+            </div>
+            <div>
+              <h2 className="text-white font-semibold">Policy analyst</h2>
+              <p className="text-white/70 text-xs">Ask questions about UK and US tax-benefit policy</p>
+            </div>
+          </div>
+          <div className="flex items-center gap-2">
+            <div className={`w-2 h-2 rounded-full ${isLoading ? "bg-amber-300 animate-pulse" : "bg-green-300"}`} />
+            <span className="text-white/70 text-xs font-medium">
+              {isLoading ? "Working..." : "Ready"}
+            </span>
+          </div>
         </div>
-        <p className="text-xs text-[var(--color-text-muted)] mt-1 font-mono">
-          Ask natural language questions about UK or US tax and benefit policy
-        </p>
       </div>
 
       {/* Messages */}
-      <div className="flex-1 overflow-y-auto p-4 space-y-4">
-        {messages.length === 0 && (
-          <div className="text-center py-8">
-            <p className="text-sm text-[var(--color-text-muted)] mb-4 font-mono">
-              Try asking a question like:
-            </p>
-            <div className="space-y-2">
+      <div className="flex-1 overflow-y-auto p-5">
+        {messages.length === 0 ? (
+          <div className="h-full flex flex-col justify-center">
+            <div className="text-center mb-8">
+              <h3 className="text-xl font-medium text-[var(--color-text-primary)] mb-2">
+                What would you like to know?
+              </h3>
+              <p className="text-sm text-[var(--color-text-muted)]">
+                Ask about tax rates, benefits, or policy impacts
+              </p>
+            </div>
+            <div className="grid gap-2 max-w-lg mx-auto">
               {exampleQuestions.map((q, i) => (
                 <button
                   key={i}
                   onClick={() => setInput(q)}
-                  className="block w-full text-left p-3 rounded-lg bg-[var(--color-surface-sunken)] text-sm text-[var(--color-text-secondary)] hover:bg-[var(--color-surface)] transition-colors font-mono"
+                  className="text-left p-4 rounded-xl bg-[var(--color-surface-sunken)] hover:bg-[var(--color-surface)] border border-transparent hover:border-[var(--color-border)] text-sm text-[var(--color-text-secondary)] transition-all group font-mono"
                 >
-                  {q}
+                  <span className="group-hover:text-[var(--color-pe-green)] transition-colors">{q}</span>
                 </button>
               ))}
             </div>
           </div>
-        )}
-
-        {messages.map((message, i) => (
-          <div
-            key={i}
-            className={`flex ${message.role === "user" ? "justify-end" : "justify-start"}`}
-          >
-            <div
-              className={`max-w-[85%] rounded-xl px-4 py-3 ${
-                message.role === "user"
-                  ? "bg-[var(--color-pe-green)] text-white"
-                  : "bg-[var(--color-surface-sunken)] text-[var(--color-text-primary)]"
-              }`}
-            >
-              {message.role === "assistant" &&
-              (message.status === "pending" || message.status === "running") ? (
-                <div className="flex items-center gap-2 font-mono">
-                  <div className="w-3 h-3 border-2 border-[var(--color-pe-green)] border-t-transparent rounded-full animate-spin" />
-                  <span className="text-sm">
-                    {message.status === "pending" ? "Starting..." : "Analysing..."}
-                  </span>
-                </div>
-              ) : message.status === "completed" || message.status === "failed" ? (
-                <div className="font-mono prose prose-sm max-w-none text-sm [&>*]:text-[var(--color-text-primary)] [&_code]:bg-[var(--color-surface)] [&_code]:px-1 [&_code]:py-0.5 [&_code]:rounded [&_strong]:font-semibold">
-                  <ReactMarkdown remarkPlugins={[remarkBreaks]}>
-                    {message.content}
-                  </ReactMarkdown>
-                </div>
-              ) : (
-                <div className="text-sm whitespace-pre-wrap font-mono">{message.content}</div>
-              )}
-            </div>
-          </div>
-        ))}
-
-        {/* Live logs */}
-        {isLoading && logs.length > 0 && (
-          <div className="bg-[var(--color-surface-sunken)] rounded-xl p-3 space-y-1 font-mono text-xs max-h-64 overflow-y-auto">
-            <div className="text-xs font-medium text-[var(--color-text-muted)] mb-2 sticky top-0 bg-[var(--color-surface-sunken)]">
-              Live output ({logs.length} entries)
-            </div>
-            {logs.slice(-30).map((log, i) => {
-              const parsed = parseLogMessage(log.message);
-              return (
-                <div
-                  key={i}
-                  className={`flex items-start gap-2 ${
-                    parsed.type === "tool"
-                      ? "text-amber-600"
-                      : parsed.type === "text"
-                      ? "text-[var(--color-text-primary)]"
-                      : parsed.type === "agent"
-                      ? "text-blue-600"
-                      : parsed.type === "system"
-                      ? "text-green-600"
-                      : "text-[var(--color-text-muted)]"
-                  }`}
-                >
-                  <span className="text-[var(--color-text-muted)] select-none shrink-0">
-                    {">"}
-                  </span>
-                  <span className="whitespace-pre-wrap break-words">{parsed.content}</span>
-                </div>
-              );
-            })}
-            <div className="flex items-center gap-2 text-[var(--color-text-muted)]">
-              <span className="select-none">{">"}</span>
-              <span className="inline-block w-2 h-3 bg-[var(--color-pe-green)] animate-pulse" />
-            </div>
+        ) : (
+          <div className="space-y-6">
+            {messages.map((message, i) => (
+              <div key={i}>
+                {message.role === "user" ? (
+                  <div className="flex justify-end">
+                    <div className="max-w-[80%] bg-[var(--color-pe-green)] text-white rounded-2xl rounded-br-md px-4 py-3">
+                      <p className="text-sm font-mono">{message.content}</p>
+                    </div>
+                  </div>
+                ) : (
+                  <div className="space-y-3">
+                    {/* Running state with live steps */}
+                    {(message.status === "pending" || message.status === "running") && (
+                      <div className="bg-[var(--color-surface-sunken)] rounded-2xl p-4">
+                        <ProgressIndicator logs={logs} />
+
+                        {message.status === "pending" ? (
+                          <div className="flex items-center gap-3">
+                            <div className="w-5 h-5 border-2 border-[var(--color-pe-green)] border-t-transparent rounded-full animate-spin" />
+                            <span className="text-sm text-[var(--color-text-secondary)] font-mono">Starting analysis...</span>
+                          </div>
+                        ) : (
+                          <div className="space-y-0">
+                            {parsedSteps.slice(-10).map((step, j) => (
+                              <ToolCard key={j} step={step} />
+                            ))}
+                          </div>
+                        )}
+                      </div>
+                    )}
+
+                    {/* Completed/failed state */}
+                    {(message.status === "completed" || message.status === "failed") && (
+                      <div className="space-y-4">
+                        {/* Collapsible steps summary */}
+                        {parsedSteps.length > 0 && (
+                          <details className="group">
+                            <summary className="cursor-pointer list-none flex items-center gap-2 text-sm text-[var(--color-text-muted)] hover:text-[var(--color-text-secondary)] font-mono">
+                              <svg className="w-3.5 h-3.5 group-open:rotate-90 transition-transform" fill="none" viewBox="0 0 24 24" stroke="currentColor">
+                                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
+                              </svg>
+                              <span>{parsedSteps.filter(s => s.type === "tool_use").length} tool calls executed</span>
+                            </summary>
+                            <div className="mt-3 bg-[var(--color-surface-sunken)] rounded-xl p-4 space-y-0">
+                              {parsedSteps.map((step, j) => (
+                                <ToolCard key={j} step={step} />
+                              ))}
+                            </div>
+                          </details>
+                        )}
+
+                        {/* Final response */}
+                        <div className={`rounded-2xl rounded-bl-md px-5 py-4 ${
+                          message.status === "failed"
+                            ? "bg-red-50 border border-red-200"
+                            : "bg-white border border-[var(--color-border)]"
+                        }`}>
+                          <div className="prose prose-sm max-w-none text-[var(--color-text-primary)] [&_strong]:font-semibold [&_code]:bg-[var(--color-surface-sunken)] [&_code]:px-1.5 [&_code]:py-0.5 [&_code]:rounded [&_code]:text-sm [&_code]:font-mono [&_h1]:text-lg [&_h1]:mt-4 [&_h1]:mb-2 [&_h2]:text-base [&_h2]:mt-3 [&_h2]:mb-2 [&_h3]:text-sm [&_h3]:mt-2 [&_h3]:mb-1 [&_p]:my-3 [&_p]:leading-relaxed [&_ul]:my-3 [&_ul]:space-y-1 [&_ol]:my-3 [&_ol]:space-y-1 [&_li]:my-0 [&_li]:leading-relaxed [&_blockquote]:border-l-2 [&_blockquote]:border-[var(--color-pe-green)] [&_blockquote]:pl-4 [&_blockquote]:my-3 [&_blockquote]:text-[var(--color-text-secondary)]">
+                            <ReactMarkdown remarkPlugins={[remarkBreaks]}>
+                              {message.content}
+                            </ReactMarkdown>
+                          </div>
+                        </div>
+                      </div>
+                    )}
+                  </div>
+                )}
+              </div>
+            ))}
+            <div ref={messagesEndRef} />
           </div>
         )}
-
-        <div ref={messagesEndRef} />
       </div>
 
       {/* Input */}
-      <form onSubmit={handleSubmit} className="p-4 border-t border-[var(--color-border)]">
-        <div className="flex gap-2">
+      <form onSubmit={handleSubmit} className="p-4 border-t border-[var(--color-border)] bg-[var(--color-surface)]">
+        <div className="flex gap-3">
           <input
             type="text"
             value={input}
             onChange={(e) => setInput(e.target.value)}
             placeholder="Ask a policy question..."
             disabled={isLoading}
-            className="flex-1 px-4 py-2 text-sm border border-[var(--color-border)] rounded-lg focus:outline-none focus:ring-2 focus:ring-[var(--color-pe-green)] disabled:opacity-50 font-mono"
+            className="flex-1 px-4 py-3 text-sm font-mono border border-[var(--color-border)] rounded-xl bg-white focus:outline-none focus:ring-2 focus:ring-[var(--color-pe-green)] focus:border-transparent disabled:opacity-50 placeholder:text-[var(--color-text-muted)]"
           />
           <button
             type="submit"
             disabled={isLoading || !input.trim()}
-            className="px-4 py-2 bg-[var(--color-pe-green)] text-white rounded-lg text-sm font-medium hover:bg-[var(--color-pe-green-dark)] disabled:opacity-50 disabled:cursor-not-allowed transition-colors font-mono"
+            className="px-6 py-3 bg-[var(--color-pe-green)] hover:bg-[var(--color-pe-green-dark)] text-white rounded-xl text-sm font-medium disabled:opacity-50 disabled:cursor-not-allowed transition-colors flex items-center gap-2"
           >
-            {isLoading ? "..." : "Ask"}
+            {isLoading ? (
+              <>
+                <div className="w-4 h-4 border-2 border-white/30 border-t-white rounded-full animate-spin" />
+                <span>Working</span>
+              </>
+            ) : (
+              <>
+                <span>Ask</span>
+                <svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
+                  <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M14 5l7 7m0 0l-7 7m7-7H3" />
+                </svg>
+              </>
+            )}
           </button>
         </div>
       </form>
+
+      <style jsx>{`
+        @keyframes fadeIn {
+          from { opacity: 0; transform: translateY(4px); }
+          to { opacity: 1; transform: translateY(0); }
+        }
+        @keyframes slideDown {
+          from { opacity: 0; max-height: 0; }
+          to { opacity: 1; max-height: 500px; }
+        }
+        .animate-fadeIn {
+          animation: fadeIn 0.2s ease-out forwards;
+        }
+        .animate-slideDown {
+          animation: slideDown 0.2s ease-out forwards;
+        }
+      `}</style>
     </div>
   );
 }
diff --git a/pyproject.toml b/pyproject.toml
index 175a3e5..27eb310 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,6 +53,10 @@ ignore = ["E501"]  # Tool descriptions need to be long
 testpaths = ["tests"]
 pythonpath = ["."]
 asyncio_mode = "auto"
+markers = [
+    "integration: tests that require external services (API keys, databases)",
+    "slow: tests that take a long time to run",
+]
 
 [dependency-groups]
 dev = [
diff --git a/scripts/init.py b/scripts/init.py
index 587755e..cf7a04a 100644
--- a/scripts/init.py
+++ b/scripts/init.py
@@ -157,6 +157,7 @@ def apply_rls_policies(engine):
         "parameters",
         "parameter_values",
         "users",
+        "household_jobs",
     ]
 
     # Read-only tables (public can read, only service role can write)
@@ -176,6 +177,7 @@ def apply_rls_policies(engine):
         "policies",
         "dynamics",
         "reports",
+        "household_jobs",
     ]
 
     # Read-only results tables
diff --git a/src/policyengine_api/agent_sandbox.py b/src/policyengine_api/agent_sandbox.py
index 01ac0fe..6408093 100644
--- a/src/policyengine_api/agent_sandbox.py
+++ b/src/policyengine_api/agent_sandbox.py
@@ -2,6 +2,7 @@
 
 import json
 import re
+import time
 from typing import Any, Callable
 
 import anthropic
@@ -34,8 +35,27 @@
 3. Be concise but thorough
 4. For UK, amounts are in GBP. For US, amounts are in USD.
 5. Poll async endpoints until status is "completed"
+
+IMPORTANT: When polling async endpoints, ALWAYS use the sleep tool to wait 5-10 seconds between requests.
+Do not poll in a tight loop - this wastes resources and may hit rate limits.
 """
 
+# Tool definition offered to Claude for pausing between polls; the agent loop runs it in-process with time.sleep
+SLEEP_TOOL = {
+    "name": "sleep",
+    "description": "Wait for a specified number of seconds. Use this between polling requests to avoid hammering the API.",
+    "input_schema": {
+        "type": "object",
+        "properties": {
+            "seconds": {
+                "type": "number",
+                "description": "Number of seconds to sleep (1-60)",
+            }
+        },
+        "required": ["seconds"],
+    },
+}
+
 
 def fetch_openapi_spec(api_base_url: str) -> dict:
     """Fetch and cache OpenAPI spec."""
@@ -285,7 +305,7 @@ def _run_agent_impl(
     question: str,
     api_base_url: str = "https://v2.api.policyengine.org",
     call_id: str = "",
-    max_turns: int = 15,
+    max_turns: int = 30,
 ) -> dict:
     """Core agent implementation."""
 
@@ -316,6 +336,8 @@ def log(msg: str) -> None:
     claude_tools = [
         {k: v for k, v in t.items() if k != "_meta"} for t in tools
     ]
+    # Add the sleep tool
+    claude_tools.append(SLEEP_TOOL)
 
     client = anthropic.Anthropic()
     messages = [{"role": "user", "content": question}]
@@ -350,11 +372,18 @@ def log(msg: str) -> None:
                 assistant_content.append(block)
 
                 # Execute tool
-                tool = tool_lookup.get(block.name)
-                if tool:
-                    result = execute_api_tool(tool, block.input, api_base_url, log)
+                if block.name == "sleep":
+                    raw = block.input.get("seconds", 5)  # model-supplied: may be null or a non-number
+                    seconds = min(max(raw, 1), 60) if isinstance(raw, (int, float)) else 5
+                    log(f"[SLEEP] Waiting {seconds} seconds...")
+                    time.sleep(seconds)
+                    result = f"Slept for {seconds} seconds"
                 else:
-                    result = f"Unknown tool: {block.name}"
+                    tool = tool_lookup.get(block.name)
+                    if tool:
+                        result = execute_api_tool(tool, block.input, api_base_url, log)
+                    else:
+                        result = f"Unknown tool: {block.name}"
 
                 log(f"[TOOL_RESULT] {result[:300]}")
 
@@ -392,12 +421,12 @@ def log(msg: str) -> None:
     return result
 
 
-@app.function(image=image, secrets=[anthropic_secret], timeout=300)
+@app.function(image=image, secrets=[anthropic_secret], timeout=600)
 def run_agent(
     question: str,
     api_base_url: str = "https://v2.api.policyengine.org",
     call_id: str = "",
-    max_turns: int = 15,
+    max_turns: int = 30,
 ) -> dict:
     """Run agentic loop to answer a policy question (Modal wrapper)."""
     return _run_agent_impl(question, api_base_url, call_id, max_turns)
diff --git a/src/policyengine_api/api/agent.py b/src/policyengine_api/api/agent.py
index 33a4f21..7389211 100644
--- a/src/policyengine_api/api/agent.py
+++ b/src/policyengine_api/api/agent.py
@@ -3,14 +3,14 @@
 This endpoint lets users ask natural language questions about tax/benefit policy
 and get AI-generated reports using Claude Code connected to the PolicyEngine MCP server.
 
-The agent runs in a Modal sandbox and logs are fetched via Modal SDK.
+The agent runs in a Modal sandbox (production) or locally (development).
 """
 
+import asyncio
 import uuid
 from datetime import datetime
 
 import logfire
-import modal
 from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel
 
@@ -67,6 +67,19 @@ class StatusResponse(BaseModel):
 _logs: dict[str, list[LogEntry]] = {}
 
 
+def _run_local_agent(call_id: str, question: str, api_base_url: str) -> None:
+    """Run the agent synchronously (called from an executor thread) and record the outcome in _calls."""
+    from policyengine_api.agent_sandbox import _run_agent_impl
+
+    try:
+        result = _run_agent_impl(question, api_base_url, call_id)
+        status = result.get("status", "completed")
+    except Exception as e:
+        status, result = "failed", {"status": "failed", "error": str(e)}
+    # Write result and status in one update (result first) so readers never see a finished status without its result
+    _calls[call_id].update(result=result, status=status)
+
+
 @router.post("/run", response_model=RunResponse)
 async def run_agent(request: RunRequest) -> RunResponse:
     """Start the agent to answer a policy question.
@@ -90,30 +103,44 @@ async def run_agent(request: RunRequest) -> RunResponse:
     logfire.info("agent_run", question=request.question[:100])
 
     api_base_url = settings.policyengine_api_url
-
-    # Look up the deployed function
-    run_fn = modal.Function.from_name("policyengine-sandbox", "run_agent")
-
-    # Generate a call_id before spawning so we can pass it to the function
     call_id = f"fc-{uuid.uuid4().hex[:24]}"
 
     # Initialize logs storage
     _logs[call_id] = []
 
-    # Spawn the function (non-blocking) - pass call_id so it can POST logs back
-    call = run_fn.spawn(request.question, api_base_url, call_id)
-
-    # Store call info
-    _calls[call_id] = {
-        "call": call,
-        "modal_call_id": call.object_id,
-        "question": request.question,
-        "started_at": datetime.utcnow().isoformat(),
-        "status": "running",
-        "result": None,
-    }
-
-    logfire.info("agent_spawned", call_id=call_id, modal_call_id=call.object_id)
+    if settings.agent_use_modal:
+        # Production: use Modal
+        import modal
+
+        run_fn = modal.Function.from_name("policyengine-sandbox", "run_agent")
+        call = run_fn.spawn(request.question, api_base_url, call_id)
+
+        _calls[call_id] = {
+            "call": call,
+            "modal_call_id": call.object_id,
+            "question": request.question,
+            "started_at": datetime.utcnow().isoformat(),
+            "status": "running",
+            "result": None,
+        }
+        logfire.info("agent_spawned", call_id=call_id, modal_call_id=call.object_id)
+    else:
+        # Local development: run the blocking agent in a background thread
+        _calls[call_id] = {
+            "call": None,
+            "modal_call_id": None,
+            "question": request.question,
+            "started_at": datetime.utcnow().isoformat(),
+            "status": "running",
+            "result": None,
+        }
+        logfire.info("agent_spawned_local", call_id=call_id)
+
+        # Fire-and-forget on the default executor; get_running_loop() is the preferred call inside a coroutine
+        loop = asyncio.get_running_loop()
+        loop.run_in_executor(
+            None, _run_local_agent, call_id, request.question, api_base_url
+        )
 
     return RunResponse(call_id=call_id, status="running")
 
diff --git a/supabase/migrations/20241121000001_rls_policies.sql b/supabase/migrations/20241121000001_rls_policies.sql
deleted file mode 100644
index 7022091..0000000
--- a/supabase/migrations/20241121000001_rls_policies.sql
+++ /dev/null
@@ -1,157 +0,0 @@
--- Enable RLS on all application tables
-ALTER TABLE datasets ENABLE ROW LEVEL SECURITY;
-ALTER TABLE dataset_versions ENABLE ROW LEVEL SECURITY;
-ALTER TABLE simulations ENABLE ROW LEVEL SECURITY;
-ALTER TABLE policies ENABLE ROW LEVEL SECURITY;
-ALTER TABLE dynamics ENABLE ROW LEVEL SECURITY;
-ALTER TABLE aggregates ENABLE ROW LEVEL SECURITY;
-ALTER TABLE change_aggregates ENABLE ROW LEVEL SECURITY;
-ALTER TABLE tax_benefit_models ENABLE ROW LEVEL SECURITY;
-ALTER TABLE tax_benefit_model_versions ENABLE ROW LEVEL SECURITY;
-ALTER TABLE variables ENABLE ROW LEVEL SECURITY;
-ALTER TABLE parameters ENABLE ROW LEVEL SECURITY;
-ALTER TABLE parameter_values ENABLE ROW LEVEL SECURITY;
-
--- Service role policies (full access to everything)
-DO $$
-BEGIN
-    -- Datasets
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'datasets' AND policyname = 'Service role full access') THEN
-        CREATE POLICY "Service role full access" ON datasets FOR ALL TO service_role USING (true) WITH CHECK (true);
-    END IF;
-
-    -- Dataset versions
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'dataset_versions' AND policyname = 'Service role full access') THEN
-        CREATE POLICY "Service role full access" ON dataset_versions FOR ALL TO service_role USING (true) WITH CHECK (true);
-    END IF;
-
-    -- Simulations
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'simulations' AND policyname = 'Service role full access') THEN
-        CREATE POLICY "Service role full access" ON simulations FOR ALL TO service_role USING (true) WITH CHECK (true);
-    END IF;
-
-    -- Policies
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'policies' AND policyname = 'Service role full access') THEN
-        CREATE POLICY "Service role full access" ON policies FOR ALL TO service_role USING (true) WITH CHECK (true);
-    END IF;
-
-    -- Dynamics
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'dynamics' AND policyname = 'Service role full access') THEN
-        CREATE POLICY "Service role full access" ON dynamics FOR ALL TO service_role USING (true) WITH CHECK (true);
-    END IF;
-
-    -- Aggregates
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'aggregates' AND policyname = 'Service role full access') THEN
-        CREATE POLICY "Service role full access" ON aggregates FOR ALL TO service_role USING (true) WITH CHECK (true);
-    END IF;
-
-    -- Change aggregates
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'change_aggregates' AND policyname = 'Service role full access') THEN
-        CREATE POLICY "Service role full access" ON change_aggregates FOR ALL TO service_role USING (true) WITH CHECK (true);
-    END IF;
-
-    -- Tax benefit models
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'tax_benefit_models' AND policyname = 'Service role full access') THEN
-        CREATE POLICY "Service role full access" ON tax_benefit_models FOR ALL TO service_role USING (true) WITH CHECK (true);
-    END IF;
-
-    -- Tax benefit model versions
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'tax_benefit_model_versions' AND policyname = 'Service role full access') THEN
-        CREATE POLICY "Service role full access" ON tax_benefit_model_versions FOR ALL TO service_role USING (true) WITH CHECK (true);
-    END IF;
-
-    -- Variables
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'variables' AND policyname = 'Service role full access') THEN
-        CREATE POLICY "Service role full access" ON variables FOR ALL TO service_role USING (true) WITH CHECK (true);
-    END IF;
-
-    -- Parameters
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'parameters' AND policyname = 'Service role full access') THEN
-        CREATE POLICY "Service role full access" ON parameters FOR ALL TO service_role USING (true) WITH CHECK (true);
-    END IF;
-
-    -- Parameter values
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'parameter_values' AND policyname = 'Service role full access') THEN
-        CREATE POLICY "Service role full access" ON parameter_values FOR ALL TO service_role USING (true) WITH CHECK (true);
-    END IF;
-END $$;
-
--- Public read access for read-only tables
-DO $$
-BEGIN
-    -- Tax benefit models (read-only for public)
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'tax_benefit_models' AND policyname = 'Public read access') THEN
-        CREATE POLICY "Public read access" ON tax_benefit_models FOR SELECT TO anon, authenticated USING (true);
-    END IF;
-
-    -- Tax benefit model versions (read-only for public)
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'tax_benefit_model_versions' AND policyname = 'Public read access') THEN
-        CREATE POLICY "Public read access" ON tax_benefit_model_versions FOR SELECT TO anon, authenticated USING (true);
-    END IF;
-
-    -- Variables (read-only for public)
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'variables' AND policyname = 'Public read access') THEN
-        CREATE POLICY "Public read access" ON variables FOR SELECT TO anon, authenticated USING (true);
-    END IF;
-
-    -- Parameters (read-only for public)
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'parameters' AND policyname = 'Public read access') THEN
-        CREATE POLICY "Public read access" ON parameters FOR SELECT TO anon, authenticated USING (true);
-    END IF;
-
-    -- Parameter values (read-only for public)
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'parameter_values' AND policyname = 'Public read access') THEN
-        CREATE POLICY "Public read access" ON parameter_values FOR SELECT TO anon, authenticated USING (true);
-    END IF;
-
-    -- Datasets (read-only for public)
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'datasets' AND policyname = 'Public read access') THEN
-        CREATE POLICY "Public read access" ON datasets FOR SELECT TO anon, authenticated USING (true);
-    END IF;
-
-    -- Dataset versions (read-only for public)
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'dataset_versions' AND policyname = 'Public read access') THEN
-        CREATE POLICY "Public read access" ON dataset_versions FOR SELECT TO anon, authenticated USING (true);
-    END IF;
-END $$;
-
--- User-created content policies
-DO $$
-BEGIN
-    -- Simulations (users can create and read their own)
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'simulations' AND policyname = 'Users can create simulations') THEN
-        CREATE POLICY "Users can create simulations" ON simulations FOR INSERT TO anon, authenticated WITH CHECK (true);
-    END IF;
-
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'simulations' AND policyname = 'Users can read simulations') THEN
-        CREATE POLICY "Users can read simulations" ON simulations FOR SELECT TO anon, authenticated USING (true);
-    END IF;
-
-    -- Policies (users can create and read their own)
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'policies' AND policyname = 'Users can create policies') THEN
-        CREATE POLICY "Users can create policies" ON policies FOR INSERT TO anon, authenticated WITH CHECK (true);
-    END IF;
-
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'policies' AND policyname = 'Users can read policies') THEN
-        CREATE POLICY "Users can read policies" ON policies FOR SELECT TO anon, authenticated USING (true);
-    END IF;
-
-    -- Dynamics (users can create and read their own)
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'dynamics' AND policyname = 'Users can create dynamics') THEN
-        CREATE POLICY "Users can create dynamics" ON dynamics FOR INSERT TO anon, authenticated WITH CHECK (true);
-    END IF;
-
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'dynamics' AND policyname = 'Users can read dynamics') THEN
-        CREATE POLICY "Users can read dynamics" ON dynamics FOR SELECT TO anon, authenticated USING (true);
-    END IF;
-
-    -- Aggregates (read access for all)
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'aggregates' AND policyname = 'Users can read aggregates') THEN
-        CREATE POLICY "Users can read aggregates" ON aggregates FOR SELECT TO anon, authenticated USING (true);
-    END IF;
-
-    -- Change aggregates (read access for all)
-    IF NOT EXISTS (SELECT 1 FROM pg_policies WHERE schemaname = 'public' AND tablename = 'change_aggregates' AND policyname = 'Users can read change aggregates') THEN
-        CREATE POLICY "Users can read change aggregates" ON change_aggregates FOR SELECT TO anon, authenticated USING (true);
-    END IF;
-END $$;
diff --git a/supabase/migrations/20241228000000_household_jobs.sql b/supabase/migrations/20241228000000_household_jobs.sql
deleted file mode 100644
index 758ba62..0000000
--- a/supabase/migrations/20241228000000_household_jobs.sql
+++ /dev/null
@@ -1,42 +0,0 @@
--- Create household_jobs table for async household calculations
-
-CREATE TABLE IF NOT EXISTS household_jobs (
-    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-    tax_benefit_model_name TEXT NOT NULL,
-    request_data JSONB NOT NULL,
-    policy_id UUID REFERENCES policies(id),
-    dynamic_id UUID REFERENCES dynamics(id),
-    status TEXT NOT NULL DEFAULT 'pending',
-    error_message TEXT,
-    result JSONB,
-    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
-    started_at TIMESTAMPTZ,
-    completed_at TIMESTAMPTZ
-);
-
--- Index for polling by status
-CREATE INDEX IF NOT EXISTS idx_household_jobs_status ON household_jobs(status);
-
--- Index for looking up by id
-CREATE INDEX IF NOT EXISTS idx_household_jobs_id ON household_jobs(id);
-
--- Enable RLS
-ALTER TABLE household_jobs ENABLE ROW LEVEL SECURITY;
-
--- Allow public read access (jobs are not sensitive)
-CREATE POLICY "Allow public read access to household_jobs"
-    ON household_jobs
-    FOR SELECT
-    USING (true);
-
--- Allow public insert (anyone can create a job)
-CREATE POLICY "Allow public insert to household_jobs"
-    ON household_jobs
-    FOR INSERT
-    WITH CHECK (true);
-
--- Allow service role to update (for Modal functions)
-CREATE POLICY "Allow service role to update household_jobs"
-    ON household_jobs
-    FOR UPDATE
-    USING (true);
diff --git a/supabase/migrations/20241229000000_allow_null_user_id.sql b/supabase/migrations/20241229000000_allow_null_user_id.sql
deleted file mode 100644
index dba0825..0000000
--- a/supabase/migrations/20241229000000_allow_null_user_id.sql
+++ /dev/null
@@ -1,2 +0,0 @@
--- Allow null user_id in reports table for anonymous API-triggered reports
-ALTER TABLE reports ALTER COLUMN user_id DROP NOT NULL;
diff --git a/tests/test_agent.py b/tests/test_agent.py
index c917159..2c591f5 100644
--- a/tests/test_agent.py
+++ b/tests/test_agent.py
@@ -3,10 +3,12 @@
 Tests verify that Claude Code is invoked correctly with proper MCP configuration.
 """
 
+import pytest
+
+pytestmark = pytest.mark.integration
+
 import json
 from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
 from fastapi.testclient import TestClient
 
 from policyengine_api.main import app
diff --git a/tests/test_analysis.py b/tests/test_analysis.py
index b093303..90dbe7c 100644
--- a/tests/test_analysis.py
+++ b/tests/test_analysis.py
@@ -5,6 +5,8 @@
 """
 
 import pytest
+
+pytestmark = pytest.mark.integration
 from fastapi.testclient import TestClient
 from sqlmodel import Session, select
 
diff --git a/tests/test_household.py b/tests/test_household.py
index f8e2629..8f17176 100644
--- a/tests/test_household.py
+++ b/tests/test_household.py
@@ -1,6 +1,9 @@
 """Tests for household calculation endpoint."""
 
 import pytest
+
+pytestmark = pytest.mark.integration
+
 from fastapi.testclient import TestClient
 
 from policyengine_api.main import app
diff --git a/tests/test_household_impact.py b/tests/test_household_impact.py
index 1cfd469..2ed1224 100644
--- a/tests/test_household_impact.py
+++ b/tests/test_household_impact.py
@@ -1,6 +1,9 @@
 """Tests for household impact comparison endpoint."""
 
 import pytest
+
+pytestmark = pytest.mark.integration
+
 from fastapi.testclient import TestClient
 
 from policyengine_api.main import app
diff --git a/tests/test_integration.py b/tests/test_integration.py
index d3f8dc1..e044cab 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -4,9 +4,11 @@
 Run with: make integration-test
 """
 
-from datetime import datetime, timezone
-
 import pytest
+
+pytestmark = pytest.mark.integration
+
+from datetime import datetime, timezone
 from rich.console import Console
 from sqlmodel import Session, create_engine, select