From a176b6f34970e43272ad0a341fa8f39e9cb9e74c Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <nikhil@policyengine.org>
Date: Tue, 30 Dec 2025 14:08:18 +0000
Subject: [PATCH 1/4] fix: remove non-existent label field from variables
 search
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Variables don't have a label field, only name and description.
The search filter referenced Variable.label, which caused 500
errors when searching with the tax_benefit_model_name filter.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 src/policyengine_api/api/variables.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/policyengine_api/api/variables.py b/src/policyengine_api/api/variables.py
index a24df44..d660b1b 100644
--- a/src/policyengine_api/api/variables.py
+++ b/src/policyengine_api/api/variables.py
@@ -54,11 +54,10 @@ def list_variables(
 
     if search:
         # Case-insensitive search using ILIKE
+        # Note: Variables don't have a label field, only name and description
         search_pattern = f"%{search}%"
-        search_filter = (
-            Variable.name.ilike(search_pattern)
-            | Variable.label.ilike(search_pattern)
-            | Variable.description.ilike(search_pattern)
+        search_filter = Variable.name.ilike(search_pattern) | Variable.description.ilike(
+            search_pattern
         )
         query = query.where(search_filter)
 

From a91521f6b8a61c83555c4343d02fd739304d27c3 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <nikhil@policyengine.org>
Date: Tue, 30 Dec 2025 14:08:27 +0000
Subject: [PATCH 2/4] feat: include datasets in lite mode, filter to 2026 only
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Lite mode now seeds FRS 2026 and CPS 2026 datasets instead of skipping all datasets
- Added hugging_face_token setting for dataset downloads

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 scripts/seed.py                         | 30 ++++++++++++++++++-------
 src/policyengine_api/config/settings.py |  1 +
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/scripts/seed.py b/scripts/seed.py
index 2069108..5ef2ed2 100644
--- a/scripts/seed.py
+++ b/scripts/seed.py
@@ -363,10 +363,11 @@ def seed_model(model_version, session, lite: bool = False) -> TaxBenefitModelVer
         return db_version
 
 
-def seed_datasets(session):
+def seed_datasets(session, lite: bool = False):
     """Seed datasets and upload to S3."""
     with logfire.span("seed_datasets"):
-        console.print("[bold blue]Seeding datasets...")
+        mode_str = " (lite mode - 2026 only)" if lite else ""
+        console.print(f"[bold blue]Seeding datasets{mode_str}...")
 
         # Get UK and US models
         uk_model = session.exec(
@@ -385,6 +386,14 @@ def seed_datasets(session):
         # UK datasets
         console.print("  Creating UK datasets...")
         uk_datasets = ensure_uk_datasets()
+
+        # In lite mode, only upload FRS 2026
+        if lite:
+            uk_datasets = {
+                k: v for k, v in uk_datasets.items() if v.year == 2026 and "frs" in k
+            }
+            console.print(f"    Lite mode: filtered to {len(uk_datasets)} dataset(s)")
+
         uk_created = 0
         uk_skipped = 0
 
@@ -431,6 +440,14 @@ def seed_datasets(session):
         # US datasets
         console.print("  Creating US datasets...")
         us_datasets = ensure_us_datasets()
+
+        # In lite mode, only upload CPS 2026
+        if lite:
+            us_datasets = {
+                k: v for k, v in us_datasets.items() if v.year == 2026 and "cps" in k
+            }
+            console.print(f"    Lite mode: filtered to {len(us_datasets)} dataset(s)")
+
         us_created = 0
         us_skipped = 0
 
@@ -602,7 +619,7 @@ def main():
     parser.add_argument(
         "--lite",
         action="store_true",
-        help="Lite mode: skip US state parameters for faster local seeding",
+        help="Lite mode: skip US state parameters, only seed FRS 2026 and CPS 2026 datasets",
     )
     args = parser.parse_args()
 
@@ -619,11 +636,8 @@ def main():
             us_version = seed_model(us_latest, session, lite=args.lite)
             console.print(f"[green]✓[/green] US model seeded: {us_version.id}\n")
 
-            # Seed datasets (skip in lite mode - requires cached data)
-            if not args.lite:
-                seed_datasets(session)
-            else:
-                console.print("[yellow]Skipping datasets (lite mode)[/yellow]\n")
+            # Seed datasets
+            seed_datasets(session, lite=args.lite)
 
             # Seed example policies
             seed_example_policies(session)
diff --git a/src/policyengine_api/config/settings.py b/src/policyengine_api/config/settings.py
index eb93967..76a1ab1 100644
--- a/src/policyengine_api/config/settings.py
+++ b/src/policyengine_api/config/settings.py
@@ -31,6 +31,7 @@ class Settings(BaseSettings):
 
     # Seeding
     limit_seed_parameters: bool = False
+    hugging_face_token: str = ""
 
     # Agent
     anthropic_api_key: str = ""

From 50e6c93ac1f9f181f53a577a7794be20922182a6 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <nikhil@policyengine.org>
Date: Tue, 30 Dec 2025 14:39:42 +0000
Subject: [PATCH 3/4] Update agent UI

---
 docs/bun.lock                         |  33 ++++
 docs/package.json                     |   3 +-
 docs/src/components/policy-chat.tsx   | 259 ++++++++++++++++++++------
 scripts/seed.py                       |   5 +-
 src/policyengine_api/agent_sandbox.py |  27 ++-
 5 files changed, 267 insertions(+), 60 deletions(-)

diff --git a/docs/bun.lock b/docs/bun.lock
index 8f25c95..8b46639 100644
--- a/docs/bun.lock
+++ b/docs/bun.lock
@@ -10,6 +10,7 @@
         "react-dom": "19.2.1",
         "react-markdown": "^10.1.0",
         "remark-breaks": "^4.0.0",
+        "remark-gfm": "^4.0.1",
       },
       "devDependencies": {
         "@tailwindcss/postcss": "^4",
@@ -694,12 +695,26 @@
 
     "magic-string": ["magic-string@0.30.21", "", { "dependencies": { "@jridgewell/sourcemap-codec": "^1.5.5" } }, "sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ=="],
 
+    "markdown-table": ["markdown-table@3.0.4", "", {}, "sha512-wiYz4+JrLyb/DqW2hkFJxP7Vd7JuTDm77fvbM8VfEQdmSMqcImWeeRbHwZjBjIFki/VaMK2BhFi7oUUZeM5bqw=="],
+
     "math-intrinsics": ["math-intrinsics@1.1.0", "", {}, "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g=="],
 
     "mdast-util-find-and-replace": ["mdast-util-find-and-replace@3.0.2", "", { "dependencies": { "@types/mdast": "^4.0.0", "escape-string-regexp": "^5.0.0", "unist-util-is": "^6.0.0", "unist-util-visit-parents": "^6.0.0" } }, "sha512-Tmd1Vg/m3Xz43afeNxDIhWRtFZgM2VLyaf4vSTYwudTyeuTneoL3qtWMA5jeLyz/O1vDJmmV4QuScFCA2tBPwg=="],
 
     "mdast-util-from-markdown": ["mdast-util-from-markdown@2.0.2", "", { "dependencies": { "@types/mdast": "^4.0.0", "@types/unist": "^3.0.0", "decode-named-character-reference": "^1.0.0", "devlop": "^1.0.0", "mdast-util-to-string": "^4.0.0", "micromark": "^4.0.0", "micromark-util-decode-numeric-character-reference": "^2.0.0", "micromark-util-decode-string": "^2.0.0", "micromark-util-normalize-identifier": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0", "unist-util-stringify-position": "^4.0.0" } }, "sha512-uZhTV/8NBuw0WHkPTrCqDOl0zVe1BIng5ZtHoDk49ME1qqcjYmmLmOf0gELgcRMxN4w2iuIeVso5/6QymSrgmA=="],
 
+    "mdast-util-gfm": ["mdast-util-gfm@3.1.0", "", { "dependencies": { "mdast-util-from-markdown": "^2.0.0", "mdast-util-gfm-autolink-literal": "^2.0.0", "mdast-util-gfm-footnote": "^2.0.0", "mdast-util-gfm-strikethrough": "^2.0.0", "mdast-util-gfm-table": "^2.0.0", "mdast-util-gfm-task-list-item": "^2.0.0", "mdast-util-to-markdown": "^2.0.0" } }, "sha512-0ulfdQOM3ysHhCJ1p06l0b0VKlhU0wuQs3thxZQagjcjPrlFRqY215uZGHHJan9GEAXd9MbfPjFJz+qMkVR6zQ=="],
+
+    "mdast-util-gfm-autolink-literal": ["mdast-util-gfm-autolink-literal@2.0.1", "", { "dependencies": { "@types/mdast": "^4.0.0", "ccount": "^2.0.0", "devlop": "^1.0.0", "mdast-util-find-and-replace": "^3.0.0", "micromark-util-character": "^2.0.0" } }, "sha512-5HVP2MKaP6L+G6YaxPNjuL0BPrq9orG3TsrZ9YXbA3vDw/ACI4MEsnoDpn6ZNm7GnZgtAcONJyPhOP8tNJQavQ=="],
+
+    "mdast-util-gfm-footnote": ["mdast-util-gfm-footnote@2.1.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "devlop": "^1.1.0", "mdast-util-from-markdown": "^2.0.0", "mdast-util-to-markdown": "^2.0.0", "micromark-util-normalize-identifier": "^2.0.0" } }, "sha512-sqpDWlsHn7Ac9GNZQMeUzPQSMzR6Wv0WKRNvQRg0KqHh02fpTz69Qc1QSseNX29bhz1ROIyNyxExfawVKTm1GQ=="],
+
+    "mdast-util-gfm-strikethrough": ["mdast-util-gfm-strikethrough@2.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-from-markdown": "^2.0.0", "mdast-util-to-markdown": "^2.0.0" } }, "sha512-mKKb915TF+OC5ptj5bJ7WFRPdYtuHv0yTRxK2tJvi+BDqbkiG7h7u/9SI89nRAYcmap2xHQL9D+QG/6wSrTtXg=="],
+
+    "mdast-util-gfm-table": ["mdast-util-gfm-table@2.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "devlop": "^1.0.0", "markdown-table": "^3.0.0", "mdast-util-from-markdown": "^2.0.0", "mdast-util-to-markdown": "^2.0.0" } }, "sha512-78UEvebzz/rJIxLvE7ZtDd/vIQ0RHv+3Mh5DR96p7cS7HsBhYIICDBCu8csTNWNO6tBWfqXPWekRuj2FNOGOZg=="],
+
+    "mdast-util-gfm-task-list-item": ["mdast-util-gfm-task-list-item@2.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "devlop": "^1.0.0", "mdast-util-from-markdown": "^2.0.0", "mdast-util-to-markdown": "^2.0.0" } }, "sha512-IrtvNvjxC1o06taBAVJznEnkiHxLFTzgonUdy8hzFVeDun0uTjxxrRGVaNFqkU1wJR3RBPEfsxmU6jDWPofrTQ=="],
+
     "mdast-util-mdx-expression": ["mdast-util-mdx-expression@2.0.1", "", { "dependencies": { "@types/estree-jsx": "^1.0.0", "@types/hast": "^3.0.0", "@types/mdast": "^4.0.0", "devlop": "^1.0.0", "mdast-util-from-markdown": "^2.0.0", "mdast-util-to-markdown": "^2.0.0" } }, "sha512-J6f+9hUp+ldTZqKRSg7Vw5V6MqjATc+3E4gf3CFNcuZNWD8XdyI6zQ8GqH7f8169MM6P7hMBRDVGnn7oHB9kXQ=="],
 
     "mdast-util-mdx-jsx": ["mdast-util-mdx-jsx@3.2.0", "", { "dependencies": { "@types/estree-jsx": "^1.0.0", "@types/hast": "^3.0.0", "@types/mdast": "^4.0.0", "@types/unist": "^3.0.0", "ccount": "^2.0.0", "devlop": "^1.1.0", "mdast-util-from-markdown": "^2.0.0", "mdast-util-to-markdown": "^2.0.0", "parse-entities": "^4.0.0", "stringify-entities": "^4.0.0", "unist-util-stringify-position": "^4.0.0", "vfile-message": "^4.0.0" } }, "sha512-lj/z8v0r6ZtsN/cGNNtemmmfoLAFZnjMbNyLzBafjzikOM+glrjNHPlf6lQDOTccj9n5b0PPihEBbhneMyGs1Q=="],
@@ -722,6 +737,20 @@
 
     "micromark-core-commonmark": ["micromark-core-commonmark@2.0.3", "", { "dependencies": { "decode-named-character-reference": "^1.0.0", "devlop": "^1.0.0", "micromark-factory-destination": "^2.0.0", "micromark-factory-label": "^2.0.0", "micromark-factory-space": "^2.0.0", "micromark-factory-title": "^2.0.0", "micromark-factory-whitespace": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-chunked": "^2.0.0", "micromark-util-classify-character": "^2.0.0", "micromark-util-html-tag-name": "^2.0.0", "micromark-util-normalize-identifier": "^2.0.0", "micromark-util-resolve-all": "^2.0.0", "micromark-util-subtokenize": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-RDBrHEMSxVFLg6xvnXmb1Ayr2WzLAWjeSATAoxwKYJV94TeNavgoIdA0a9ytzDSVzBy2YKFK+emCPOEibLeCrg=="],
 
+    "micromark-extension-gfm": ["micromark-extension-gfm@3.0.0", "", { "dependencies": { "micromark-extension-gfm-autolink-literal": "^2.0.0", "micromark-extension-gfm-footnote": "^2.0.0", "micromark-extension-gfm-strikethrough": "^2.0.0", "micromark-extension-gfm-table": "^2.0.0", "micromark-extension-gfm-tagfilter": "^2.0.0", "micromark-extension-gfm-task-list-item": "^2.0.0", "micromark-util-combine-extensions": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-vsKArQsicm7t0z2GugkCKtZehqUm31oeGBV/KVSorWSy8ZlNAv7ytjFhvaryUiCUJYqs+NoE6AFhpQvBTM6Q4w=="],
+
+    "micromark-extension-gfm-autolink-literal": ["micromark-extension-gfm-autolink-literal@2.1.0", "", { "dependencies": { "micromark-util-character": "^2.0.0", "micromark-util-sanitize-uri": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-oOg7knzhicgQ3t4QCjCWgTmfNhvQbDDnJeVu9v81r7NltNCVmhPy1fJRX27pISafdjL+SVc4d3l48Gb6pbRypw=="],
+
+    "micromark-extension-gfm-footnote": ["micromark-extension-gfm-footnote@2.1.0", "", { "dependencies": { "devlop": "^1.0.0", "micromark-core-commonmark": "^2.0.0", "micromark-factory-space": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-normalize-identifier": "^2.0.0", "micromark-util-sanitize-uri": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-/yPhxI1ntnDNsiHtzLKYnE3vf9JZ6cAisqVDauhp4CEHxlb4uoOTxOCJ+9s51bIB8U1N1FJ1RXOKTIlD5B/gqw=="],
+
+    "micromark-extension-gfm-strikethrough": ["micromark-extension-gfm-strikethrough@2.1.0", "", { "dependencies": { "devlop": "^1.0.0", "micromark-util-chunked": "^2.0.0", "micromark-util-classify-character": "^2.0.0", "micromark-util-resolve-all": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-ADVjpOOkjz1hhkZLlBiYA9cR2Anf8F4HqZUO6e5eDcPQd0Txw5fxLzzxnEkSkfnD0wziSGiv7sYhk/ktvbf1uw=="],
+
+    "micromark-extension-gfm-table": ["micromark-extension-gfm-table@2.1.1", "", { "dependencies": { "devlop": "^1.0.0", "micromark-factory-space": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-t2OU/dXXioARrC6yWfJ4hqB7rct14e8f7m0cbI5hUmDyyIlwv5vEtooptH8INkbLzOatzKuVbQmAYcbWoyz6Dg=="],
+
+    "micromark-extension-gfm-tagfilter": ["micromark-extension-gfm-tagfilter@2.0.0", "", { "dependencies": { "micromark-util-types": "^2.0.0" } }, "sha512-xHlTOmuCSotIA8TW1mDIM6X2O1SiX5P9IuDtqGonFhEK0qgRI4yeC6vMxEV2dgyr2TiD+2PQ10o+cOhdVAcwfg=="],
+
+    "micromark-extension-gfm-task-list-item": ["micromark-extension-gfm-task-list-item@2.1.0", "", { "dependencies": { "devlop": "^1.0.0", "micromark-factory-space": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-qIBZhqxqI6fjLDYFTBIa4eivDMnP+OZqsNwmQ3xNLE4Cxwc+zfQEfbs6tzAo2Hjq+bh6q5F+Z8/cksrLFYWQQw=="],
+
     "micromark-factory-destination": ["micromark-factory-destination@2.0.1", "", { "dependencies": { "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-Xe6rDdJlkmbFRExpTOmRj9N3MaWmbAgdpSrBQvCFqhezUn4AHqJHbaEnfbVYYiexVSs//tqOdY/DxhjdCiJnIA=="],
 
     "micromark-factory-label": ["micromark-factory-label@2.0.1", "", { "dependencies": { "devlop": "^1.0.0", "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-VFMekyQExqIW7xIChcXn4ok29YE3rnuyveW3wZQWWqF4Nv9Wk5rgJ99KzPvHjkmPXF93FXIbBp6YdW3t71/7Vg=="],
@@ -844,10 +873,14 @@
 
     "remark-breaks": ["remark-breaks@4.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-newline-to-break": "^2.0.0", "unified": "^11.0.0" } }, "sha512-IjEjJOkH4FuJvHZVIW0QCDWxcG96kCq7An/KVH2NfJe6rKZU2AsHeB3OEjPNRxi4QC34Xdx7I2KGYn6IpT7gxQ=="],
 
+    "remark-gfm": ["remark-gfm@4.0.1", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-gfm": "^3.0.0", "micromark-extension-gfm": "^3.0.0", "remark-parse": "^11.0.0", "remark-stringify": "^11.0.0", "unified": "^11.0.0" } }, "sha512-1quofZ2RQ9EWdeN34S79+KExV1764+wCUGop5CPL1WGdD0ocPpu91lzPGbwWMECpEpd42kJGQwzRfyov9j4yNg=="],
+
     "remark-parse": ["remark-parse@11.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-from-markdown": "^2.0.0", "micromark-util-types": "^2.0.0", "unified": "^11.0.0" } }, "sha512-FCxlKLNGknS5ba/1lmpYijMUzX2esxW5xQqjWxw2eHFfS2MSdaHVINFmhjo+qN1WhZhNimq0dZATN9pH0IDrpA=="],
 
     "remark-rehype": ["remark-rehype@11.1.2", "", { "dependencies": { "@types/hast": "^3.0.0", "@types/mdast": "^4.0.0", "mdast-util-to-hast": "^13.0.0", "unified": "^11.0.0", "vfile": "^6.0.0" } }, "sha512-Dh7l57ianaEoIpzbp0PC9UKAdCSVklD8E5Rpw7ETfbTl3FqcOOgq5q2LVDhgGCkaBv7p24JXikPdvhhmHvKMsw=="],
 
+    "remark-stringify": ["remark-stringify@11.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-to-markdown": "^2.0.0", "unified": "^11.0.0" } }, "sha512-1OSmLd3awB/t8qdoEOMazZkNsfVTeY4fTsgzcQFdXNq8ToTN4ZGwrMnlda4K6smTFKD+GRV6O48i6Z4iKgPPpw=="],
+
     "resolve": ["resolve@1.22.11", "", { "dependencies": { "is-core-module": "^2.16.1", "path-parse": "^1.0.7", "supports-preserve-symlinks-flag": "^1.0.0" }, "bin": { "resolve": "bin/resolve" } }, "sha512-RfqAvLnMl313r7c9oclB1HhUEAezcpLjz95wFH4LVuhk9JF/r22qmVP9AMmOU4vMX7Q8pN8jwNg/CSpdFnMjTQ=="],
 
     "resolve-from": ["resolve-from@4.0.0", "", {}, "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g=="],
diff --git a/docs/package.json b/docs/package.json
index 253ad06..4bafb7f 100644
--- a/docs/package.json
+++ b/docs/package.json
@@ -13,7 +13,8 @@
     "react": "19.2.1",
     "react-dom": "19.2.1",
     "react-markdown": "^10.1.0",
-    "remark-breaks": "^4.0.0"
+    "remark-breaks": "^4.0.0",
+    "remark-gfm": "^4.0.1"
   },
   "devDependencies": {
     "@tailwindcss/postcss": "^4",
diff --git a/docs/src/components/policy-chat.tsx b/docs/src/components/policy-chat.tsx
index 5a5b101..5a687fa 100644
--- a/docs/src/components/policy-chat.tsx
+++ b/docs/src/components/policy-chat.tsx
@@ -3,12 +3,14 @@
 import { useState, useRef, useEffect, useMemo } from "react";
 import ReactMarkdown from "react-markdown";
 import remarkBreaks from "remark-breaks";
+import remarkGfm from "remark-gfm";
 import { useApi } from "./api-context";
 
 interface Message {
   role: "user" | "assistant";
   content: string;
   status?: "pending" | "running" | "completed" | "failed";
+  steps?: ParsedStep[];
 }
 
 interface LogEntry {
@@ -60,13 +62,46 @@ function parseLogEntry(message: string): ParsedStep {
       } catch {
         // Not valid JSON
       }
-      // Clean up tool name for display
-      const displayName = toolName
-        .replace(/_/g, " ")
-        .replace(/parameters get$/, "")
-        .replace(/parameters post$/, "")
-        .replace(/household calculate post$/, "Calculate household")
-        .replace(/list /g, "Search ");
+      // Map tool names to human-readable labels
+      const toolNameMap: Record<string, string> = {
+        // Parameters
+        "list_parameters_parameters__get": "Search parameters",
+        "get_parameter_parameters__parameter_id__get": "Get parameter",
+        "list_parameter_values_parameter_values__get": "Get parameter values",
+        "get_parameter_value_parameter_values__parameter_value_id__get": "Get parameter value",
+        // Variables
+        "list_variables_variables__get": "Search variables",
+        "get_variable_variables__variable_id__get": "Get variable",
+        // Policies
+        "create_policy_policies__post": "Create policy",
+        "get_policy_policies__policy_id__get": "Get policy",
+        "list_policies_policies__get": "List policies",
+        // Household
+        "calculate_household_household_calculate_post": "Calculate household",
+        "get_household_job_status_household_calculate__job_id__get": "Poll household job",
+        // Household impact
+        "calculate_household_impact_comparison_household_impact_post": "Calculate household impact",
+        "get_household_impact_job_status_household_impact__job_id__get": "Poll household impact",
+        // Economic impact
+        "economic_impact_analysis_economic_impact_post": "Run economic analysis",
+        "get_economic_impact_status_analysis_economic_impact__report_id__get": "Poll economic analysis",
+        // Datasets
+        "list_datasets_datasets__get": "List datasets",
+        "get_dataset_datasets__dataset_id__get": "Get dataset",
+        // Models
+        "list_tax_benefit_models_tax_benefit_models__get": "List models",
+        "get_tax_benefit_model_tax_benefit_models__model_id__get": "Get model",
+        // Simulations
+        "list_simulations_simulations__get": "List simulations",
+        "get_simulation_simulations__simulation_id__get": "Get simulation",
+        // Utility
+        "sleep": "Wait",
+      };
+      const displayName = toolNameMap[toolName] || toolName
+        .replace(/_+/g, " ")
+        .replace(/\s+(get|post|put|delete)$/i, "")
+        .replace(/\s+/g, " ")
+        .trim();
       return {
         type: "tool_use",
         title: displayName,
@@ -145,7 +180,7 @@ function ToolCard({ step }: { step: ParsedStep }) {
   const [isExpanded, setIsExpanded] = useState(false);
 
   if (step.type === "agent") {
-    return null; // Hide agent messages, they're redundant with progress indicator
+    return null;
   }
 
   if (step.type === "tool_use") {
@@ -153,13 +188,13 @@ function ToolCard({ step }: { step: ParsedStep }) {
       <div className="py-1 animate-fadeIn">
         <button
           onClick={() => setIsExpanded(!isExpanded)}
-          className="flex items-center gap-2 hover:text-[var(--color-pe-green)] transition-colors font-mono"
+          className="flex items-center gap-2 hover:text-[var(--color-pe-green)] transition-colors group w-full text-left font-mono"
         >
           <span className="w-1.5 h-1.5 rounded-full bg-[var(--color-pe-green)] shrink-0" />
-          <span className="text-sm text-[var(--color-text-secondary)]">{step.title}</span>
+          <span className="text-[12px] text-[var(--color-text-secondary)]">{step.title}</span>
           {step.params && Object.keys(step.params).length > 0 && (
             <svg
-              className={`w-3.5 h-3.5 text-[var(--color-text-muted)] transition-transform shrink-0 ${isExpanded ? "rotate-90" : ""}`}
+              className={`w-3 h-3 text-[var(--color-text-muted)] transition-transform shrink-0 ${isExpanded ? "rotate-90" : ""}`}
               fill="none"
               viewBox="0 0 24 24"
               stroke="currentColor"
@@ -169,11 +204,11 @@ function ToolCard({ step }: { step: ParsedStep }) {
           )}
         </button>
         {isExpanded && step.params && Object.keys(step.params).length > 0 && (
-          <div className="ml-3.5 mt-1.5 font-mono text-xs text-[var(--color-text-muted)] bg-[var(--color-surface)] rounded-lg px-3 py-2 animate-slideDown">
+          <div className="ml-3.5 mt-1.5 text-[11px] bg-[var(--color-code-bg)] text-[var(--color-code-text)] rounded-md px-3 py-2 animate-slideDown font-mono">
             {Object.entries(step.params).map(([key, value]) => (
-              <div key={key} className="flex gap-1">
-                <span className="text-[var(--color-pe-green)]">{key}:</span>
-                <span className="text-[var(--color-text-secondary)]">
+              <div key={key} className="flex gap-2 py-0.5">
+                <span className="text-[var(--color-pe-green-light)]">{key}:</span>
+                <span className="text-[var(--color-code-text)]/80">
                   {typeof value === "string" ? value : JSON.stringify(value)}
                 </span>
               </div>
@@ -184,7 +219,6 @@ function ToolCard({ step }: { step: ParsedStep }) {
     );
   }
 
-  // Hide API details - too noisy
   if (step.type === "api_call" || step.type === "api_response") {
     return null;
   }
@@ -194,16 +228,16 @@ function ToolCard({ step }: { step: ParsedStep }) {
       <div className="py-1 ml-3.5 animate-fadeIn">
         <button
           onClick={() => setIsExpanded(!isExpanded)}
-          className="flex items-center gap-1.5 text-sm text-[var(--color-text-muted)] hover:text-[var(--color-text-secondary)] font-mono"
+          className="flex items-center gap-1.5 text-[11px] text-[var(--color-text-muted)] hover:text-[var(--color-text-secondary)] font-mono"
         >
-          <svg className={`w-3.5 h-3.5 transition-transform ${isExpanded ? "rotate-90" : ""}`} fill="none" viewBox="0 0 24 24" stroke="currentColor">
+          <svg className={`w-3 h-3 transition-transform ${isExpanded ? "rotate-90" : ""}`} fill="none" viewBox="0 0 24 24" stroke="currentColor">
             <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
           </svg>
-          <span>Result</span>
+          <span>result</span>
         </button>
         {isExpanded && (
-          <div className="mt-1.5 font-mono text-xs bg-[var(--color-code-bg)] text-[var(--color-code-text)] rounded p-2 overflow-x-auto max-h-64 overflow-y-auto animate-slideDown">
-            <pre className="whitespace-pre-wrap">{step.content}</pre>
+          <div className="mt-1.5 text-[11px] bg-[var(--color-code-bg)] text-[var(--color-code-text)] rounded-md p-2.5 overflow-x-auto max-h-48 overflow-y-auto animate-slideDown font-mono">
+            <pre className="whitespace-pre-wrap leading-relaxed">{step.content}</pre>
           </div>
         )}
       </div>
@@ -213,7 +247,7 @@ function ToolCard({ step }: { step: ParsedStep }) {
   if (step.type === "assistant") {
     return (
       <div className="py-1.5 animate-fadeIn">
-        <p className="text-sm text-[var(--color-text-muted)] leading-relaxed">{step.content}</p>
+        <p className="text-[12px] text-[var(--color-text-muted)] leading-relaxed italic">{step.content}</p>
       </div>
     );
   }
@@ -229,22 +263,29 @@ function ProgressIndicator({ logs }: { logs: LogEntry[] }) {
     const hasHousehold = logs.some(l => l.message.includes("household"));
     const isComplete = logs.some(l => l.message.includes("Completed"));
 
-    if (isComplete) return "Complete";
-    if (hasAnalysis) return "Running analysis...";
-    if (hasPolicy) return "Creating policy...";
-    if (hasHousehold) return "Calculating...";
-    if (hasSearch) return "Searching parameters...";
-    return "Starting...";
+    if (isComplete) return "complete";
+    if (hasAnalysis) return "running analysis...";
+    if (hasPolicy) return "creating policy...";
+    if (hasHousehold) return "calculating...";
+    if (hasSearch) return "searching parameters...";
+    return "starting...";
   }, [logs]);
 
   if (logs.length === 0) return null;
 
   return (
-    <div className="flex items-center gap-2 mb-3 text-sm text-[var(--color-text-muted)] font-mono">
-      {stage !== "Complete" && (
-        <div className="w-3.5 h-3.5 border-2 border-[var(--color-pe-green)] border-t-transparent rounded-full animate-spin" />
+    <div className="flex items-center gap-2 mb-3 pb-2.5 border-b border-[var(--color-border)]">
+      {stage !== "complete" && (
+        <div className="w-3 h-3 border-2 border-[var(--color-pe-green)] border-t-transparent rounded-full animate-spin" />
       )}
-      <span>{stage}</span>
+      {stage === "complete" && (
+        <div className="w-3 h-3 rounded-full bg-[var(--color-success)] flex items-center justify-center">
+          <svg className="w-2 h-2 text-white" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={3}>
+            <path strokeLinecap="round" strokeLinejoin="round" d="M5 13l4 4L19 7" />
+          </svg>
+        </div>
+      )}
+      <span className="text-[11px] font-mono text-[var(--color-text-muted)]">{stage}</span>
     </div>
   );
 }
@@ -308,6 +349,11 @@ export function PolicyChat() {
               : "Analysis failed. Please try again.";
         }
 
+        // Parse and store steps with the message so they persist
+        const finalSteps = (data.logs || [])
+          .map((log: LogEntry) => parseLogEntry(log.message))
+          .filter((step: ParsedStep) => step.type !== "unknown");
+
         setMessages((prev) => {
           const newMessages = [...prev];
           const lastIndex = newMessages.length - 1;
@@ -316,6 +362,7 @@ export function PolicyChat() {
               ...newMessages[lastIndex],
               content: finalContent,
               status: data.status,
+              steps: finalSteps,
             };
           }
           return newMessages;
@@ -396,9 +443,9 @@ export function PolicyChat() {
 
   const exampleQuestions = [
     "What is the UK personal allowance for 2026?",
-    "Calculate tax for someone earning £50,000 in the UK",
-    "What would happen if we increased child benefit by 10%?",
-    "What benefits would a single parent with two children receive?",
+    "Calculate tax for someone earning £50,000",
+    "What if we increased child benefit by 10%?",
+    "What benefits would a single parent receive?",
   ];
 
   return (
@@ -443,7 +490,7 @@ export function PolicyChat() {
                 <button
                   key={i}
                   onClick={() => setInput(q)}
-                  className="text-left p-4 rounded-xl bg-[var(--color-surface-sunken)] hover:bg-[var(--color-surface)] border border-transparent hover:border-[var(--color-border)] text-sm text-[var(--color-text-secondary)] transition-all group font-mono"
+                  className="text-left px-4 py-3 rounded-lg bg-[var(--color-surface-sunken)] hover:bg-white border border-transparent hover:border-[var(--color-border)] hover:shadow-sm text-[13px] text-[var(--color-text-secondary)] transition-all group font-mono"
                 >
                   <span className="group-hover:text-[var(--color-pe-green)] transition-colors">{q}</span>
                 </button>
@@ -457,24 +504,24 @@ export function PolicyChat() {
                 {message.role === "user" ? (
                   <div className="flex justify-end">
                     <div className="max-w-[80%] bg-[var(--color-pe-green)] text-white rounded-2xl rounded-br-md px-4 py-3">
-                      <p className="text-sm font-mono">{message.content}</p>
+                      <p className="text-[14px] leading-relaxed">{message.content}</p>
                     </div>
                   </div>
                 ) : (
                   <div className="space-y-3">
                     {/* Running state with live steps */}
                     {(message.status === "pending" || message.status === "running") && (
-                      <div className="bg-[var(--color-surface-sunken)] rounded-2xl p-4">
+                      <div className="bg-[var(--color-surface-sunken)] rounded-xl p-4">
                         <ProgressIndicator logs={logs} />
 
                         {message.status === "pending" ? (
-                          <div className="flex items-center gap-3">
-                            <div className="w-5 h-5 border-2 border-[var(--color-pe-green)] border-t-transparent rounded-full animate-spin" />
-                            <span className="text-sm text-[var(--color-text-secondary)] font-mono">Starting analysis...</span>
+                          <div className="flex items-center gap-2">
+                            <div className="w-3 h-3 border-2 border-[var(--color-pe-green)] border-t-transparent rounded-full animate-spin" />
+                            <span className="text-[11px] font-mono text-[var(--color-text-muted)]">starting...</span>
                           </div>
                         ) : (
                           <div className="space-y-0">
-                            {parsedSteps.slice(-10).map((step, j) => (
+                            {parsedSteps.slice(-12).map((step, j) => (
                               <ToolCard key={j} step={step} />
                             ))}
                           </div>
@@ -484,18 +531,18 @@ export function PolicyChat() {
 
                     {/* Completed/failed state */}
                     {(message.status === "completed" || message.status === "failed") && (
-                      <div className="space-y-4">
+                      <div className="space-y-3">
                         {/* Collapsible steps summary */}
-                        {parsedSteps.length > 0 && (
+                        {message.steps && message.steps.length > 0 && (
                           <details className="group">
-                            <summary className="cursor-pointer list-none flex items-center gap-2 text-sm text-[var(--color-text-muted)] hover:text-[var(--color-text-secondary)] font-mono">
-                              <svg className="w-3.5 h-3.5 group-open:rotate-90 transition-transform" fill="none" viewBox="0 0 24 24" stroke="currentColor">
+                            <summary className="cursor-pointer list-none flex items-center gap-2 text-[11px] text-[var(--color-text-muted)] hover:text-[var(--color-text-secondary)] font-mono">
+                              <svg className="w-3 h-3 group-open:rotate-90 transition-transform" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                                 <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
                               </svg>
-                              <span>{parsedSteps.filter(s => s.type === "tool_use").length} tool calls executed</span>
+                              <span>{message.steps.filter(s => s.type === "tool_use").length} tool calls</span>
                             </summary>
-                            <div className="mt-3 bg-[var(--color-surface-sunken)] rounded-xl p-4 space-y-0">
-                              {parsedSteps.map((step, j) => (
+                            <div className="mt-2 bg-[var(--color-surface-sunken)] rounded-lg p-3 space-y-0">
+                              {message.steps.map((step, j) => (
                                 <ToolCard key={j} step={step} />
                               ))}
                             </div>
@@ -503,13 +550,13 @@ export function PolicyChat() {
                         )}
 
                         {/* Final response */}
-                        <div className={`rounded-2xl rounded-bl-md px-5 py-4 ${
+                        <div className={`rounded-lg px-4 py-3 ${
                           message.status === "failed"
                             ? "bg-red-50 border border-red-200"
                             : "bg-white border border-[var(--color-border)]"
                         }`}>
-                          <div className="prose prose-sm max-w-none text-[var(--color-text-primary)] [&_strong]:font-semibold [&_code]:bg-[var(--color-surface-sunken)] [&_code]:px-1.5 [&_code]:py-0.5 [&_code]:rounded [&_code]:text-sm [&_code]:font-mono [&_h1]:text-lg [&_h1]:mt-4 [&_h1]:mb-2 [&_h2]:text-base [&_h2]:mt-3 [&_h2]:mb-2 [&_h3]:text-sm [&_h3]:mt-2 [&_h3]:mb-1 [&_p]:my-3 [&_p]:leading-relaxed [&_ul]:my-3 [&_ul]:space-y-1 [&_ol]:my-3 [&_ol]:space-y-1 [&_li]:my-0 [&_li]:leading-relaxed [&_blockquote]:border-l-2 [&_blockquote]:border-[var(--color-pe-green)] [&_blockquote]:pl-4 [&_blockquote]:my-3 [&_blockquote]:text-[var(--color-text-secondary)]">
-                            <ReactMarkdown remarkPlugins={[remarkBreaks]}>
+                          <div className="response-content">
+                            <ReactMarkdown remarkPlugins={[remarkBreaks, remarkGfm]}>
                               {message.content}
                             </ReactMarkdown>
                           </div>
@@ -534,12 +581,12 @@ export function PolicyChat() {
             onChange={(e) => setInput(e.target.value)}
             placeholder="Ask a policy question..."
             disabled={isLoading}
-            className="flex-1 px-4 py-3 text-sm font-mono border border-[var(--color-border)] rounded-xl bg-white focus:outline-none focus:ring-2 focus:ring-[var(--color-pe-green)] focus:border-transparent disabled:opacity-50 placeholder:text-[var(--color-text-muted)]"
+            className="flex-1 px-4 py-2.5 text-[13px] font-mono border border-[var(--color-border)] rounded-lg bg-white focus:outline-none focus:ring-2 focus:ring-[var(--color-pe-green)] focus:border-transparent disabled:opacity-50 placeholder:text-[var(--color-text-muted)]"
           />
           <button
             type="submit"
             disabled={isLoading || !input.trim()}
-            className="px-6 py-3 bg-[var(--color-pe-green)] hover:bg-[var(--color-pe-green-dark)] text-white rounded-xl text-sm font-medium disabled:opacity-50 disabled:cursor-not-allowed transition-colors flex items-center gap-2"
+            className="px-4 py-2.5 bg-[var(--color-pe-green)] hover:bg-[var(--color-pe-green-dark)] text-white rounded-lg text-[13px] font-medium disabled:opacity-50 disabled:cursor-not-allowed transition-colors flex items-center gap-2"
           >
             {isLoading ? (
               <>
@@ -558,7 +605,7 @@ export function PolicyChat() {
         </div>
       </form>
 
-      <style jsx>{`
+      <style jsx global>{`
         @keyframes fadeIn {
           from { opacity: 0; transform: translateY(4px); }
           to { opacity: 1; transform: translateY(0); }
@@ -573,6 +620,108 @@ export function PolicyChat() {
         .animate-slideDown {
           animation: slideDown 0.2s ease-out forwards;
         }
+
+        /* Response content typography */
+        .response-content {
+          font-family: var(--font-sans);
+          font-size: 14px;
+          line-height: 1.6;
+          color: var(--color-text-primary);
+        }
+        .response-content p {
+          margin: 0.75em 0;
+        }
+        .response-content p:first-child {
+          margin-top: 0;
+        }
+        .response-content p:last-child {
+          margin-bottom: 0;
+        }
+        .response-content h1, .response-content h2, .response-content h3 {
+          font-weight: 600;
+          margin-top: 1.25em;
+          margin-bottom: 0.5em;
+          line-height: 1.3;
+        }
+        .response-content h1 { font-size: 1.25em; }
+        .response-content h2 { font-size: 1.1em; }
+        .response-content h3 { font-size: 1em; }
+        .response-content h1:first-child,
+        .response-content h2:first-child,
+        .response-content h3:first-child {
+          margin-top: 0;
+        }
+        .response-content strong {
+          font-weight: 600;
+        }
+        .response-content ul, .response-content ol {
+          margin: 0.75em 0;
+          padding-left: 1.5em;
+        }
+        .response-content li {
+          margin: 0.25em 0;
+        }
+        .response-content code {
+          font-family: var(--font-mono);
+          font-size: 0.9em;
+          background: var(--color-surface-sunken);
+          padding: 0.15em 0.4em;
+          border-radius: 4px;
+        }
+        .response-content pre {
+          font-family: var(--font-mono);
+          font-size: 12px;
+          background: var(--color-code-bg);
+          color: var(--color-code-text);
+          padding: 1em;
+          border-radius: 8px;
+          overflow-x: auto;
+          margin: 1em 0;
+        }
+        .response-content pre code {
+          background: none;
+          padding: 0;
+          font-size: inherit;
+        }
+        .response-content table {
+          width: 100%;
+          border-collapse: collapse;
+          margin: 1em 0;
+          font-size: 13px;
+        }
+        .response-content th {
+          background: var(--color-surface-sunken);
+          border: 1px solid var(--color-border);
+          padding: 0.5em 0.75em;
+          text-align: left;
+          font-weight: 600;
+        }
+        .response-content td {
+          border: 1px solid var(--color-border);
+          padding: 0.5em 0.75em;
+        }
+        .response-content tr:hover td {
+          background: var(--color-surface-sunken);
+        }
+        .response-content blockquote {
+          border-left: 3px solid var(--color-pe-green);
+          padding-left: 1em;
+          margin: 1em 0;
+          color: var(--color-text-secondary);
+          font-style: italic;
+        }
+        .response-content a {
+          color: var(--color-pe-green);
+          text-decoration: underline;
+        }
+        .response-content a:hover {
+          color: var(--color-pe-green-dark);
+        }
+        .response-content hr {
+          border: none;
+          border-top: 1px solid var(--color-border);
+          margin: 1.5em 0;
+        }
       `}</style>
     </div>
   );
diff --git a/scripts/seed.py b/scripts/seed.py
index 5ef2ed2..f3fbfa8 100644
--- a/scripts/seed.py
+++ b/scripts/seed.py
@@ -385,7 +385,8 @@ def seed_datasets(session, lite: bool = False):
 
         # UK datasets
         console.print("  Creating UK datasets...")
-        uk_datasets = ensure_uk_datasets()
+        data_folder = str(Path(__file__).parent.parent / "data")
+        uk_datasets = ensure_uk_datasets(data_folder=data_folder)
 
         # In lite mode, only upload FRS 2026
         if lite:
@@ -439,7 +440,7 @@ def seed_datasets(session, lite: bool = False):
 
         # US datasets
         console.print("  Creating US datasets...")
-        us_datasets = ensure_us_datasets()
+        us_datasets = ensure_us_datasets(data_folder=data_folder)
 
         # In lite mode, only upload CPS 2026
         if lite:
diff --git a/src/policyengine_api/agent_sandbox.py b/src/policyengine_api/agent_sandbox.py
index 411d2f2..3987cda 100644
--- a/src/policyengine_api/agent_sandbox.py
+++ b/src/policyengine_api/agent_sandbox.py
@@ -45,11 +45,34 @@
    - POST /analysis/economic-impact with tax_benefit_model_name, policy_id and dataset_id
    - GET /analysis/economic-impact/{report_id} for results (includes decile_impacts and program_statistics)
 
+## Response formatting
+
+Follow PolicyEngine's writing style:
+
+1. **Active voice**: "The reform reduces poverty by 3.2%" not "Poverty is reduced by 3.2%"
+2. **Quantitative precision**: Use specific numbers, avoid vague words like "significantly" or "substantially"
+3. **Neutral tone**: Describe what policies do, not whether they're good or bad
+4. **Tables for data**: Present breakdowns and comparisons in markdown tables
+
+Example response format:
+| Item | Amount |
+|------|--------|
+| Income tax | £7,486 |
+| National Insurance | £2,994 |
+| **Total tax** | **£10,480** |
+
+- Gross income: £50,000
+- Net income: £39,520
+- Effective tax rate: 21.0%
+
+Avoid: "significantly reduces", "substantial savings", "unfortunately", "great news"
+Prefer: specific percentages, pound/dollar amounts, neutral descriptions
+
 ## Guidelines
 
 1. Use the API tools to get accurate, current data
-2. Be concise but thorough
-3. For UK, amounts are in GBP. For US, amounts are in USD.
+2. Be concise - lead with key numbers
+3. For UK, amounts are in GBP (£). For US, amounts are in USD ($)
 4. When polling async endpoints, use the sleep tool to wait 5-10 seconds between requests
 """
 

From b277187f3020b06bc2b5ec984c33b00e6f0cd208 Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <nikhil@policyengine.org>
Date: Tue, 30 Dec 2025 14:45:00 +0000
Subject: [PATCH 4/4] feat: improve agent chat UX and add conversation history

- Fix tool name display with proper mapping (e.g. "Calculate household")
- Add conversation history support for multi-turn chat
- Move response styling to global CSS with Inter font
- Add PolicyEngine style guide to agent prompt (tables, active voice)
- Persist tool steps per message so they don't disappear
---
 docs/src/app/globals.css              | 122 ++++++++++++++++++++++++++
 docs/src/components/policy-chat.tsx   | 119 +++----------------------
 src/policyengine_api/agent_sandbox.py |   9 +-
 src/policyengine_api/api/agent.py     |  20 ++++-
 4 files changed, 161 insertions(+), 109 deletions(-)

diff --git a/docs/src/app/globals.css b/docs/src/app/globals.css
index 8c00e5f..68ea6b7 100644
--- a/docs/src/app/globals.css
+++ b/docs/src/app/globals.css
@@ -100,3 +100,125 @@ code {
   background: var(--color-pe-green);
   color: white;
 }
+
+/* Agent response content */
+.response-content {
+  font-family: var(--font-inter), -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
+  font-size: 15px;
+  line-height: 1.7;
+  color: #1e293b;
+  -webkit-font-smoothing: antialiased;
+}
+.response-content p {
+  margin: 0.875em 0;
+}
+.response-content p:first-child {
+  margin-top: 0;
+}
+.response-content p:last-child {
+  margin-bottom: 0;
+}
+.response-content h1,
+.response-content h2,
+.response-content h3 {
+  font-weight: 600;
+  color: #0f172a;
+  margin-top: 1.5em;
+  margin-bottom: 0.5em;
+  line-height: 1.3;
+}
+.response-content h1 {
+  font-size: 1.375em;
+}
+.response-content h2 {
+  font-size: 1.125em;
+}
+.response-content h3 {
+  font-size: 1em;
+}
+.response-content h1:first-child,
+.response-content h2:first-child,
+.response-content h3:first-child {
+  margin-top: 0;
+}
+.response-content strong {
+  font-weight: 600;
+  color: #0f172a;
+}
+.response-content ul,
+.response-content ol {
+  margin: 0.875em 0;
+  padding-left: 1.5em;
+}
+.response-content li {
+  margin: 0.375em 0;
+}
+.response-content li::marker {
+  color: #64748b;
+}
+.response-content code {
+  font-family: "JetBrains Mono", ui-monospace, monospace;
+  font-size: 0.875em;
+  background: #f1f5f9;
+  padding: 0.2em 0.4em;
+  border-radius: 4px;
+  color: #334155;
+}
+.response-content pre {
+  font-family: "JetBrains Mono", ui-monospace, monospace;
+  font-size: 13px;
+  background: #1e293b;
+  color: #e2e8f0;
+  padding: 1em 1.25em;
+  border-radius: 8px;
+  overflow-x: auto;
+  margin: 1em 0;
+  line-height: 1.6;
+}
+.response-content pre code {
+  background: none;
+  padding: 0;
+  font-size: inherit;
+  color: inherit;
+}
+.response-content table {
+  width: 100%;
+  border-collapse: collapse;
+  margin: 1em 0;
+  font-size: 14px;
+}
+.response-content th {
+  background: #f8fafc;
+  border: 1px solid #e2e8f0;
+  padding: 0.625em 0.875em;
+  text-align: left;
+  font-weight: 600;
+  color: #475569;
+}
+.response-content td {
+  border: 1px solid #e2e8f0;
+  padding: 0.625em 0.875em;
+  color: #334155;
+}
+.response-content tr:hover td {
+  background: #f8fafc;
+}
+.response-content blockquote {
+  border-left: 3px solid #2c6e49;
+  padding-left: 1em;
+  margin: 1em 0;
+  color: #64748b;
+  font-style: italic;
+}
+.response-content a {
+  color: #2c6e49;
+  text-decoration: underline;
+}
+.response-content a:hover {
+  color: #1a4a2e;
+}
+.response-content hr {
+  border: none;
+  border-top: 1px solid #e2e8f0;
+  margin: 1.5em 0;
+}
diff --git a/docs/src/components/policy-chat.tsx b/docs/src/components/policy-chat.tsx
index 5a687fa..c47ab8f 100644
--- a/docs/src/components/policy-chat.tsx
+++ b/docs/src/components/policy-chat.tsx
@@ -388,6 +388,11 @@ export function PolicyChat() {
       pollIntervalRef.current = null;
     }
 
+    // Build history: all user messages plus completed assistant replies (pending/running/failed replies are excluded)
+    const history = messages
+      .filter(m => m.status === "completed" || m.role === "user")
+      .map(m => ({ role: m.role, content: m.content }));
+
     setMessages((prev) => [...prev, { role: "user", content: userMessage }]);
     setMessages((prev) => [
       ...prev,
@@ -398,7 +403,7 @@ export function PolicyChat() {
       const res = await fetch(`${baseUrl}/agent/run`, {
         method: "POST",
         headers: { "Content-Type": "application/json" },
-        body: JSON.stringify({ question: userMessage }),
+        body: JSON.stringify({ question: userMessage, history }),
       });
 
       if (!res.ok) throw new Error(`HTTP ${res.status}`);
@@ -555,7 +560,13 @@ export function PolicyChat() {
                             ? "bg-red-50 border border-red-200"
                             : "bg-white border border-[var(--color-border)]"
                         }`}>
-                          <div className="response-content">
+                          <div
+                            className="response-content font-[family-name:var(--font-inter)]"
+                            style={{
+                              fontSize: '15px',
+                              lineHeight: 1.7,
+                            }}
+                          >
                             <ReactMarkdown remarkPlugins={[remarkBreaks, remarkGfm]}>
                               {message.content}
                             </ReactMarkdown>
@@ -605,7 +616,7 @@ export function PolicyChat() {
         </div>
       </form>
 
-      <style jsx global>{`
+      <style jsx>{`
         @keyframes fadeIn {
           from { opacity: 0; transform: translateY(4px); }
           to { opacity: 1; transform: translateY(0); }
@@ -620,108 +631,6 @@ export function PolicyChat() {
         .animate-slideDown {
           animation: slideDown 0.2s ease-out forwards;
         }
-
-        /* Response content typography */
-        .response-content {
-          font-family: var(--font-sans);
-          font-size: 14px;
-          line-height: 1.6;
-          color: var(--color-text-primary);
-        }
-        .response-content p {
-          margin: 0.75em 0;
-        }
-        .response-content p:first-child {
-          margin-top: 0;
-        }
-        .response-content p:last-child {
-          margin-bottom: 0;
-        }
-        .response-content h1, .response-content h2, .response-content h3 {
-          font-weight: 600;
-          margin-top: 1.25em;
-          margin-bottom: 0.5em;
-          line-height: 1.3;
-        }
-        .response-content h1 { font-size: 1.25em; }
-        .response-content h2 { font-size: 1.1em; }
-        .response-content h3 { font-size: 1em; }
-        .response-content h1:first-child,
-        .response-content h2:first-child,
-        .response-content h3:first-child {
-          margin-top: 0;
-        }
-        .response-content strong {
-          font-weight: 600;
-        }
-        .response-content ul, .response-content ol {
-          margin: 0.75em 0;
-          padding-left: 1.5em;
-        }
-        .response-content li {
-          margin: 0.25em 0;
-        }
-        .response-content code {
-          font-family: var(--font-mono);
-          font-size: 0.9em;
-          background: var(--color-surface-sunken);
-          padding: 0.15em 0.4em;
-          border-radius: 4px;
-        }
-        .response-content pre {
-          font-family: var(--font-mono);
-          font-size: 12px;
-          background: var(--color-code-bg);
-          color: var(--color-code-text);
-          padding: 1em;
-          border-radius: 8px;
-          overflow-x: auto;
-          margin: 1em 0;
-        }
-        .response-content pre code {
-          background: none;
-          padding: 0;
-          font-size: inherit;
-        }
-        .response-content table {
-          width: 100%;
-          border-collapse: collapse;
-          margin: 1em 0;
-          font-size: 13px;
-        }
-        .response-content th {
-          background: var(--color-surface-sunken);
-          border: 1px solid var(--color-border);
-          padding: 0.5em 0.75em;
-          text-align: left;
-          font-weight: 600;
-        }
-        .response-content td {
-          border: 1px solid var(--color-border);
-          padding: 0.5em 0.75em;
-        }
-        .response-content tr:hover td {
-          background: var(--color-surface-sunken);
-        }
-        .response-content blockquote {
-          border-left: 3px solid var(--color-pe-green);
-          padding-left: 1em;
-          margin: 1em 0;
-          color: var(--color-text-secondary);
-          font-style: italic;
-        }
-        .response-content a {
-          color: var(--color-pe-green);
-          text-decoration: underline;
-        }
-        .response-content a:hover {
-          color: var(--color-pe-green-dark);
-        }
-        .response-content hr {
-          border: none;
-          border-top: 1px solid var(--color-border);
-          margin: 1.5em 0;
-        }
       `}</style>
     </div>
   );
diff --git a/src/policyengine_api/agent_sandbox.py b/src/policyengine_api/agent_sandbox.py
index 3987cda..09101dd 100644
--- a/src/policyengine_api/agent_sandbox.py
+++ b/src/policyengine_api/agent_sandbox.py
@@ -341,6 +341,7 @@ def _run_agent_impl(
     question: str,
     api_base_url: str = "https://v2.api.policyengine.org",
     call_id: str = "",
+    history: list[dict] | None = None,
     max_turns: int = 30,
 ) -> dict:
     """Core agent implementation."""
@@ -376,7 +377,13 @@ def log(msg: str) -> None:
     claude_tools.append(SLEEP_TOOL)
 
     client = anthropic.Anthropic()
-    messages = [{"role": "user", "content": question}]
+
+    # Build messages with conversation history
+    messages = []
+    if history:
+        for msg in history:
+            messages.append({"role": msg["role"], "content": msg["content"]})
+    messages.append({"role": "user", "content": question})
 
     final_response = None
     turns = 0
diff --git a/src/policyengine_api/api/agent.py b/src/policyengine_api/api/agent.py
index 7389211..9ea4b8a 100644
--- a/src/policyengine_api/api/agent.py
+++ b/src/policyengine_api/api/agent.py
@@ -19,10 +19,18 @@
 router = APIRouter(prefix="/agent", tags=["agent"])
 
 
+class ConversationMessage(BaseModel):
+    """A message in the conversation history."""
+
+    role: str  # "user" or "assistant"
+    content: str
+
+
 class RunRequest(BaseModel):
     """Request to run the agent."""
 
     question: str
+    history: list[ConversationMessage] = []
 
 
 class RunResponse(BaseModel):
@@ -67,12 +75,18 @@ class StatusResponse(BaseModel):
 _logs: dict[str, list[LogEntry]] = {}
 
 
-def _run_local_agent(call_id: str, question: str, api_base_url: str) -> None:
+def _run_local_agent(
+    call_id: str,
+    question: str,
+    api_base_url: str,
+    history: list[ConversationMessage] | None = None,
+) -> None:
     """Run agent locally in a background thread."""
     from policyengine_api.agent_sandbox import _run_agent_impl
 
     try:
-        result = _run_agent_impl(question, api_base_url, call_id)
+        history_dicts = [{"role": m.role, "content": m.content} for m in (history or [])]
+        result = _run_agent_impl(question, api_base_url, call_id, history_dicts)
         _calls[call_id]["status"] = result.get("status", "completed")
         _calls[call_id]["result"] = result
     except Exception as e:
@@ -139,7 +153,7 @@ async def run_agent(request: RunRequest) -> RunResponse:
         # Run in background using asyncio
         loop = asyncio.get_event_loop()
         loop.run_in_executor(
-            None, _run_local_agent, call_id, request.question, api_base_url
+            None, _run_local_agent, call_id, request.question, api_base_url, request.history
         )
 
     return RunResponse(call_id=call_id, status="running")