From a176b6f34970e43272ad0a341fa8f39e9cb9e74c Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 30 Dec 2025 14:08:18 +0000 Subject: [PATCH 1/4] fix: remove non-existent label field from variables search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Variables don't have a label field, only name and description. The search filter was referencing Variable.label which caused 500 errors when searching with tax_benefit_model_name filter. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/policyengine_api/api/variables.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/policyengine_api/api/variables.py b/src/policyengine_api/api/variables.py index a24df44..d660b1b 100644 --- a/src/policyengine_api/api/variables.py +++ b/src/policyengine_api/api/variables.py @@ -54,11 +54,10 @@ def list_variables( if search: # Case-insensitive search using ILIKE + # Note: Variables don't have a label field, only name and description search_pattern = f"%{search}%" - search_filter = ( - Variable.name.ilike(search_pattern) - | Variable.label.ilike(search_pattern) - | Variable.description.ilike(search_pattern) + search_filter = Variable.name.ilike(search_pattern) | Variable.description.ilike( + search_pattern ) query = query.where(search_filter) From a91521f6b8a61c83555c4343d02fd739304d27c3 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 30 Dec 2025 14:08:27 +0000 Subject: [PATCH 2/4] feat: include datasets in lite mode, filter to 2026 only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Lite mode now seeds FRS 2026 and CPS 2026 datasets instead of skipping all - Added hugging_face_token setting for dataset downloads 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- scripts/seed.py | 30 ++++++++++++++++++------- src/policyengine_api/config/settings.py | 1 + 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/scripts/seed.py b/scripts/seed.py index 2069108..5ef2ed2 100644 --- a/scripts/seed.py +++ b/scripts/seed.py @@ -363,10 +363,11 @@ def seed_model(model_version, session, lite: bool = False) -> TaxBenefitModelVer return db_version -def seed_datasets(session): +def seed_datasets(session, lite: bool = False): """Seed datasets and upload to S3.""" with logfire.span("seed_datasets"): - console.print("[bold blue]Seeding datasets...") + mode_str = " (lite mode - 2026 only)" if lite else "" + console.print(f"[bold blue]Seeding datasets{mode_str}...") # Get UK and US models uk_model = session.exec( @@ -385,6 +386,14 @@ def seed_datasets(session): # UK datasets console.print(" Creating UK datasets...") uk_datasets = ensure_uk_datasets() + + # In lite mode, only upload FRS 2026 + if lite: + uk_datasets = { + k: v for k, v in uk_datasets.items() if v.year == 2026 and "frs" in k + } + console.print(f" Lite mode: filtered to {len(uk_datasets)} dataset(s)") + uk_created = 0 uk_skipped = 0 @@ -431,6 +440,14 @@ def seed_datasets(session): # US datasets console.print(" Creating US datasets...") us_datasets = ensure_us_datasets() + + # In lite mode, only upload CPS 2026 + if lite: + us_datasets = { + k: v for k, v in us_datasets.items() if v.year == 2026 and "cps" in k + } + console.print(f" Lite mode: filtered to {len(us_datasets)} dataset(s)") + us_created = 0 us_skipped = 0 @@ -602,7 +619,7 @@ def main(): parser.add_argument( "--lite", action="store_true", - help="Lite mode: skip US state parameters for faster local seeding", + help="Lite mode: skip US state parameters, only seed FRS 2026 and CPS 2026 datasets", ) args = parser.parse_args() @@ -619,11 +636,8 @@ def main(): us_version = seed_model(us_latest, session, lite=args.lite) console.print(f"[green]✓[/green] US model seeded: {us_version.id}\n") - # Seed datasets (skip in lite mode - requires cached data) - if not args.lite: - seed_datasets(session) - else: - console.print("[yellow]Skipping datasets (lite mode)[/yellow]\n") + # Seed datasets + seed_datasets(session, lite=args.lite) # Seed example policies seed_example_policies(session) diff --git a/src/policyengine_api/config/settings.py b/src/policyengine_api/config/settings.py index eb93967..76a1ab1 100644 --- a/src/policyengine_api/config/settings.py +++ b/src/policyengine_api/config/settings.py @@ -31,6 +31,7 @@ class Settings(BaseSettings): # Seeding limit_seed_parameters: bool = False + hugging_face_token: str = "" # Agent anthropic_api_key: str = "" From 50e6c93ac1f9f181f53a577a7794be20922182a6 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 30 Dec 2025 14:39:42 +0000 Subject: [PATCH 3/4] Update agent ui --- docs/bun.lock | 33 ++++ docs/package.json | 3 +- docs/src/components/policy-chat.tsx | 259 ++++++++++++++++++++------ scripts/seed.py | 5 +- src/policyengine_api/agent_sandbox.py | 27 ++- 5 files changed, 267 insertions(+), 60 deletions(-) diff --git a/docs/bun.lock b/docs/bun.lock index 8f25c95..8b46639 100644 --- a/docs/bun.lock +++ b/docs/bun.lock @@ -10,6 +10,7 @@ "react-dom": "19.2.1", "react-markdown": "^10.1.0", "remark-breaks": "^4.0.0", + "remark-gfm": "^4.0.1", }, "devDependencies": { "@tailwindcss/postcss": "^4", @@ -694,12 +695,26 @@ "magic-string": ["magic-string@0.30.21", "", { "dependencies": { "@jridgewell/sourcemap-codec": "^1.5.5" } }, "sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ=="], + "markdown-table": ["markdown-table@3.0.4", "", {}, "sha512-wiYz4+JrLyb/DqW2hkFJxP7Vd7JuTDm77fvbM8VfEQdmSMqcImWeeRbHwZjBjIFki/VaMK2BhFi7oUUZeM5bqw=="], + "math-intrinsics": ["math-intrinsics@1.1.0", "", {}, "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g=="], "mdast-util-find-and-replace": ["mdast-util-find-and-replace@3.0.2", "", { "dependencies": { "@types/mdast": "^4.0.0", "escape-string-regexp": "^5.0.0", "unist-util-is": "^6.0.0", "unist-util-visit-parents": "^6.0.0" } }, "sha512-Tmd1Vg/m3Xz43afeNxDIhWRtFZgM2VLyaf4vSTYwudTyeuTneoL3qtWMA5jeLyz/O1vDJmmV4QuScFCA2tBPwg=="], "mdast-util-from-markdown": ["mdast-util-from-markdown@2.0.2", "", { "dependencies": { "@types/mdast": "^4.0.0", "@types/unist": "^3.0.0", "decode-named-character-reference": "^1.0.0", "devlop": "^1.0.0", "mdast-util-to-string": "^4.0.0", "micromark": "^4.0.0", "micromark-util-decode-numeric-character-reference": "^2.0.0", "micromark-util-decode-string": "^2.0.0", "micromark-util-normalize-identifier": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0", "unist-util-stringify-position": "^4.0.0" } }, "sha512-uZhTV/8NBuw0WHkPTrCqDOl0zVe1BIng5ZtHoDk49ME1qqcjYmmLmOf0gELgcRMxN4w2iuIeVso5/6QymSrgmA=="], + "mdast-util-gfm": ["mdast-util-gfm@3.1.0", "", { "dependencies": { "mdast-util-from-markdown": "^2.0.0", "mdast-util-gfm-autolink-literal": "^2.0.0", "mdast-util-gfm-footnote": "^2.0.0", "mdast-util-gfm-strikethrough": "^2.0.0", "mdast-util-gfm-table": "^2.0.0", "mdast-util-gfm-task-list-item": "^2.0.0", "mdast-util-to-markdown": "^2.0.0" } }, "sha512-0ulfdQOM3ysHhCJ1p06l0b0VKlhU0wuQs3thxZQagjcjPrlFRqY215uZGHHJan9GEAXd9MbfPjFJz+qMkVR6zQ=="], + + "mdast-util-gfm-autolink-literal": ["mdast-util-gfm-autolink-literal@2.0.1", "", { "dependencies": { "@types/mdast": "^4.0.0", "ccount": "^2.0.0", "devlop": "^1.0.0", "mdast-util-find-and-replace": "^3.0.0", "micromark-util-character": "^2.0.0" } }, "sha512-5HVP2MKaP6L+G6YaxPNjuL0BPrq9orG3TsrZ9YXbA3vDw/ACI4MEsnoDpn6ZNm7GnZgtAcONJyPhOP8tNJQavQ=="], + + "mdast-util-gfm-footnote": ["mdast-util-gfm-footnote@2.1.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "devlop": "^1.1.0", "mdast-util-from-markdown": "^2.0.0", "mdast-util-to-markdown": "^2.0.0", "micromark-util-normalize-identifier": "^2.0.0" } }, "sha512-sqpDWlsHn7Ac9GNZQMeUzPQSMzR6Wv0WKRNvQRg0KqHh02fpTz69Qc1QSseNX29bhz1ROIyNyxExfawVKTm1GQ=="], + + "mdast-util-gfm-strikethrough": ["mdast-util-gfm-strikethrough@2.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-from-markdown": "^2.0.0", "mdast-util-to-markdown": "^2.0.0" } }, "sha512-mKKb915TF+OC5ptj5bJ7WFRPdYtuHv0yTRxK2tJvi+BDqbkiG7h7u/9SI89nRAYcmap2xHQL9D+QG/6wSrTtXg=="], + + "mdast-util-gfm-table": ["mdast-util-gfm-table@2.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "devlop": "^1.0.0", "markdown-table": "^3.0.0", "mdast-util-from-markdown": "^2.0.0", "mdast-util-to-markdown": "^2.0.0" } }, "sha512-78UEvebzz/rJIxLvE7ZtDd/vIQ0RHv+3Mh5DR96p7cS7HsBhYIICDBCu8csTNWNO6tBWfqXPWekRuj2FNOGOZg=="], + + "mdast-util-gfm-task-list-item": ["mdast-util-gfm-task-list-item@2.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "devlop": "^1.0.0", "mdast-util-from-markdown": "^2.0.0", "mdast-util-to-markdown": "^2.0.0" } }, "sha512-IrtvNvjxC1o06taBAVJznEnkiHxLFTzgonUdy8hzFVeDun0uTjxxrRGVaNFqkU1wJR3RBPEfsxmU6jDWPofrTQ=="], + "mdast-util-mdx-expression": ["mdast-util-mdx-expression@2.0.1", "", { "dependencies": { "@types/estree-jsx": "^1.0.0", "@types/hast": "^3.0.0", "@types/mdast": "^4.0.0", "devlop": "^1.0.0", "mdast-util-from-markdown": "^2.0.0", "mdast-util-to-markdown": "^2.0.0" } }, "sha512-J6f+9hUp+ldTZqKRSg7Vw5V6MqjATc+3E4gf3CFNcuZNWD8XdyI6zQ8GqH7f8169MM6P7hMBRDVGnn7oHB9kXQ=="], "mdast-util-mdx-jsx": ["mdast-util-mdx-jsx@3.2.0", "", { "dependencies": { "@types/estree-jsx": "^1.0.0", "@types/hast": "^3.0.0", "@types/mdast": "^4.0.0", "@types/unist": "^3.0.0", "ccount": "^2.0.0", "devlop": "^1.1.0", "mdast-util-from-markdown": "^2.0.0", "mdast-util-to-markdown": "^2.0.0", "parse-entities": "^4.0.0", "stringify-entities": "^4.0.0", "unist-util-stringify-position": "^4.0.0", "vfile-message": "^4.0.0" } }, "sha512-lj/z8v0r6ZtsN/cGNNtemmmfoLAFZnjMbNyLzBafjzikOM+glrjNHPlf6lQDOTccj9n5b0PPihEBbhneMyGs1Q=="], @@ -722,6 +737,20 @@ "micromark-core-commonmark": ["micromark-core-commonmark@2.0.3", "", { "dependencies": { "decode-named-character-reference": "^1.0.0", "devlop": "^1.0.0", "micromark-factory-destination": "^2.0.0", "micromark-factory-label": "^2.0.0", "micromark-factory-space": "^2.0.0", "micromark-factory-title": "^2.0.0", "micromark-factory-whitespace": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-chunked": "^2.0.0", "micromark-util-classify-character": "^2.0.0", "micromark-util-html-tag-name": "^2.0.0", "micromark-util-normalize-identifier": "^2.0.0", "micromark-util-resolve-all": "^2.0.0", "micromark-util-subtokenize": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-RDBrHEMSxVFLg6xvnXmb1Ayr2WzLAWjeSATAoxwKYJV94TeNavgoIdA0a9ytzDSVzBy2YKFK+emCPOEibLeCrg=="], + "micromark-extension-gfm": ["micromark-extension-gfm@3.0.0", "", { "dependencies": { "micromark-extension-gfm-autolink-literal": "^2.0.0", "micromark-extension-gfm-footnote": "^2.0.0", "micromark-extension-gfm-strikethrough": "^2.0.0", "micromark-extension-gfm-table": "^2.0.0", "micromark-extension-gfm-tagfilter": "^2.0.0", "micromark-extension-gfm-task-list-item": "^2.0.0", "micromark-util-combine-extensions": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-vsKArQsicm7t0z2GugkCKtZehqUm31oeGBV/KVSorWSy8ZlNAv7ytjFhvaryUiCUJYqs+NoE6AFhpQvBTM6Q4w=="], + + "micromark-extension-gfm-autolink-literal": ["micromark-extension-gfm-autolink-literal@2.1.0", "", { "dependencies": { "micromark-util-character": "^2.0.0", "micromark-util-sanitize-uri": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-oOg7knzhicgQ3t4QCjCWgTmfNhvQbDDnJeVu9v81r7NltNCVmhPy1fJRX27pISafdjL+SVc4d3l48Gb6pbRypw=="], + + "micromark-extension-gfm-footnote": ["micromark-extension-gfm-footnote@2.1.0", "", { "dependencies": { "devlop": "^1.0.0", "micromark-core-commonmark": "^2.0.0", "micromark-factory-space": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-normalize-identifier": "^2.0.0", "micromark-util-sanitize-uri": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-/yPhxI1ntnDNsiHtzLKYnE3vf9JZ6cAisqVDauhp4CEHxlb4uoOTxOCJ+9s51bIB8U1N1FJ1RXOKTIlD5B/gqw=="], + + "micromark-extension-gfm-strikethrough": ["micromark-extension-gfm-strikethrough@2.1.0", "", { "dependencies": { "devlop": "^1.0.0", "micromark-util-chunked": "^2.0.0", "micromark-util-classify-character": "^2.0.0", "micromark-util-resolve-all": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-ADVjpOOkjz1hhkZLlBiYA9cR2Anf8F4HqZUO6e5eDcPQd0Txw5fxLzzxnEkSkfnD0wziSGiv7sYhk/ktvbf1uw=="], + + "micromark-extension-gfm-table": ["micromark-extension-gfm-table@2.1.1", "", { "dependencies": { "devlop": "^1.0.0", "micromark-factory-space": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-t2OU/dXXioARrC6yWfJ4hqB7rct14e8f7m0cbI5hUmDyyIlwv5vEtooptH8INkbLzOatzKuVbQmAYcbWoyz6Dg=="], + + "micromark-extension-gfm-tagfilter": ["micromark-extension-gfm-tagfilter@2.0.0", "", { "dependencies": { "micromark-util-types": "^2.0.0" } }, "sha512-xHlTOmuCSotIA8TW1mDIM6X2O1SiX5P9IuDtqGonFhEK0qgRI4yeC6vMxEV2dgyr2TiD+2PQ10o+cOhdVAcwfg=="], + + "micromark-extension-gfm-task-list-item": ["micromark-extension-gfm-task-list-item@2.1.0", "", { "dependencies": { "devlop": "^1.0.0", "micromark-factory-space": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-qIBZhqxqI6fjLDYFTBIa4eivDMnP+OZqsNwmQ3xNLE4Cxwc+zfQEfbs6tzAo2Hjq+bh6q5F+Z8/cksrLFYWQQw=="], + "micromark-factory-destination": ["micromark-factory-destination@2.0.1", "", { "dependencies": { "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-Xe6rDdJlkmbFRExpTOmRj9N3MaWmbAgdpSrBQvCFqhezUn4AHqJHbaEnfbVYYiexVSs//tqOdY/DxhjdCiJnIA=="], "micromark-factory-label": ["micromark-factory-label@2.0.1", "", { "dependencies": { "devlop": "^1.0.0", "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-VFMekyQExqIW7xIChcXn4ok29YE3rnuyveW3wZQWWqF4Nv9Wk5rgJ99KzPvHjkmPXF93FXIbBp6YdW3t71/7Vg=="], @@ -844,10 +873,14 @@ "remark-breaks": ["remark-breaks@4.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-newline-to-break": "^2.0.0", "unified": "^11.0.0" } }, "sha512-IjEjJOkH4FuJvHZVIW0QCDWxcG96kCq7An/KVH2NfJe6rKZU2AsHeB3OEjPNRxi4QC34Xdx7I2KGYn6IpT7gxQ=="], + "remark-gfm": ["remark-gfm@4.0.1", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-gfm": "^3.0.0", "micromark-extension-gfm": "^3.0.0", "remark-parse": "^11.0.0", "remark-stringify": "^11.0.0", "unified": "^11.0.0" } }, "sha512-1quofZ2RQ9EWdeN34S79+KExV1764+wCUGop5CPL1WGdD0ocPpu91lzPGbwWMECpEpd42kJGQwzRfyov9j4yNg=="], + "remark-parse": ["remark-parse@11.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-from-markdown": "^2.0.0", "micromark-util-types": "^2.0.0", "unified": "^11.0.0" } }, "sha512-FCxlKLNGknS5ba/1lmpYijMUzX2esxW5xQqjWxw2eHFfS2MSdaHVINFmhjo+qN1WhZhNimq0dZATN9pH0IDrpA=="], "remark-rehype": ["remark-rehype@11.1.2", "", { "dependencies": { "@types/hast": "^3.0.0", "@types/mdast": "^4.0.0", "mdast-util-to-hast": "^13.0.0", "unified": "^11.0.0", "vfile": "^6.0.0" } }, "sha512-Dh7l57ianaEoIpzbp0PC9UKAdCSVklD8E5Rpw7ETfbTl3FqcOOgq5q2LVDhgGCkaBv7p24JXikPdvhhmHvKMsw=="], + "remark-stringify": ["remark-stringify@11.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-to-markdown": "^2.0.0", "unified": "^11.0.0" } }, "sha512-1OSmLd3awB/t8qdoEOMazZkNsfVTeY4fTsgzcQFdXNq8ToTN4ZGwrMnlda4K6smTFKD+GRV6O48i6Z4iKgPPpw=="], + "resolve": ["resolve@1.22.11", "", { "dependencies": { "is-core-module": "^2.16.1", "path-parse": "^1.0.7", "supports-preserve-symlinks-flag": "^1.0.0" }, "bin": { "resolve": "bin/resolve" } }, "sha512-RfqAvLnMl313r7c9oclB1HhUEAezcpLjz95wFH4LVuhk9JF/r22qmVP9AMmOU4vMX7Q8pN8jwNg/CSpdFnMjTQ=="], "resolve-from": ["resolve-from@4.0.0", "", {}, "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g=="], diff --git a/docs/package.json b/docs/package.json index 253ad06..4bafb7f 100644 --- a/docs/package.json +++ b/docs/package.json @@ -13,7 +13,8 @@ "react": "19.2.1", "react-dom": "19.2.1", "react-markdown": "^10.1.0", - "remark-breaks": "^4.0.0" + "remark-breaks": "^4.0.0", + "remark-gfm": "^4.0.1" }, "devDependencies": { "@tailwindcss/postcss": "^4", diff --git a/docs/src/components/policy-chat.tsx b/docs/src/components/policy-chat.tsx index 5a5b101..5a687fa 100644 --- a/docs/src/components/policy-chat.tsx +++ b/docs/src/components/policy-chat.tsx @@ -3,12 +3,14 @@ import { useState, useRef, useEffect, useMemo } from "react"; import ReactMarkdown from "react-markdown"; import remarkBreaks from "remark-breaks"; +import remarkGfm from "remark-gfm"; import { useApi } from "./api-context"; interface Message { role: "user" | "assistant"; content: string; status?: "pending" | "running" | "completed" | "failed"; + steps?: ParsedStep[]; } interface LogEntry { @@ -60,13 +62,46 @@ function parseLogEntry(message: string): ParsedStep { } catch { // Not valid JSON } - // Clean up tool name for display - const displayName = toolName - .replace(/_/g, " ") - .replace(/parameters get$/, "") - .replace(/parameters post$/, "") - .replace(/household calculate post$/, "Calculate household") - .replace(/list /g, "Search "); + // Map tool names to human-readable labels + const toolNameMap: Record = { + // Parameters + "list_parameters_parameters__get": "Search parameters", + "get_parameter_parameters__parameter_id__get": "Get parameter", + "list_parameter_values_parameter_values__get": "Get parameter values", + "get_parameter_value_parameter_values__parameter_value_id__get": "Get parameter value", + // Variables + "list_variables_variables__get": "Search variables", + "get_variable_variables__variable_id__get": "Get variable", + // Policies + "create_policy_policies__post": "Create policy", + "get_policy_policies__policy_id__get": "Get policy", + "list_policies_policies__get": "List policies", + // Household + "calculate_household_household_calculate_post": "Calculate household", + "get_household_job_status_household_calculate__job_id__get": "Poll household job", + // Household impact + "calculate_household_impact_comparison_household_impact_post": "Calculate household impact", + "get_household_impact_job_status_household_impact__job_id__get": "Poll household impact", + // Economic impact + "economic_impact_analysis_economic_impact_post": "Run economic analysis", + "get_economic_impact_status_analysis_economic_impact__report_id__get": "Poll economic analysis", + // Datasets + "list_datasets_datasets__get": "List datasets", + "get_dataset_datasets__dataset_id__get": "Get dataset", + // Models + "list_tax_benefit_models_tax_benefit_models__get": "List models", + "get_tax_benefit_model_tax_benefit_models__model_id__get": "Get model", + // Simulations + "list_simulations_simulations__get": "List simulations", + "get_simulation_simulations__simulation_id__get": "Get simulation", + // Utility + "sleep": "Wait", + }; + const displayName = toolNameMap[toolName] || toolName + .replace(/_+/g, " ") + .replace(/\s+(get|post|put|delete)$/i, "") + .replace(/\s+/g, " ") + .trim(); return { type: "tool_use", title: displayName, @@ -145,7 +180,7 @@ function ToolCard({ step }: { step: ParsedStep }) { const [isExpanded, setIsExpanded] = useState(false); if (step.type === "agent") { - return null; // Hide agent messages, they're redundant with progress indicator + return null; } if (step.type === "tool_use") { @@ -153,13 +188,13 @@ function ToolCard({ step }: { step: ParsedStep }) {
{isExpanded && ( -
-
{step.content}
+
+
{step.content}
)}
@@ -213,7 +247,7 @@ function ToolCard({ step }: { step: ParsedStep }) { if (step.type === "assistant") { return (
-

{step.content}

+

{step.content}

); } @@ -229,22 +263,29 @@ function ProgressIndicator({ logs }: { logs: LogEntry[] }) { const hasHousehold = logs.some(l => l.message.includes("household")); const isComplete = logs.some(l => l.message.includes("Completed")); - if (isComplete) return "Complete"; - if (hasAnalysis) return "Running analysis..."; - if (hasPolicy) return "Creating policy..."; - if (hasHousehold) return "Calculating..."; - if (hasSearch) return "Searching parameters..."; - return "Starting..."; + if (isComplete) return "complete"; + if (hasAnalysis) return "running analysis..."; + if (hasPolicy) return "creating policy..."; + if (hasHousehold) return "calculating..."; + if (hasSearch) return "searching parameters..."; + return "starting..."; }, [logs]); if (logs.length === 0) return null; return ( -
- {stage !== "Complete" && ( -
+
+ {stage !== "complete" && ( +
)} - {stage} + {stage === "complete" && ( +
+ + + +
+ )} + {stage}
); } @@ -308,6 +349,11 @@ export function PolicyChat() { : "Analysis failed. Please try again."; } + // Parse and store steps with the message so they persist + const finalSteps = (data.logs || []) + .map((log: LogEntry) => parseLogEntry(log.message)) + .filter((step: ParsedStep) => step.type !== "unknown"); + setMessages((prev) => { const newMessages = [...prev]; const lastIndex = newMessages.length - 1; @@ -316,6 +362,7 @@ export function PolicyChat() { ...newMessages[lastIndex], content: finalContent, status: data.status, + steps: finalSteps, }; } return newMessages; @@ -396,9 +443,9 @@ export function PolicyChat() { const exampleQuestions = [ "What is the UK personal allowance for 2026?", - "Calculate tax for someone earning £50,000 in the UK", - "What would happen if we increased child benefit by 10%?", - "What benefits would a single parent with two children receive?", + "Calculate tax for someone earning £50,000", + "What if we increased child benefit by 10%?", + "What benefits would a single parent receive?", ]; return ( @@ -443,7 +490,7 @@ export function PolicyChat() { @@ -457,24 +504,24 @@ export function PolicyChat() { {message.role === "user" ? (
-

{message.content}

+

{message.content}

) : (
{/* Running state with live steps */} {(message.status === "pending" || message.status === "running") && ( -
+
{message.status === "pending" ? ( -
-
- Starting analysis... +
+
+ starting...
) : (
- {parsedSteps.slice(-10).map((step, j) => ( + {parsedSteps.slice(-12).map((step, j) => ( ))}
@@ -484,18 +531,18 @@ export function PolicyChat() { {/* Completed/failed state */} {(message.status === "completed" || message.status === "failed") && ( -
+
{/* Collapsible steps summary */} - {parsedSteps.length > 0 && ( + {message.steps && message.steps.length > 0 && (
- - + + - {parsedSteps.filter(s => s.type === "tool_use").length} tool calls executed + {message.steps.filter(s => s.type === "tool_use").length} tool calls -
- {parsedSteps.map((step, j) => ( +
+ {message.steps.map((step, j) => ( ))}
@@ -503,13 +550,13 @@ export function PolicyChat() { )} {/* Final response */} -
-
- +
+ {message.content}
@@ -534,12 +581,12 @@ export function PolicyChat() { onChange={(e) => setInput(e.target.value)} placeholder="Ask a policy question..." disabled={isLoading} - className="flex-1 px-4 py-3 text-sm font-mono border border-[var(--color-border)] rounded-xl bg-white focus:outline-none focus:ring-2 focus:ring-[var(--color-pe-green)] focus:border-transparent disabled:opacity-50 placeholder:text-[var(--color-text-muted)]" + className="flex-1 px-4 py-2.5 text-[13px] font-mono border border-[var(--color-border)] rounded-lg bg-white focus:outline-none focus:ring-2 focus:ring-[var(--color-pe-green)] focus:border-transparent disabled:opacity-50 placeholder:text-[var(--color-text-muted)]" />
-
); diff --git a/scripts/seed.py b/scripts/seed.py index 5ef2ed2..f3fbfa8 100644 --- a/scripts/seed.py +++ b/scripts/seed.py @@ -385,7 +385,8 @@ def seed_datasets(session, lite: bool = False): # UK datasets console.print(" Creating UK datasets...") - uk_datasets = ensure_uk_datasets() + data_folder = str(Path(__file__).parent.parent / "data") + uk_datasets = ensure_uk_datasets(data_folder=data_folder) # In lite mode, only upload FRS 2026 if lite: @@ -439,7 +440,7 @@ def seed_datasets(session, lite: bool = False): # US datasets console.print(" Creating US datasets...") - us_datasets = ensure_us_datasets() + us_datasets = ensure_us_datasets(data_folder=data_folder) # In lite mode, only upload CPS 2026 if lite: diff --git a/src/policyengine_api/agent_sandbox.py b/src/policyengine_api/agent_sandbox.py index 411d2f2..3987cda 100644 --- a/src/policyengine_api/agent_sandbox.py +++ b/src/policyengine_api/agent_sandbox.py @@ -45,11 +45,34 @@ - POST /analysis/economic-impact with tax_benefit_model_name, policy_id and dataset_id - GET /analysis/economic-impact/{report_id} for results (includes decile_impacts and program_statistics) +## Response formatting + +Follow PolicyEngine's writing style: + +1. **Active voice**: "The reform reduces poverty by 3.2%" not "Poverty is reduced by 3.2%" +2. **Quantitative precision**: Use specific numbers, avoid vague words like "significantly" or "substantially" +3. **Neutral tone**: Describe what policies do, not whether they're good or bad +4. **Tables for data**: Present breakdowns and comparisons in markdown tables + +Example response format: +| Item | Amount | +|------|--------| +| Income tax | £7,486 | +| National Insurance | £2,994 | +| **Total tax** | **£10,480** | + +- Gross income: £50,000 +- Net income: £39,520 +- Effective tax rate: 21.0% + +Avoid: "significantly reduces", "substantial savings", "unfortunately", "great news" +Prefer: specific percentages, pound/dollar amounts, neutral descriptions + ## Guidelines 1. Use the API tools to get accurate, current data -2. Be concise but thorough -3. For UK, amounts are in GBP. For US, amounts are in USD. +2. Be concise - lead with key numbers +3. For UK, amounts are in GBP (£). For US, amounts are in USD ($) 4. When polling async endpoints, use the sleep tool to wait 5-10 seconds between requests """ From b277187f3020b06bc2b5ec984c33b00e6f0cd208 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 30 Dec 2025 14:45:00 +0000 Subject: [PATCH 4/4] feat: improve agent chat UX and add conversation history - Fix tool name display with proper mapping (e.g. "Calculate household") - Add conversation history support for multi-turn chat - Move response styling to global CSS with Inter font - Add PolicyEngine style guide to agent prompt (tables, active voice) - Persist tool steps per message so they don't disappear --- docs/src/app/globals.css | 122 ++++++++++++++++++++++++++ docs/src/components/policy-chat.tsx | 119 +++---------------------- src/policyengine_api/agent_sandbox.py | 9 +- src/policyengine_api/api/agent.py | 20 ++++- 4 files changed, 161 insertions(+), 109 deletions(-) diff --git a/docs/src/app/globals.css b/docs/src/app/globals.css index 8c00e5f..68ea6b7 100644 --- a/docs/src/app/globals.css +++ b/docs/src/app/globals.css @@ -100,3 +100,125 @@ code { background: var(--color-pe-green); color: white; } + +/* Agent response content */ +.response-content { + font-family: var(--font-inter), -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; + font-size: 15px; + line-height: 1.7; + color: #1e293b; + -webkit-font-smoothing: antialiased; +} +.response-content p { + margin: 0.875em 0; +} +.response-content p:first-child { + margin-top: 0; +} +.response-content p:last-child { + margin-bottom: 0; +} +.response-content h1, +.response-content h2, +.response-content h3 { + font-weight: 600; + color: #0f172a; + margin-top: 1.5em; + margin-bottom: 0.5em; + line-height: 1.3; +} +.response-content h1 { + font-size: 1.375em; +} +.response-content h2 { + font-size: 1.125em; +} +.response-content h3 { + font-size: 1em; +} +.response-content h1:first-child, +.response-content h2:first-child, +.response-content h3:first-child { + margin-top: 0; +} +.response-content strong { + font-weight: 600; + color: #0f172a; +} +.response-content ul, +.response-content ol { + margin: 0.875em 0; + padding-left: 1.5em; +} +.response-content li { + margin: 0.375em 0; +} +.response-content li::marker { + color: #64748b; +} +.response-content code { + font-family: "JetBrains Mono", ui-monospace, monospace; + font-size: 0.875em; + background: #f1f5f9; + padding: 0.2em 0.4em; + border-radius: 4px; + color: #334155; +} +.response-content pre { + font-family: "JetBrains Mono", ui-monospace, monospace; + font-size: 13px; + background: #1e293b; + color: #e2e8f0; + padding: 1em 1.25em; + border-radius: 8px; + overflow-x: auto; + margin: 1em 0; + line-height: 1.6; +} +.response-content pre code { + background: none; + padding: 0; + font-size: inherit; + color: inherit; +} +.response-content table { + width: 100%; + border-collapse: collapse; + margin: 1em 0; + font-size: 14px; +} +.response-content th { + background: #f8fafc; + border: 1px solid #e2e8f0; + padding: 0.625em 0.875em; + text-align: left; + font-weight: 600; + color: #475569; +} +.response-content td { + border: 1px solid #e2e8f0; + padding: 0.625em 0.875em; + color: #334155; +} +.response-content tr:hover td { + background: #f8fafc; +} +.response-content blockquote { + border-left: 3px solid #2c6e49; + padding-left: 1em; + margin: 1em 0; + color: #64748b; + font-style: italic; +} +.response-content a { + color: #2c6e49; + text-decoration: underline; +} +.response-content a:hover { + color: #1a4a2e; +} +.response-content hr { + border: none; + border-top: 1px solid #e2e8f0; + margin: 1.5em 0; +} diff --git a/docs/src/components/policy-chat.tsx b/docs/src/components/policy-chat.tsx index 5a687fa..c47ab8f 100644 --- a/docs/src/components/policy-chat.tsx +++ b/docs/src/components/policy-chat.tsx @@ -388,6 +388,11 @@ export function PolicyChat() { pollIntervalRef.current = null; } + // Build history from completed messages (exclude pending/running ones) + const history = messages + .filter(m => m.status === "completed" || m.role === "user") + .map(m => ({ role: m.role, content: m.content })); + setMessages((prev) => [...prev, { role: "user", content: userMessage }]); setMessages((prev) => [ ...prev, @@ -398,7 +403,7 @@ export function PolicyChat() { const res = await fetch(`${baseUrl}/agent/run`, { method: "POST", headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ question: userMessage }), + body: JSON.stringify({ question: userMessage, history }), }); if (!res.ok) throw new Error(`HTTP ${res.status}`); @@ -555,7 +560,13 @@ export function PolicyChat() { ? "bg-red-50 border border-red-200" : "bg-white border border-[var(--color-border)]" }`}> -
+
{message.content} @@ -605,7 +616,7 @@ export function PolicyChat() {
-
); diff --git a/src/policyengine_api/agent_sandbox.py b/src/policyengine_api/agent_sandbox.py index 3987cda..09101dd 100644 --- a/src/policyengine_api/agent_sandbox.py +++ b/src/policyengine_api/agent_sandbox.py @@ -341,6 +341,7 @@ def _run_agent_impl( question: str, api_base_url: str = "https://v2.api.policyengine.org", call_id: str = "", + history: list[dict] | None = None, max_turns: int = 30, ) -> dict: """Core agent implementation.""" @@ -376,7 +377,13 @@ def log(msg: str) -> None: claude_tools.append(SLEEP_TOOL) client = anthropic.Anthropic() - messages = [{"role": "user", "content": question}] + + # Build messages with conversation history + messages = [] + if history: + for msg in history: + messages.append({"role": msg["role"], "content": msg["content"]}) + messages.append({"role": "user", "content": question}) final_response = None turns = 0 diff --git a/src/policyengine_api/api/agent.py b/src/policyengine_api/api/agent.py index 7389211..9ea4b8a 100644 --- a/src/policyengine_api/api/agent.py +++ b/src/policyengine_api/api/agent.py @@ -19,10 +19,18 @@ router = APIRouter(prefix="/agent", tags=["agent"]) +class ConversationMessage(BaseModel): + """A message in the conversation history.""" + + role: str # "user" or "assistant" + content: str + + class RunRequest(BaseModel): """Request to run the agent.""" question: str + history: list[ConversationMessage] = [] class RunResponse(BaseModel): @@ -67,12 +75,18 @@ class StatusResponse(BaseModel): _logs: dict[str, list[LogEntry]] = {} -def _run_local_agent(call_id: str, question: str, api_base_url: str) -> None: +def _run_local_agent( + call_id: str, + question: str, + api_base_url: str, + history: list[ConversationMessage] | None = None, +) -> None: """Run agent locally in a background thread.""" from policyengine_api.agent_sandbox import _run_agent_impl try: - result = _run_agent_impl(question, api_base_url, call_id) + history_dicts = [{"role": m.role, "content": m.content} for m in (history or [])] + result = _run_agent_impl(question, api_base_url, call_id, history_dicts) _calls[call_id]["status"] = result.get("status", "completed") _calls[call_id]["result"] = result except Exception as e: @@ -139,7 +153,7 @@ async def run_agent(request: RunRequest) -> RunResponse: # Run in background using asyncio loop = asyncio.get_event_loop() loop.run_in_executor( - None, _run_local_agent, call_id, request.question, api_base_url + None, _run_local_agent, call_id, request.question, api_base_url, request.history ) return RunResponse(call_id=call_id, status="running")