From 929d93b6f3f3f19040ec476869b4df7e1e47f429 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Sun, 5 Oct 2025 15:11:33 -0400 Subject: [PATCH 1/6] Make country package purely deterministic - read stochastic variables from dataset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change removes all random number generation from policyengine-us. All stochastic take-up variables are now generated in policyengine-us-data and read from the dataset. The country package is now a purely deterministic rules engine. ## Key Changes ### Removed - All take-up seed variables (snap_take_up_seed, aca_take_up_seed, medicaid_take_up_seed) - All formula references to the take-up rate parameters (the rates are now applied in policyengine-us-data; this patch does not delete the parameter files themselves) ### Simplified All takes_up_* variables now use dataset values with deterministic fallbacks: - takes_up_snap_if_eligible (default: True) - takes_up_aca_if_eligible (default: True) - takes_up_medicaid_if_eligible (default: True) ## Trade-offs **IMPORTANT**: Take-up rates can no longer be adjusted dynamically via policy reforms or in the web app. They are fixed in the microdata. This is an acceptable trade-off for the cleaner architecture of keeping the country package purely deterministic. To adjust take-up rates for analysis, the microdata must be regenerated with updated parameter values in policyengine-us-data. 
Related: policyengine-us-data PR (must be merged FIRST) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../variables/gov/aca/aca_take_up_seed.py | 8 -------- .../variables/gov/aca/takes_up_aca_if_eligible.py | 13 +++++++------ .../gov/hhs/medicaid/medicaid_take_up_seed.py | 8 -------- .../hhs/medicaid/takes_up_medicaid_if_eligible.py | 13 +++++++------ .../variables/gov/usda/snap/snap_take_up_seed.py | 8 -------- .../gov/usda/snap/takes_up_snap_if_eligible.py | 13 +++++++------ 6 files changed, 21 insertions(+), 42 deletions(-) delete mode 100644 policyengine_us/variables/gov/aca/aca_take_up_seed.py delete mode 100644 policyengine_us/variables/gov/hhs/medicaid/medicaid_take_up_seed.py delete mode 100644 policyengine_us/variables/gov/usda/snap/snap_take_up_seed.py diff --git a/policyengine_us/variables/gov/aca/aca_take_up_seed.py b/policyengine_us/variables/gov/aca/aca_take_up_seed.py deleted file mode 100644 index 911ab0217c2..00000000000 --- a/policyengine_us/variables/gov/aca/aca_take_up_seed.py +++ /dev/null @@ -1,8 +0,0 @@ -from policyengine_us.model_api import * - - -class aca_take_up_seed(Variable): - value_type = float - entity = TaxUnit - label = "Randomly assigned seed for ACA take-up" - definition_period = YEAR diff --git a/policyengine_us/variables/gov/aca/takes_up_aca_if_eligible.py b/policyengine_us/variables/gov/aca/takes_up_aca_if_eligible.py index b354bc2a5fd..1dc7d5accf4 100644 --- a/policyengine_us/variables/gov/aca/takes_up_aca_if_eligible.py +++ b/policyengine_us/variables/gov/aca/takes_up_aca_if_eligible.py @@ -4,10 +4,11 @@ class takes_up_aca_if_eligible(Variable): value_type = bool entity = TaxUnit - label = "Whether a random eligible SPM unit does not claim ACA Premium Tax Credit" + label = "Whether an eligible tax unit claims ACA Premium Tax Credit" + documentation = ( + "Generated stochastically in the dataset using take-up rates. " + "No formula - purely deterministic rules engine." 
+ ) definition_period = YEAR - - def formula(tax_unit, period, parameters): - seed = tax_unit("aca_take_up_seed", period) - takeup_rate = parameters(period).gov.aca.takeup_rate - return seed < takeup_rate + # For policy calculator (non-dataset), defaults to True (full take-up assumption) + default_value = True diff --git a/policyengine_us/variables/gov/hhs/medicaid/medicaid_take_up_seed.py b/policyengine_us/variables/gov/hhs/medicaid/medicaid_take_up_seed.py deleted file mode 100644 index e81d43af0d2..00000000000 --- a/policyengine_us/variables/gov/hhs/medicaid/medicaid_take_up_seed.py +++ /dev/null @@ -1,8 +0,0 @@ -from policyengine_us.model_api import * - - -class medicaid_take_up_seed(Variable): - value_type = float - entity = Person - label = "Randomly assigned seed for Medicaid take-up" - definition_period = YEAR diff --git a/policyengine_us/variables/gov/hhs/medicaid/takes_up_medicaid_if_eligible.py b/policyengine_us/variables/gov/hhs/medicaid/takes_up_medicaid_if_eligible.py index c1497f24557..98cc18ea8d8 100644 --- a/policyengine_us/variables/gov/hhs/medicaid/takes_up_medicaid_if_eligible.py +++ b/policyengine_us/variables/gov/hhs/medicaid/takes_up_medicaid_if_eligible.py @@ -4,10 +4,11 @@ class takes_up_medicaid_if_eligible(Variable): value_type = bool entity = Person - label = "Whether a random eligible person unit does not enroll in Medicaid" + label = "Whether an eligible person enrolls in Medicaid" + documentation = ( + "Generated stochastically in the dataset using take-up rates. " + "No formula - purely deterministic rules engine." 
+ ) definition_period = YEAR - - def formula(person, period, parameters): - seed = person("medicaid_take_up_seed", period) - takeup_rate = parameters(period).gov.hhs.medicaid.takeup_rate - return seed < takeup_rate + # For policy calculator (non-dataset), defaults to True (full take-up assumption) + default_value = True diff --git a/policyengine_us/variables/gov/usda/snap/snap_take_up_seed.py b/policyengine_us/variables/gov/usda/snap/snap_take_up_seed.py deleted file mode 100644 index 32ed1cc93d8..00000000000 --- a/policyengine_us/variables/gov/usda/snap/snap_take_up_seed.py +++ /dev/null @@ -1,8 +0,0 @@ -from policyengine_us.model_api import * - - -class snap_take_up_seed(Variable): - value_type = float - entity = SPMUnit - label = "Randomly assigned seed for SNAP take-up" - definition_period = YEAR diff --git a/policyengine_us/variables/gov/usda/snap/takes_up_snap_if_eligible.py b/policyengine_us/variables/gov/usda/snap/takes_up_snap_if_eligible.py index 96a1179c69c..942859274d6 100644 --- a/policyengine_us/variables/gov/usda/snap/takes_up_snap_if_eligible.py +++ b/policyengine_us/variables/gov/usda/snap/takes_up_snap_if_eligible.py @@ -4,10 +4,11 @@ class takes_up_snap_if_eligible(Variable): value_type = bool entity = SPMUnit - label = "Whether a random eligible SPM unit does not claim SNAP" + label = "Whether an eligible SPM unit claims SNAP" + documentation = ( + "Generated stochastically in the dataset using take-up rates. " + "No formula - purely deterministic rules engine." 
+ ) definition_period = YEAR - - def formula(spm_unit, period, parameters): - seed = spm_unit("snap_take_up_seed", period) - takeup_rate = parameters(period).gov.usda.snap.takeup_rate - return seed < takeup_rate + # For policy calculator (non-dataset), defaults to True (full take-up assumption) + default_value = True From 45dc93d377bf987cbc7c74534fd971099ca5bb5f Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Mon, 10 Nov 2025 05:51:39 -0600 Subject: [PATCH 2/6] Add Head Start and Early Head Start takeup variables - Create takes_up_head_start_if_eligible and takes_up_early_head_start_if_eligible - Update head_start and early_head_start to use takeup in microsimulation - Add unit=USD and simplify labels to match conventions - Takeup is generated stochastically in dataset, defaults to True in policy calculator --- .../gov/hhs/head_start/early_head_start.py | 12 ++++++++++-- .../variables/gov/hhs/head_start/head_start.py | 12 ++++++++++-- .../takes_up_early_head_start_if_eligible.py | 14 ++++++++++++++ .../head_start/takes_up_head_start_if_eligible.py | 14 ++++++++++++++ 4 files changed, 48 insertions(+), 4 deletions(-) create mode 100644 policyengine_us/variables/gov/hhs/head_start/takes_up_early_head_start_if_eligible.py create mode 100644 policyengine_us/variables/gov/hhs/head_start/takes_up_head_start_if_eligible.py diff --git a/policyengine_us/variables/gov/hhs/head_start/early_head_start.py b/policyengine_us/variables/gov/hhs/head_start/early_head_start.py index 51d09a7f281..48b639fc50b 100644 --- a/policyengine_us/variables/gov/hhs/head_start/early_head_start.py +++ b/policyengine_us/variables/gov/hhs/head_start/early_head_start.py @@ -4,12 +4,16 @@ class early_head_start(Variable): value_type = float entity = Person - label = "Amount of Early Head Start benefit" + label = "Early Head Start" + unit = USD definition_period = YEAR defined_for = "is_early_head_start_eligible" reference = 
"https://headstart.gov/program-data/article/head-start-program-facts-fiscal-year-2022" def formula(person, period, parameters): + takes_up = person("takes_up_early_head_start_if_eligible", period) + is_in_microsim = hasattr(person.simulation, "dataset") + p = parameters(period).gov.hhs.head_start.early_head_start state = person.household("state_code_str", period) spending = p.spending[state] @@ -17,4 +21,8 @@ def formula(person, period, parameters): mask = enrollment > 0 result = np.zeros_like(p.spending[state]) result[mask] = spending[mask] / enrollment[mask] - return result + + if is_in_microsim: + return result * takes_up + else: + return result diff --git a/policyengine_us/variables/gov/hhs/head_start/head_start.py b/policyengine_us/variables/gov/hhs/head_start/head_start.py index e714f751de2..880819d2c9e 100644 --- a/policyengine_us/variables/gov/hhs/head_start/head_start.py +++ b/policyengine_us/variables/gov/hhs/head_start/head_start.py @@ -4,12 +4,16 @@ class head_start(Variable): value_type = float entity = Person - label = "Amount of Head Start benefit" + label = "Head Start" + unit = USD definition_period = YEAR defined_for = "is_head_start_eligible" reference = "https://headstart.gov/program-data/article/head-start-program-facts-fiscal-year-2022" def formula(person, period, parameters): + takes_up = person("takes_up_head_start_if_eligible", period) + is_in_microsim = hasattr(person.simulation, "dataset") + p = parameters(period).gov.hhs.head_start state = person.household("state_code_str", period) spending = p.spending[state] @@ -17,4 +21,8 @@ def formula(person, period, parameters): mask = enrollment > 0 result = np.zeros_like(p.spending[state]) result[mask] = spending[mask] / enrollment[mask] - return result + + if is_in_microsim: + return result * takes_up + else: + return result diff --git a/policyengine_us/variables/gov/hhs/head_start/takes_up_early_head_start_if_eligible.py 
b/policyengine_us/variables/gov/hhs/head_start/takes_up_early_head_start_if_eligible.py new file mode 100644 index 00000000000..f02196ae783 --- /dev/null +++ b/policyengine_us/variables/gov/hhs/head_start/takes_up_early_head_start_if_eligible.py @@ -0,0 +1,14 @@ +from policyengine_us.model_api import * + + +class takes_up_early_head_start_if_eligible(Variable): + value_type = bool + entity = Person + label = "Whether an eligible person enrolls in Early Head Start" + documentation = ( + "Generated stochastically in the dataset using take-up rates. " + "No formula - purely deterministic rules engine." + ) + definition_period = YEAR + # For policy calculator (non-dataset), defaults to True (full take-up assumption) + default_value = True diff --git a/policyengine_us/variables/gov/hhs/head_start/takes_up_head_start_if_eligible.py b/policyengine_us/variables/gov/hhs/head_start/takes_up_head_start_if_eligible.py new file mode 100644 index 00000000000..cd7b73ccdfa --- /dev/null +++ b/policyengine_us/variables/gov/hhs/head_start/takes_up_head_start_if_eligible.py @@ -0,0 +1,14 @@ +from policyengine_us.model_api import * + + +class takes_up_head_start_if_eligible(Variable): + value_type = bool + entity = Person + label = "Whether an eligible person enrolls in Head Start" + documentation = ( + "Generated stochastically in the dataset using take-up rates. " + "No formula - purely deterministic rules engine." + ) + definition_period = YEAR + # For policy calculator (non-dataset), defaults to True (full take-up assumption) + default_value = True From 23efea62a15fd2a15ffd5acba03b7f5c4dc453fa Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Mon, 10 Nov 2025 06:10:49 -0600 Subject: [PATCH 3/6] Fix vectorization bug in Head Start categorical eligibility Changed np.any(programs) to programs > 0 to preserve array structure. The np.any() call was collapsing the entire array into a single boolean, causing all people to be categorically eligible if ANY tax unit qualified. 
This manifested when using axes - eligibility showed True at all income levels even when income_eligible was correctly False at high incomes. Fixes the issue where Early Head Start benefits were incorrectly given to high-income households (e.g., $200k) in vectorized calculations. --- .../gov/hhs/head_start/is_head_start_categorically_eligible.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/policyengine_us/variables/gov/hhs/head_start/is_head_start_categorically_eligible.py b/policyengine_us/variables/gov/hhs/head_start/is_head_start_categorically_eligible.py index 5ead8ef2d06..8f3930b5b75 100644 --- a/policyengine_us/variables/gov/hhs/head_start/is_head_start_categorically_eligible.py +++ b/policyengine_us/variables/gov/hhs/head_start/is_head_start_categorically_eligible.py @@ -15,4 +15,4 @@ def formula(person, period, parameters): tax_unit = person.tax_unit p = parameters(period).gov.hhs.head_start programs = add(tax_unit, period, p.categorical_eligibility) - return np.any(programs) + return programs > 0 From bc5b9c2cd80b50f862fc0cc8b30e9605aecda1a5 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Mon, 10 Nov 2025 06:23:29 -0600 Subject: [PATCH 4/6] Revert vectorization fix - moved to separate PR #6804 The vectorization fix is now in its own PR (#6804) to keep the takeup migration PR focused on moving randomness to the data package. 
--- .../gov/hhs/head_start/is_head_start_categorically_eligible.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/policyengine_us/variables/gov/hhs/head_start/is_head_start_categorically_eligible.py b/policyengine_us/variables/gov/hhs/head_start/is_head_start_categorically_eligible.py index 8f3930b5b75..5ead8ef2d06 100644 --- a/policyengine_us/variables/gov/hhs/head_start/is_head_start_categorically_eligible.py +++ b/policyengine_us/variables/gov/hhs/head_start/is_head_start_categorically_eligible.py @@ -15,4 +15,4 @@ def formula(person, period, parameters): tax_unit = person.tax_unit p = parameters(period).gov.hhs.head_start programs = add(tax_unit, period, p.categorical_eligibility) - return programs > 0 + return np.any(programs) From f7b2b9024950dae54536205303d803dadbe8fa80 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Mon, 10 Nov 2025 08:55:01 -0600 Subject: [PATCH 5/6] Add changelog entry for Head Start takeup variables --- changelog_entry.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29bb2d..cae4cd466e0 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,6 @@ +- bump: minor + changes: + added: + - Add Head Start and Early Head Start takeup variables for dataset-driven stochastic modeling + changed: + - Update head_start and early_head_start to use takeup in microsimulation (defaults to True in policy calculator) From 2f66c279f97e8e79b9a972e12ff0ee3b2ceb1850 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Mon, 10 Nov 2025 09:55:05 -0600 Subject: [PATCH 6/6] Remove obsolete takeup seed tests These tests covered the old formula-based takeup, which compared seed variables against takeup rates. In the new design, takeup is generated in the dataset (policyengine-us-data) and the variables have no formula (just default_value = True). 
Removed: - takes_up_snap_if_eligible.yaml - takes_up_medicaid_if_eligible.yaml - takes_up_aca_if_eligible.yaml The stochastic behavior is now tested in the data package, not the rules engine. --- .../gov/aca/ptc/takes_up_aca_if_eligible.yaml | 13 ------------- .../hhs/medicaid/takes_up_medicaid_if_eligible.yaml | 13 ------------- .../gov/usda/snap/takes_up_snap_if_eligible.yaml | 13 ------------- 3 files changed, 39 deletions(-) delete mode 100644 policyengine_us/tests/policy/baseline/gov/aca/ptc/takes_up_aca_if_eligible.yaml delete mode 100644 policyengine_us/tests/policy/baseline/gov/hhs/medicaid/takes_up_medicaid_if_eligible.yaml delete mode 100644 policyengine_us/tests/policy/baseline/gov/usda/snap/takes_up_snap_if_eligible.yaml diff --git a/policyengine_us/tests/policy/baseline/gov/aca/ptc/takes_up_aca_if_eligible.yaml b/policyengine_us/tests/policy/baseline/gov/aca/ptc/takes_up_aca_if_eligible.yaml deleted file mode 100644 index 0f401e1b11c..00000000000 --- a/policyengine_us/tests/policy/baseline/gov/aca/ptc/takes_up_aca_if_eligible.yaml +++ /dev/null @@ -1,13 +0,0 @@ -- name: Seed above takeup rate - period: 2025 - input: - aca_take_up_seed: 0.7 - output: - takes_up_aca_if_eligible: false - -- name: Seed below takeup rate - period: 2025 - input: - aca_take_up_seed: 0.6 - output: - takes_up_aca_if_eligible: true diff --git a/policyengine_us/tests/policy/baseline/gov/hhs/medicaid/takes_up_medicaid_if_eligible.yaml b/policyengine_us/tests/policy/baseline/gov/hhs/medicaid/takes_up_medicaid_if_eligible.yaml deleted file mode 100644 index 0ec73681c48..00000000000 --- a/policyengine_us/tests/policy/baseline/gov/hhs/medicaid/takes_up_medicaid_if_eligible.yaml +++ /dev/null @@ -1,13 +0,0 @@ -- name: Seed above takeup rate - period: 2025 - input: - medicaid_take_up_seed: 1 - output: - takes_up_medicaid_if_eligible: false - -- name: Seed below takeup rate - period: 2025 - input: - medicaid_take_up_seed: 0.9 - output: - takes_up_medicaid_if_eligible: true diff --git 
a/policyengine_us/tests/policy/baseline/gov/usda/snap/takes_up_snap_if_eligible.yaml b/policyengine_us/tests/policy/baseline/gov/usda/snap/takes_up_snap_if_eligible.yaml deleted file mode 100644 index b8bdfdc7fed..00000000000 --- a/policyengine_us/tests/policy/baseline/gov/usda/snap/takes_up_snap_if_eligible.yaml +++ /dev/null @@ -1,13 +0,0 @@ -- name: Seed above takeup rate - period: 2022 - input: - snap_take_up_seed: 0.9 - output: - takes_up_snap_if_eligible: false - -- name: Seed below takeup rate - period: 2022 - input: - snap_take_up_seed: 0.8 - output: - takes_up_snap_if_eligible: true