From e6a0ac506376158e359b6060e218b88a8efce9cc Mon Sep 17 00:00:00 2001 From: David Trimmer Date: Tue, 18 Nov 2025 19:09:45 -0500 Subject: [PATCH 1/5] PA EITC Fixes #95 --- .../Congressional-Hackathon-2025 | 1 + us/states/pa/data_exploration.ipynb | 397 ++++++++++++++++ us/states/pa/pa_dataset_summary_weighted.csv | 14 + us/states/pa/pa_eitc_reform_analysis.ipynb | 445 ++++++++++++++++++ us/states/pa/pa_eitc_reform_results.csv | 2 + 5 files changed, 859 insertions(+) create mode 160000 obbba_district_impacts/Congressional-Hackathon-2025 create mode 100644 us/states/pa/data_exploration.ipynb create mode 100644 us/states/pa/pa_dataset_summary_weighted.csv create mode 100644 us/states/pa/pa_eitc_reform_analysis.ipynb create mode 100644 us/states/pa/pa_eitc_reform_results.csv diff --git a/obbba_district_impacts/Congressional-Hackathon-2025 b/obbba_district_impacts/Congressional-Hackathon-2025 new file mode 160000 index 0000000..3f6d05e --- /dev/null +++ b/obbba_district_impacts/Congressional-Hackathon-2025 @@ -0,0 +1 @@ +Subproject commit 3f6d05e76400c6e396a3a4eddd34a7b3f6919fc3 diff --git a/us/states/pa/data_exploration.ipynb b/us/states/pa/data_exploration.ipynb new file mode 100644 index 0000000..843c424 --- /dev/null +++ b/us/states/pa/data_exploration.ipynb @@ -0,0 +1,397 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PA Dataset Exploration\n", + "\n", + "This notebook explores the Pennsylvania (PA) dataset to understand household counts, income distribution, and demographic characteristics." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\dtsax\\envs\\pe\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "from policyengine_us import Microsimulation\n", + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`\n" + ] + } + ], + "source": [ + "# Load PA dataset\n", + "sim = Microsimulation(dataset='hf://policyengine/test/PA.h5')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of households in dataset: 20,180\n", + "Household count (weighted): 4,435,467\n", + "Person count (weighted): 12,863,313\n" + ] + } + ], + "source": [ + "# Check dataset size\n", + "household_weight = sim.calculate(\"household_weight\", period=2025)\n", + "household_count = sim.calculate(\"household_count\", period=2025, map_to=\"household\")\n", + "person_count = sim.calculate(\"person_count\", period=2025, map_to=\"household\")\n", + "\n", + "print(f\"Number of households in dataset: {len(household_weight):,}\")\n", + "print(f\"Household count (weighted): {household_count.sum():,.0f}\")\n", + "print(f\"Person count (weighted): {person_count.sum():,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Income distribution:\n", + " Median AGI: $71,734\n", + " 75th percentile: $149,456\n", + " 90th percentile: $268,015\n", + " 95th percentile: $379,910\n", + " Max AGI: $1,838,621\n" + ] + } + ], + "source": [ + "# Check household income distribution\n", + "agi = sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"household\")\n", + "print(f\"Income distribution:\")\n", + "print(f\" Median AGI: ${agi.median():,.0f}\")\n", + "print(f\" 75th percentile: ${agi.quantile(0.75):,.0f}\")\n", + "print(f\" 90th percentile: ${agi.quantile(0.90):,.0f}\")\n", + "print(f\" 95th percentile: ${agi.quantile(0.95):,.0f}\")\n", + "print(f\" Max AGI: ${agi.max():,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Households with children (weighted):\n", + " Total households with children: 1,457,610\n", + " Households with 1 child: 734,446\n", + " Households with 2 children: 481,892\n", + " Households with 3+ children: 241,273\n" + ] + } + ], + "source": [ + "# Check households with children\n", + "is_child = sim.calculate(\"is_child\", period=2025, map_to=\"person\")\n", + "household_id = sim.calculate(\"household_id\", period=2025, map_to=\"person\")\n", + "household_weight = sim.calculate(\"household_weight\", period=2025, map_to=\"person\")\n", + "\n", + "# Create DataFrame\n", + "df_households = pd.DataFrame({\n", + " 'household_id': household_id,\n", + " 'is_child': is_child,\n", + " 'household_weight': household_weight\n", + "})\n", + "\n", + "# Count children per household\n", + "children_per_household = df_households.groupby('household_id').agg({\n", + " 'is_child': 'sum',\n", + " 'household_weight': 'first'\n", + "}).reset_index()\n", + "\n", + "# Calculate weighted household counts\n", + "total_households_with_children = children_per_household[children_per_household['is_child'] > 0]['household_weight'].sum()\n", + "households_with_1_child = children_per_household[children_per_household['is_child'] == 1]['household_weight'].sum()\n", + "households_with_2_children = children_per_household[children_per_household['is_child'] == 2]['household_weight'].sum()\n", + "households_with_3plus_children = children_per_household[children_per_household['is_child'] >= 3]['household_weight'].sum()\n", + "\n", + "print(f\"\\nHouseholds with children (weighted):\")\n", + "print(f\" Total households with children: {total_households_with_children:,.0f}\")\n", + "print(f\" Households with 1 child: {households_with_1_child:,.0f}\")\n", + "print(f\" Households with 2 children: {households_with_2_children:,.0f}\")\n", + "print(f\" Households with 3+ children: {households_with_3plus_children:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Children by age:\n", + " Total children under 18: 2,494,202\n", + " Children under 6: 780,623\n" + ] + } + ], + "source": [ + "# Check children by age groups\n", + "df = pd.DataFrame({\n", + " \"household_id\": sim.calculate(\"household_id\", map_to=\"person\"),\n", + " \"tax_unit_id\": sim.calculate(\"tax_unit_id\", map_to=\"person\"),\n", + " \"person_id\": sim.calculate(\"person_id\", map_to=\"person\"),\n", + " \"age\": sim.calculate(\"age\", map_to=\"person\"),\n", + " \"person_weight\": sim.calculate(\"person_weight\", map_to=\"person\")\n", + "})\n", + "\n", + "# Filter for children and apply weights\n", + "children_under_18_df = df[df['age'] < 18]\n", + "children_under_6_df = df[df['age'] < 6]\n", + "\n", + "# Calculate weighted totals\n", + "total_children = children_under_18_df['person_weight'].sum()\n", + "children_under_6 = children_under_6_df['person_weight'].sum()\n", + "\n", + "print(f\"\\nChildren by age:\")\n", + "print(f\" Total children under 18: {total_children:,.0f}\")\n", + "print(f\" Children under 6: {children_under_6:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "============================================================\n", + "PA DATASET SUMMARY - WEIGHTED (Population Estimates)\n", + "============================================================\n", + " Metric Value\n", + " Household count (weighted) 4,435,467\n", + " Person count (weighted) 12,863,313\n", + " Median AGI $71,734\n", + " 75th percentile AGI $149,456\n", + " 90th percentile AGI $268,015\n", + " 95th percentile AGI $379,910\n", + " Max AGI $1,838,621\n", + "Total households with children 1,457,610\n", + " Households with 1 child 734,446\n", + " Households with 2 children 481,892\n", + " Households with 3+ children 241,273\n", + " Total children under 18 2,494,202\n", + " Children under 6 780,623\n", + "============================================================\n", + "\n", + "Summary saved to: pa_dataset_summary_weighted.csv\n" + ] + } + ], + "source": [ + "# Create weighted summary table\n", + "weighted_summary_data = {\n", + " 'Metric': [\n", + " 'Household count (weighted)',\n", + " 'Person count (weighted)',\n", + " 'Median AGI',\n", + " '75th percentile AGI',\n", + " '90th percentile AGI',\n", + " '95th percentile AGI',\n", + " 'Max AGI',\n", + " 'Total households with children',\n", + " 'Households with 1 child',\n", + " 'Households with 2 children',\n", + " 'Households with 3+ children',\n", + " 'Total children under 18',\n", + " 'Children under 6'\n", + " ],\n", + " 'Value': [\n", + " f\"{household_count.sum():,.0f}\",\n", + " f\"{person_count.sum():,.0f}\",\n", + " f\"${agi.median():,.0f}\",\n", + " f\"${agi.quantile(0.75):,.0f}\",\n", + " f\"${agi.quantile(0.90):,.0f}\",\n", + " f\"${agi.quantile(0.95):,.0f}\",\n", + " f\"${agi.max():,.0f}\",\n", + " f\"{total_households_with_children:,.0f}\",\n", + " f\"{households_with_1_child:,.0f}\",\n", + " f\"{households_with_2_children:,.0f}\",\n", + " f\"{households_with_3plus_children:,.0f}\",\n", + " f\"{total_children:,.0f}\",\n", + " f\"{children_under_6:,.0f}\"\n", + " ]\n", + "}\n", + "\n", + "weighted_df = pd.DataFrame(weighted_summary_data)\n", + "\n", + "print(\"\\n\" + \"=\"*60)\n", + "print(\"PA DATASET SUMMARY - WEIGHTED (Population Estimates)\")\n", + "print(\"=\"*60)\n", + "print(weighted_df.to_string(index=False))\n", + "print(\"=\"*60)\n", + "\n", + "# Save table\n", + "weighted_df.to_csv('pa_dataset_summary_weighted.csv', index=False)\n", + "print(\"\\nSummary saved to: pa_dataset_summary_weighted.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Households with $0 income (uses agi and household_weight from earlier cells)\n", + "zero_income_mask = agi == 0\n", + "zero_income_weighted = household_weight[zero_income_mask].sum()\n", + "zero_income_unweighted = zero_income_mask.sum()\n", + "\n", + "print(\"\\n\" + \"=\"*70)\n", + "print(\"HOUSEHOLDS WITH $0 INCOME\")\n", + "print(\"=\"*70)\n", + "print(f\"Weighted count: {zero_income_weighted:,.0f}\")\n", + "print(f\"Unweighted count: {zero_income_unweighted:,}\")\n", + "print(f\"\\nPercentage of all households with $0 income:\")\n", + "print(f\" {zero_income_weighted / household_weight.sum() * 100:.2f}%\")\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "==========================================================================================\n", + "HOUSEHOLD COUNTS BY INCOME BRACKET\n", + "==========================================================================================\n", + "Income Bracket Households (Weighted) % of All Households Households (Unweighted)\n", + " $0-$10k 239,725,630 15.27% 4,540\n", + " $10k-$20k 37,046,016 2.36% 765\n", + " $20k-$30k 44,020,114 2.80% 723\n", + " $30k-$40k 108,601,465 6.92% 1,264\n", + " $40k-$50k 77,534,722 4.94% 1,034\n", + " $50k-$60k 66,831,837 4.26% 937\n", + "==========================================================================================\n", + "\n", + "Total households in $0-$60k range:\n", + " Weighted: 573,759,784\n", + " Unweighted: 9,263\n", + "\n", + "Percentage of all households in $0-$60k range:\n", + " 36.56%\n" + ] + } + ], + "source": [ + "# Household counts by income brackets (uses agi and household_weight from earlier cells)\n", + "# Define income brackets from $0-$10k up to $50k-$60k\n", + "income_brackets = [\n", + " (0, 10000, \"$0-$10k\"),\n", + " (10000, 20000, \"$10k-$20k\"),\n", + " (20000, 30000, \"$20k-$30k\"),\n", + " (30000, 40000, \"$30k-$40k\"),\n", + " (40000, 50000, \"$40k-$50k\"),\n", + " (50000, 60000, \"$50k-$60k\")\n", + "]\n", + "\n", + "# Get total households for percentage calculation\n", + "total_households_weighted = household_weight.sum()\n", + "\n", + "# Calculate weighted household counts for each bracket\n", + "bracket_data = []\n", + "for lower, upper, label in income_brackets:\n", + " mask = (agi >= lower) & (agi < upper)\n", + " weighted_count = household_weight[mask].sum()\n", + " unweighted_count = mask.sum()\n", + " pct_of_total = (weighted_count / total_households_weighted) * 100\n", + " \n", + " bracket_data.append({\n", + " \"Income Bracket\": label,\n", + " \"Households (Weighted)\": f\"{weighted_count:,.0f}\",\n", + " \"% of All Households\": f\"{pct_of_total:.2f}%\",\n", + " \"Households (Unweighted)\": f\"{unweighted_count:,}\"\n", + " })\n", + "\n", + "income_df = pd.DataFrame(bracket_data)\n", + "\n", + "print(\"\\n\" + \"=\"*90)\n", + "print(\"HOUSEHOLD COUNTS BY INCOME BRACKET\")\n", + "print(\"=\"*90)\n", + "print(income_df.to_string(index=False))\n", + "print(\"=\"*90)\n", + "\n", + "# Also calculate total across all brackets\n", + "total_weighted = sum([household_weight[(agi >= lower) & (agi < upper)].sum() for lower, upper, _ in income_brackets])\n", + "total_unweighted = sum([((agi >= lower) & (agi < upper)).sum() for lower, upper, _ in income_brackets])\n", + "print(f\"\\nTotal households in $0-$60k range:\")\n", + "print(f\" Weighted: {total_weighted:,.0f}\")\n", + "print(f\" Unweighted: {total_unweighted:,}\")\n", + "print(f\"\\nPercentage of all households in $0-$60k range:\")\n", + "print(f\" {total_weighted / total_households_weighted * 100:.2f}%\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pe", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/us/states/pa/pa_dataset_summary_weighted.csv b/us/states/pa/pa_dataset_summary_weighted.csv new file mode 100644 index 0000000..3d520be --- /dev/null +++ b/us/states/pa/pa_dataset_summary_weighted.csv @@ -0,0 +1,14 @@ +Metric,Value +Household count (weighted),"4,435,467" +Person count (weighted),"12,863,313" +Median AGI,"$71,734" +75th percentile AGI,"$149,456" +90th percentile AGI,"$268,015" +95th percentile AGI,"$379,910" +Max AGI,"$1,838,621" +Total households with children,"1,457,610" +Households with 1 child,"734,446" +Households with 2 children,"481,892" +Households with 3+ children,"241,273" +Total children under 18,"2,494,202" +Children under 6,"780,623" diff --git a/us/states/pa/pa_eitc_reform_analysis.ipynb b/us/states/pa/pa_eitc_reform_analysis.ipynb new file mode 100644 index 0000000..525ea0b --- /dev/null +++ b/us/states/pa/pa_eitc_reform_analysis.ipynb @@ -0,0 +1,445 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pennsylvania EITC Reform Analysis (2025)\n", + "\n", + "This notebook analyzes the impact of Pennsylvania's Working Pennsylvanians Tax Credit (EITC).\n", + "\n", + "## Baseline\n", + "- PA EITC is set to 0% (no state EITC)\n", + "\n", + "## Reform (Current Law)\n", + "- PA EITC matches 10% of the federal EITC\n", + "\n", + "## Metrics\n", + "We calculate:\n", + "- Budgetary impact (net cost)\n", + "- Winners (percentage of population affected)\n", + "- Overall poverty impact\n", + "- Child poverty impact" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "from policyengine_core.reforms import Reform\n", + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Helper Functions" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "def calculate_poverty(sim, period=2025, child_only=False):\n", + " \"\"\"\n", + " Calculate poverty rate and count.\n", + " \n", + " Args:\n", + " sim: Microsimulation object\n", + " period: Year to analyze\n", + " child_only: If True, only count children under 18\n", + " \n", + " Returns:\n", + " poverty_rate: Weighted poverty rate\n", + " people_in_poverty: Unweighted count\n", + " \"\"\"\n", + " age = sim.calculate(\"age\", period=period)\n", + " is_in_poverty = sim.calculate(\"person_in_poverty\", period=period)\n", + " person_weight = sim.calculate(\"person_weight\", period=period)\n", + " \n", + " if child_only:\n", + " mask = age < 18\n", + " else:\n", + " mask = np.ones_like(age, dtype=bool)\n", + " \n", + " # Weighted poverty rate\n", + " weighted_in_poverty = (is_in_poverty[mask] * person_weight[mask]).sum()\n", + " weighted_total = person_weight[mask].sum()\n", + " poverty_rate = weighted_in_poverty / weighted_total if weighted_total > 0 else 0\n", + " \n", + " # Unweighted count\n", + " unweighted_in_poverty = is_in_poverty[mask].sum()\n", + " unweighted_total = mask.sum()\n", + " \n", + " return {\n", + " \"poverty_rate\": poverty_rate,\n", + " \"people_in_poverty\": unweighted_in_poverty,\n", + " \"total_people\": unweighted_total\n", + " }\n", + "\n", + "def calculate_budgetary_impact(baseline_sim, reform_sim, variable, period=2025):\n", + " \"\"\"\n", + " Calculate the budgetary impact (net cost) of a reform.\n", + " \"\"\"\n", + " baseline_value = baseline_sim.calculate(variable, period=period, map_to=\"household\").sum()\n", + " reform_value = reform_sim.calculate(variable, period=period, map_to=\"household\").sum()\n", + " \n", + " return reform_value - baseline_value\n", + "\n", + "def calculate_winners(baseline_sim, reform_sim, period=2025):\n", + " \"\"\"\n", + " Calculate winners from a reform at the person level.\n", + " Winners: People in households with higher net income under reform\n", + " Returns percentage of total population.\n", + " \"\"\"\n", + " # Get household-level income change\n", + " baseline_income = baseline_sim.calculate(\"household_net_income\", period=period, map_to=\"household\")\n", + " reform_income = reform_sim.calculate(\"household_net_income\", period=period, map_to=\"household\")\n", + " income_change = reform_income - baseline_income\n", + " \n", + " # Map to person level\n", + " household_id_person = baseline_sim.calculate(\"household_id\", period=period, map_to=\"person\")\n", + " household_id_household = baseline_sim.calculate(\"household_id\", period=period, map_to=\"household\")\n", + " \n", + " # Create mapping of household_id to income_change\n", + " income_change_dict = dict(zip(household_id_household, income_change))\n", + " \n", + " # Map income change to each person\n", + " person_income_change = np.array([income_change_dict.get(hh_id, 0) for hh_id in household_id_person])\n", + " \n", + " # Count people who are winners\n", + " people_winning = (person_income_change > 1).sum() # Gained more than $1\n", + " total_people = len(person_income_change)\n", + " \n", + " # Calculate percentage\n", + " pct_winners = (people_winning / total_people * 100) if total_people > 0 else 0\n", + " \n", + " # Average gain for winning households\n", + " avg_gain = income_change[income_change > 1].mean() if (income_change > 1).sum() > 0 else 0\n", + " \n", + " return {\n", + " \"people_winning\": people_winning,\n", + " \"total_people\": total_people,\n", + " \"pct_winners\": pct_winners,\n", + " \"avg_gain\": avg_gain\n", + " }\n", + "\n", + "def format_currency(value):\n", + " \"\"\"Format value as currency in millions.\"\"\"\n", + " return f\"${value/1e6:.2f}M\"\n", + "\n", + "def format_percent(value):\n", + " \"\"\"Format value as percentage.\"\"\"\n", + " return f\"{value*100:.2f}%\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define Baseline and Reform" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reform functions defined!\n" + ] + } + ], + "source": [ + "def create_baseline():\n", + " \"\"\"Baseline: PA EITC at 0%\"\"\"\n", + " return Reform.from_dict(\n", + " {\n", + " \"gov.states.pa.tax.income.credits.eitc.match\": {\n", + " \"2025-01-01.2100-12-31\": 0.0\n", + " }\n", + " },\n", + " country_id=\"us\",\n", + " )\n", + "\n", + "def create_reform():\n", + " \"\"\"Reform: PA EITC at 10% (current law)\"\"\"\n", + " return Reform.from_dict(\n", + " {\n", + " \"gov.states.pa.tax.income.credits.eitc.match\": {\n", + " \"2025-01-01.2100-12-31\": 0.1\n", + " }\n", + " },\n", + " country_id=\"us\",\n", + " )\n", + "\n", + "print(\"Reform functions defined!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Simulations" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading baseline (PA EITC at 0%)...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✓ Baseline loaded\n", + "\n", + "Loading reform (PA EITC at 10%)...\n", + "✓ Reform loaded\n", + "\n", + "============================================================\n", + "All simulations ready!\n", + "============================================================\n" + ] + } + ], + "source": [ + "print(\"Loading baseline (PA EITC at 0%)...\")\n", + "baseline_reform = create_baseline()\n", + "baseline = Microsimulation(dataset='hf://policyengine/test/PA.h5', reform=baseline_reform)\n", + "print(\"✓ Baseline loaded\")\n", + "\n", + "print(\"\\nLoading reform (PA EITC at 10%)...\")\n", + "reform = create_reform()\n", + "reform_sim = Microsimulation(dataset='hf://policyengine/test/PA.h5', reform=reform)\n", + "print(\"✓ Reform loaded\")\n", + "\n", + "print(\"\\n\" + \"=\"*60)\n", + "print(\"All simulations ready!\")\n", + "print(\"=\"*60)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Calculate Impacts" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✓ All impacts calculated\n" + ] + } + ], + "source": [ + "# Baseline metrics\n", + "baseline_overall_pov = calculate_poverty(baseline, child_only=False)\n", + "baseline_child_pov = calculate_poverty(baseline, child_only=True)\n", + "\n", + "# Reform metrics\n", + "reform_overall_pov = calculate_poverty(reform_sim, child_only=False)\n", + "reform_child_pov = calculate_poverty(reform_sim, child_only=True)\n", + "\n", + "# Budgetary impact\n", + "eitc_cost = calculate_budgetary_impact(baseline, reform_sim, \"pa_eitc\")\n", + "\n", + "# Winners (at person level)\n", + "winners = calculate_winners(baseline, reform_sim)\n", + "\n", + "print(\"✓ All impacts calculated\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Results Summary" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "================================================================================\n", + "PA EITC REFORM IMPACTS (2025)\n", + "Baseline: PA EITC at 0% | Reform: PA EITC at 10% federal match\n", + "================================================================================\n", + "\n", + "================================BUDGETARY IMPACT================================\n", + "PA EITC net cost: $88.92M\n", + "\n", + "==============================WINNERS (POPULATION)==============================\n", + "People gaining income: 7,408 (14.42% of population)\n", + " Average household gain: $170.28\n", + "\n", + "============================POVERTY IMPACT - OVERALL============================\n", + "Baseline poverty rate: 16.11%\n", + "Reform poverty rate: 15.93%\n", + "Absolute reduction: 0.18%\n", + "Relative reduction: 1.10%\n", + "People lifted from poverty: 19,239\n", + "\n", + "===========================POVERTY IMPACT - CHILDREN============================\n", + "Baseline child poverty rate: 20.61%\n", + "Reform child poverty rate: 20.24%\n", + "Absolute reduction: 0.38%\n", + "Relative reduction: 1.83%\n", + "Children lifted from poverty: 6,983\n", + "================================================================================\n" + ] + } + ], + "source": [ + "print(\"\\n\" + \"=\"*80)\n", + "print(\"PA EITC REFORM IMPACTS (2025)\")\n", + "print(\"Baseline: PA EITC at 0% | Reform: PA EITC at 10% federal match\")\n", + "print(\"=\"*80)\n", + "\n", + "print(f\"\\n{'BUDGETARY IMPACT':=^80}\")\n", + "print(f\"PA EITC net cost: {format_currency(eitc_cost)}\")\n", + "\n", + "print(f\"\\n{'WINNERS (POPULATION)':=^80}\")\n", + "print(f\"People gaining income: {winners['people_winning']:,} ({winners['pct_winners']:.2f}% of population)\")\n", + "print(f\" Average household gain: ${winners['avg_gain']:,.2f}\")\n", + "\n", + "print(f\"\\n{'POVERTY IMPACT - OVERALL':=^80}\")\n", + "print(f\"Baseline poverty rate: {format_percent(baseline_overall_pov['poverty_rate'])}\")\n", + "print(f\"Reform poverty rate: {format_percent(reform_overall_pov['poverty_rate'])}\")\n", + "overall_pov_reduction = baseline_overall_pov['poverty_rate'] - reform_overall_pov['poverty_rate']\n", + "overall_pov_pct_reduction = (overall_pov_reduction / baseline_overall_pov['poverty_rate'] * 100) if baseline_overall_pov['poverty_rate'] > 0 else 0\n", + "print(f\"Absolute reduction: {format_percent(overall_pov_reduction)}\")\n", + "print(f\"Relative reduction: {overall_pov_pct_reduction:.2f}%\")\n", + "print(f\"People lifted from poverty: {int(baseline_overall_pov['people_in_poverty'] - reform_overall_pov['people_in_poverty']):,}\")\n", + "\n", + "print(f\"\\n{'POVERTY IMPACT - CHILDREN':=^80}\")\n", + "print(f\"Baseline child poverty rate: {format_percent(baseline_child_pov['poverty_rate'])}\")\n", + "print(f\"Reform child poverty rate: {format_percent(reform_child_pov['poverty_rate'])}\")\n", + "child_pov_reduction = baseline_child_pov['poverty_rate'] - reform_child_pov['poverty_rate']\n", + "child_pov_pct_reduction = (child_pov_reduction / baseline_child_pov['poverty_rate'] * 100) if baseline_child_pov['poverty_rate'] > 0 else 0\n", + "print(f\"Absolute reduction: {format_percent(child_pov_reduction)}\")\n", + "print(f\"Relative reduction: {child_pov_pct_reduction:.2f}%\")\n", + "print(f\"Children lifted from poverty: {int(baseline_child_pov['people_in_poverty'] - reform_child_pov['people_in_poverty']):,}\")\n", + "print(\"=\"*80)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export Results" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "==============================================================================================================\n", + "PA EITC REFORM SUMMARY\n", + "==============================================================================================================\n", + "Scenario PA EITC Match Net Cost Overall Poverty Change (%) Child Poverty Change (%) % Population Winning\n", + " Reform 10% $88.92M 1.10% 1.83% 14.42%\n", + "==============================================================================================================\n", + "\n", + "✓ Exported to: pa_eitc_reform_results.csv\n" + ] + } + ], + "source": [ + "# Calculate poverty changes\n", + "overall_pov_reduction = baseline_overall_pov['poverty_rate'] - reform_overall_pov['poverty_rate']\n", + "overall_pov_pct_reduction = (overall_pov_reduction / baseline_overall_pov['poverty_rate'] * 100) if baseline_overall_pov['poverty_rate'] > 0 else 0\n", + "child_pov_reduction = baseline_child_pov['poverty_rate'] - reform_child_pov['poverty_rate']\n", + "child_pov_pct_reduction = (child_pov_reduction / baseline_child_pov['poverty_rate'] * 100) if baseline_child_pov['poverty_rate'] > 0 else 0\n", + "\n", + "# Create results DataFrame (reform only)\n", + "results = [\n", + " {\n", + " \"Scenario\": \"Reform\",\n", + " \"PA EITC Match\": \"10%\",\n", + " \"Net Cost\": format_currency(eitc_cost),\n", + " \"Overall Poverty Change (%)\": f\"{overall_pov_pct_reduction:.2f}%\",\n", + " \"Child Poverty Change (%)\": f\"{child_pov_pct_reduction:.2f}%\",\n", + " \"% Population Winning\": f\"{winners['pct_winners']:.2f}%\"\n", + " }\n", + "]\n", + "\n", + "df_results = pd.DataFrame(results)\n", + "\n", + "print(\"\\n\" + \"=\"*110)\n", + "print(\"PA EITC REFORM SUMMARY\")\n", + "print(\"=\"*110)\n", + "print(df_results.to_string(index=False))\n", + "print(\"=\"*110)\n", + "\n", + "# Export to CSV\n", + "df_results.to_csv(\"pa_eitc_reform_results.csv\", index=False)\n", + "print(\"\\n✓ Exported to: pa_eitc_reform_results.csv\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pe", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/us/states/pa/pa_eitc_reform_results.csv b/us/states/pa/pa_eitc_reform_results.csv new file mode 100644 index 0000000..826ebaf --- /dev/null +++ b/us/states/pa/pa_eitc_reform_results.csv @@ -0,0 +1,2 @@ +Scenario,PA EITC Match,Net Cost,Overall Poverty Change (%),Child Poverty Change (%),% Population Winning +Reform,10%,$88.92M,1.10%,1.83%,14.42% From 7c7479901d346b04f1075f83000434fd5bb14321 Mon Sep 17 00:00:00 2001 From: David Trimmer Date: Mon, 1 Dec 2025 14:23:29 -0500 Subject: [PATCH 2/5] PA EITC Fixes #95 --- us/states/pa/data_exploration.ipynb | 48 ++++++++++++++----- us/states/pa/pa_eitc_reform_analysis.ipynb | 55 +++++++++------------- us/states/pa/pa_eitc_reform_results.csv | 2 +- 3 files changed, 57 insertions(+), 48 deletions(-) diff --git a/us/states/pa/data_exploration.ipynb b/us/states/pa/data_exploration.ipynb index 843c424..7338362 100644 --- a/us/states/pa/data_exploration.ipynb +++ b/us/states/pa/data_exploration.ipynb @@ -274,13 +274,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "======================================================================\n", + "HOUSEHOLDS WITH $0 INCOME\n", + "======================================================================\n", + "Weighted count: 106,517,064\n", + "Unweighted count: 445,821.65727878007\n", + "\n", + "Percentage of all households with $0 income:\n", + " 6.79%\n", + "======================================================================\n" + ] + } + ], "source": [ - "# Households with $0 income (uses agi and household_weight from earlier cells)\n", - "zero_income_mask = agi == 0\n", - "zero_income_weighted = household_weight[zero_income_mask].sum()\n", + "# Households with $0 income\n", + "# Re-fetch household_weight at household level (was overwritten in cell 5 at person level)\n", + "household_weight_hh = sim.calculate(\"household_weight\", period=2025)\n", + "agi_hh = sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"household\")\n", + "\n", + "zero_income_mask = agi_hh == 0\n", + "zero_income_weighted = household_weight_hh[zero_income_mask].sum()\n", "zero_income_unweighted = zero_income_mask.sum()\n", "\n", "print(\"\\n\" + \"=\"*70)\n", @@ -289,13 +310,13 @@ "print(f\"Weighted count: {zero_income_weighted:,.0f}\")\n", "print(f\"Unweighted count: {zero_income_unweighted:,}\")\n", "print(f\"\\nPercentage of all households with $0 income:\")\n", - "print(f\" {zero_income_weighted / household_weight.sum() * 100:.2f}%\")\n", + "print(f\" {zero_income_weighted / household_weight_hh.sum() * 100:.2f}%\")\n", "print(\"=\"*70)" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -325,7 +346,8 @@ } ], "source": [ - "# Household counts by income brackets (uses agi and household_weight from earlier cells)\n", + "# Household counts by income brackets\n", + "# Use household-level variables from previous cell\n", "# Define income brackets from $0-$10k up to $50k-$60k\n", "income_brackets = [\n", " (0, 10000, \"$0-$10k\"),\n", @@ -337,13 +359,13 @@ "]\n", "\n", "# Get total households for percentage calculation\n", - "total_households_weighted = household_weight.sum()\n", + "total_households_weighted = household_weight_hh.sum()\n", "\n", "# Calculate weighted household counts for each bracket\n", "bracket_data = []\n", "for lower, upper, label in income_brackets:\n", - " mask = (agi >= lower) & (agi < upper)\n", - " weighted_count = household_weight[mask].sum()\n", + " mask = (agi_hh >= lower) & (agi_hh < upper)\n", + " weighted_count = household_weight_hh[mask].sum()\n", " unweighted_count = mask.sum()\n", " pct_of_total = (weighted_count / total_households_weighted) * 100\n", " \n", @@ -363,8 +385,8 @@ "print(\"=\"*90)\n", "\n", "# Also calculate total across all brackets\n", - "total_weighted = sum([household_weight[(agi >= lower) & (agi < upper)].sum() for lower, upper, _ in income_brackets])\n", - "total_unweighted = sum([((agi >= lower) & (agi < upper)).sum() for lower, upper, _ in income_brackets])\n", + "total_weighted = sum([household_weight_hh[(agi_hh >= lower) & (agi_hh < upper)].sum() for lower, upper, _ in income_brackets])\n", + "total_unweighted = sum([((agi_hh >= lower) & (agi_hh < upper)).sum() for lower, upper, _ in income_brackets])\n", "print(f\"\\nTotal households in $0-$60k range:\")\n", "print(f\" Weighted: {total_weighted:,.0f}\")\n", "print(f\" Unweighted: {total_unweighted:,}\")\n", diff --git a/us/states/pa/pa_eitc_reform_analysis.ipynb b/us/states/pa/pa_eitc_reform_analysis.ipynb index 525ea0b..d76387a 100644 --- a/us/states/pa/pa_eitc_reform_analysis.ipynb +++ b/us/states/pa/pa_eitc_reform_analysis.ipynb @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -43,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -149,7 +149,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -195,27 +195,14 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Loading baseline (PA EITC at 0%)...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "Loading baseline (PA EITC at 0%)...\n", "✓ Baseline loaded\n", "\n", "Loading reform (PA EITC at 10%)...\n", @@ -252,7 +239,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -290,7 +277,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -311,18 +298,18 @@ " Average household gain: $170.28\n", "\n", "============================POVERTY IMPACT - OVERALL============================\n", - "Baseline poverty rate: 16.11%\n", - "Reform poverty rate: 15.93%\n", - "Absolute reduction: 0.18%\n", - "Relative reduction: 1.10%\n", - "People lifted from poverty: 19,239\n", + "Baseline poverty rate: 16.33%\n", + "Reform poverty rate: 16.31%\n", + "Absolute reduction: 0.02%\n", + "Relative reduction: 0.14%\n", + "People lifted from poverty: 5,273\n", "\n", "===========================POVERTY IMPACT - CHILDREN============================\n", - "Baseline child poverty rate: 20.61%\n", - "Reform child poverty rate: 20.24%\n", - "Absolute reduction: 0.38%\n", - "Relative reduction: 1.83%\n", - "Children lifted from poverty: 6,983\n", + "Baseline child poverty rate: 21.03%\n", + "Reform child poverty rate: 21.03%\n", + "Absolute reduction: 0.00%\n", + "Relative reduction: 0.00%\n", + "Children lifted from poverty: 0\n", "================================================================================\n" ] } @@ -369,7 +356,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -381,7 +368,7 @@ "PA EITC REFORM SUMMARY\n", "==============================================================================================================\n", "Scenario PA EITC Match Net Cost Overall Poverty Change (%) Child Poverty Change (%) % Population Winning\n", - " Reform 10% $88.92M 1.10% 1.83% 14.42%\n", + " Reform 10% $88.92M 0.14% 0.00% 14.42%\n", "==============================================================================================================\n", "\n", "✓ Exported to: pa_eitc_reform_results.csv\n" @@ -423,7 +410,7 @@ ], "metadata": { "kernelspec": { - "display_name": "pe", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -437,7 +424,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.10" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/us/states/pa/pa_eitc_reform_results.csv b/us/states/pa/pa_eitc_reform_results.csv index 826ebaf..9c2ae22 100644 --- a/us/states/pa/pa_eitc_reform_results.csv +++ b/us/states/pa/pa_eitc_reform_results.csv @@ -1,2 +1,2 @@ Scenario,PA EITC Match,Net Cost,Overall Poverty Change (%),Child Poverty Change (%),% Population Winning -Reform,10%,$88.92M,1.10%,1.83%,14.42% +Reform,10%,$88.92M,0.14%,0.00%,14.42% From c17c07619341ffa38518e19eb703eb3d20fee110 Mon Sep 17 00:00:00 2001 From: David Trimmer Date: Mon, 1 Dec 2025 15:11:53 -0500 Subject: [PATCH 3/5] new data --- us/states/pa/data_exploration.ipynb | 180 ++++++++----------- us/states/pa/pa_dataset_summary_weighted.csv | 26 +-- us/states/pa/pa_eitc_reform_analysis.ipynb | 46 +++-- us/states/pa/pa_eitc_reform_results.csv | 2 +- us/states/pa/test_dataset.ipynb | 62 +++++++ 5 files changed, 183 insertions(+), 133 deletions(-) create mode 100644 us/states/pa/test_dataset.ipynb diff --git a/us/states/pa/data_exploration.ipynb b/us/states/pa/data_exploration.ipynb index 7338362..1438223 100644 --- a/us/states/pa/data_exploration.ipynb +++ b/us/states/pa/data_exploration.ipynb @@ -13,20 +13,13 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\Users\\dtsax\\envs\\pe\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], + "outputs": [], "source": [ "from policyengine_us import Microsimulation\n", "import pandas as pd\n", - "import numpy as np" + "import numpy as np\n", + "\n", + "PA_DATASET = \"hf://policyengine/policyengine-us-data/states/PA.h5\"" ] }, { @@ -35,16 +28,23 @@ "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`\n" - ] + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5801ad26ee654449ab3be3dc62d09e8b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "PA.h5: 0%| | 0.00/149M [00:00= lower) & (agi_hh < upper)\n", - " weighted_count = household_weight_hh[mask].sum()\n", - " unweighted_count = mask.sum()\n", - " pct_of_total = (weighted_count / total_households_weighted) * 100\n", + " count = weights[mask].sum()\n", + " pct_of_total = (count / total_households) * 100\n", " \n", " bracket_data.append({\n", " \"Income Bracket\": label,\n", - " \"Households (Weighted)\": f\"{weighted_count:,.0f}\",\n", - " \"% of All Households\": f\"{pct_of_total:.2f}%\",\n", - " \"Households (Unweighted)\": f\"{unweighted_count:,}\"\n", + " \"Households\": f\"{count:,.0f}\",\n", + " \"% of All Households\": f\"{pct_of_total:.2f}%\"\n", " })\n", "\n", "income_df = pd.DataFrame(bracket_data)\n", "\n", - "print(\"\\n\" + \"=\"*90)\n", + "print(\"\\n\" + \"=\"*70)\n", "print(\"HOUSEHOLD COUNTS BY INCOME BRACKET\")\n", - "print(\"=\"*90)\n", + "print(\"=\"*70)\n", "print(income_df.to_string(index=False))\n", - "print(\"=\"*90)\n", + "print(\"=\"*70)\n", "\n", - "# Also calculate total across all brackets\n", - "total_weighted = sum([household_weight_hh[(agi_hh >= lower) & (agi_hh < upper)].sum() for lower, upper, _ in income_brackets])\n", - "total_unweighted = sum([((agi_hh >= lower) & (agi_hh < upper)).sum() for lower, upper, _ in income_brackets])\n", - "print(f\"\\nTotal households in $0-$60k range:\")\n", - "print(f\" Weighted: {total_weighted:,.0f}\")\n", - "print(f\" Unweighted: {total_unweighted:,}\")\n", - "print(f\"\\nPercentage of all households in $0-$60k range:\")\n", - "print(f\" {total_weighted / total_households_weighted * 100:.2f}%\")" + "# Total in $0-$60k range\n", + "total_in_range = sum([weights[(agi_hh >= lower) & (agi_hh < upper)].sum() for lower, upper, _ in income_brackets])\n", + "print(f\"\\nTotal households in $0-$60k range: {total_in_range:,.0f}\")\n", + "print(f\"Percentage of all households in $0-$60k range: {total_in_range / total_households * 100:.2f}%\")" ] } ], "metadata": { "kernelspec": { - "display_name": "pe", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -411,7 +389,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.10" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/us/states/pa/pa_dataset_summary_weighted.csv b/us/states/pa/pa_dataset_summary_weighted.csv index 3d520be..a81a10c 100644 --- a/us/states/pa/pa_dataset_summary_weighted.csv +++ b/us/states/pa/pa_dataset_summary_weighted.csv @@ -1,14 +1,14 @@ Metric,Value -Household count (weighted),"4,435,467" -Person count (weighted),"12,863,313" -Median AGI,"$71,734" -75th percentile AGI,"$149,456" -90th percentile AGI,"$268,015" -95th percentile AGI,"$379,910" -Max AGI,"$1,838,621" -Total households with children,"1,457,610" -Households with 1 child,"734,446" -Households with 2 children,"481,892" -Households with 3+ children,"241,273" -Total children under 18,"2,494,202" -Children under 6,"780,623" +Household count (weighted),"4,662,650" +Person count (weighted),"13,217,679" +Median AGI,"$73,962" +75th percentile AGI,"$169,351" +90th percentile AGI,"$404,412" +95th percentile AGI,"$511,573" +Max AGI,"$3,229,514" +Total households with children,"1,489,087" +Households with 1 child,"720,370" +Households with 2 children,"504,238" +Households with 3+ children,"264,479" +Total children under 18,"2,597,022" +Children under 6,"799,168" diff --git a/us/states/pa/pa_eitc_reform_analysis.ipynb b/us/states/pa/pa_eitc_reform_analysis.ipynb index d76387a..51b3393 100644 --- a/us/states/pa/pa_eitc_reform_analysis.ipynb +++ b/us/states/pa/pa_eitc_reform_analysis.ipynb @@ -24,14 +24,16 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from policyengine_us import Microsimulation\n", "from policyengine_core.reforms import Reform\n", "import pandas as pd\n", - "import numpy as np" + "import numpy as np\n", + "\n", + "PA_DATASET = \"hf://policyengine/policyengine-us-data/states/PA.h5\"" ] }, { @@ -217,12 +219,12 @@ "source": [ "print(\"Loading baseline (PA EITC at 0%)...\")\n", "baseline_reform = create_baseline()\n", - "baseline = Microsimulation(dataset='hf://policyengine/test/PA.h5', reform=baseline_reform)\n", + "baseline = Microsimulation(dataset=PA_DATASET, reform=baseline_reform)\n", "print(\"✓ Baseline loaded\")\n", "\n", "print(\"\\nLoading reform (PA EITC at 10%)...\")\n", "reform = create_reform()\n", - "reform_sim = Microsimulation(dataset='hf://policyengine/test/PA.h5', reform=reform)\n", + "reform_sim = Microsimulation(dataset=PA_DATASET, reform=reform)\n", "print(\"✓ Reform loaded\")\n", "\n", "print(\"\\n\" + \"=\"*60)\n", @@ -242,6 +244,14 @@ "execution_count": 5, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Invalid values for enum StateGroup: ['PA']. These will be encoded as index 0.\n", + "Invalid values for enum StateGroup: ['PA']. These will be encoded as index 0.\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -291,25 +301,25 @@ "================================================================================\n", "\n", "================================BUDGETARY IMPACT================================\n", - "PA EITC net cost: $88.92M\n", + "PA EITC net cost: $221.73M\n", "\n", "==============================WINNERS (POPULATION)==============================\n", - "People gaining income: 7,408 (14.42% of population)\n", - " Average household gain: $170.28\n", + "People gaining income: 18,872 (9.03% of population)\n", + " Average household gain: $298.55\n", "\n", "============================POVERTY IMPACT - OVERALL============================\n", - "Baseline poverty rate: 16.33%\n", - "Reform poverty rate: 16.31%\n", - "Absolute reduction: 0.02%\n", - "Relative reduction: 0.14%\n", - "People lifted from poverty: 5,273\n", + "Baseline poverty rate: 12.24%\n", + "Reform poverty rate: 11.68%\n", + "Absolute reduction: 0.56%\n", + "Relative reduction: 4.58%\n", + "People lifted from poverty: 43,702\n", "\n", "===========================POVERTY IMPACT - CHILDREN============================\n", - "Baseline child poverty rate: 21.03%\n", - "Reform child poverty rate: 21.03%\n", - "Absolute reduction: 0.00%\n", - "Relative reduction: 0.00%\n", - "Children lifted from poverty: 0\n", + "Baseline child poverty rate: 10.96%\n", + "Reform child poverty rate: 10.82%\n", + "Absolute reduction: 0.14%\n", + "Relative reduction: 1.29%\n", + "Children lifted from poverty: 4,276\n", "================================================================================\n" ] } @@ -368,7 +378,7 @@ "PA EITC REFORM SUMMARY\n", "==============================================================================================================\n", "Scenario PA EITC Match Net Cost Overall Poverty Change (%) Child Poverty Change (%) % Population Winning\n", - " Reform 10% $88.92M 0.14% 0.00% 14.42%\n", + " Reform 10% $221.73M 4.58% 1.29% 9.03%\n", "==============================================================================================================\n", "\n", "✓ Exported to: pa_eitc_reform_results.csv\n" diff --git a/us/states/pa/pa_eitc_reform_results.csv b/us/states/pa/pa_eitc_reform_results.csv index 9c2ae22..0c80080 100644 --- a/us/states/pa/pa_eitc_reform_results.csv +++ b/us/states/pa/pa_eitc_reform_results.csv @@ -1,2 +1,2 @@ Scenario,PA EITC Match,Net Cost,Overall Poverty Change (%),Child Poverty Change (%),% Population Winning -Reform,10%,$88.92M,0.14%,0.00%,14.42% +Reform,10%,$221.73M,4.58%,1.29%,9.03% diff --git a/us/states/pa/test_dataset.ipynb b/us/states/pa/test_dataset.ipynb new file mode 100644 index 0000000..09e4470 --- /dev/null +++ b/us/states/pa/test_dataset.ipynb @@ -0,0 +1,62 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "cell-0", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "194e07960bcb4dffbe230453d20cdfd5", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "PA.h5: 0%| | 0.00/149M [00:00 Date: Mon, 1 Dec 2025 15:22:34 -0500 Subject: [PATCH 4/5] update --- us/states/pa/pa_eitc_reform_analysis.ipynb | 100 +++++++++++---------- us/states/pa/pa_eitc_reform_results.csv | 2 +- 2 files changed, 55 insertions(+), 47 deletions(-) diff --git a/us/states/pa/pa_eitc_reform_analysis.ipynb b/us/states/pa/pa_eitc_reform_analysis.ipynb index 51b3393..8ff23aa 100644 --- a/us/states/pa/pa_eitc_reform_analysis.ipynb +++ b/us/states/pa/pa_eitc_reform_analysis.ipynb @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -60,11 +60,11 @@ " \n", " Returns:\n", " poverty_rate: Weighted poverty rate\n", - " people_in_poverty: Unweighted count\n", + " people_in_poverty: Weighted count\n", " \"\"\"\n", - " age = sim.calculate(\"age\", period=period)\n", - " is_in_poverty = sim.calculate(\"person_in_poverty\", period=period)\n", - " person_weight = sim.calculate(\"person_weight\", period=period)\n", + " age = np.array(sim.calculate(\"age\", period=period))\n", + " is_in_poverty = np.array(sim.calculate(\"person_in_poverty\", period=period))\n", + " person_weight = np.array(sim.calculate(\"person_weight\", period=period))\n", " \n", " if child_only:\n", " mask = age < 18\n", @@ -76,14 +76,13 @@ " weighted_total = person_weight[mask].sum()\n", " poverty_rate = weighted_in_poverty / weighted_total if weighted_total > 0 else 0\n", " \n", - " # Unweighted count\n", - " unweighted_in_poverty = is_in_poverty[mask].sum()\n", - " unweighted_total = mask.sum()\n", + " # Weighted count of people in poverty\n", + " people_in_poverty = weighted_in_poverty\n", " \n", " return {\n", " \"poverty_rate\": poverty_rate,\n", - " \"people_in_poverty\": unweighted_in_poverty,\n", - " \"total_people\": unweighted_total\n", + " \"people_in_poverty\": people_in_poverty,\n", + " \"total_people\": weighted_total\n", " }\n", "\n", "def calculate_budgetary_impact(baseline_sim, reform_sim, variable, period=2025):\n", @@ -97,18 +96,20 @@ "\n", "def calculate_winners(baseline_sim, reform_sim, period=2025):\n", " \"\"\"\n", - " Calculate winners from a reform at the person level.\n", - " Winners: People in households with higher net income under reform\n", - " Returns percentage of total population.\n", + " Calculate winners from a reform at the person level (weighted).\n", + " Winners: People in households with higher net income under reform.\n", + " Returns weighted count and percentage of total population.\n", " \"\"\"\n", " # Get household-level income change\n", - " baseline_income = baseline_sim.calculate(\"household_net_income\", period=period, map_to=\"household\")\n", - " reform_income = reform_sim.calculate(\"household_net_income\", period=period, map_to=\"household\")\n", + " baseline_income = np.array(baseline_sim.calculate(\"household_net_income\", period=period, map_to=\"household\"))\n", + " reform_income = np.array(reform_sim.calculate(\"household_net_income\", period=period, map_to=\"household\"))\n", + " household_weight = np.array(baseline_sim.calculate(\"household_weight\", period=period))\n", " income_change = reform_income - baseline_income\n", " \n", - " # Map to person level\n", - " household_id_person = baseline_sim.calculate(\"household_id\", period=period, map_to=\"person\")\n", - " household_id_household = baseline_sim.calculate(\"household_id\", period=period, map_to=\"household\")\n", + " # Get person-level data\n", + " household_id_person = np.array(baseline_sim.calculate(\"household_id\", period=period, map_to=\"person\"))\n", + " household_id_household = np.array(baseline_sim.calculate(\"household_id\", period=period, map_to=\"household\"))\n", + " person_weight = np.array(baseline_sim.calculate(\"person_weight\", period=period))\n", " \n", " # Create mapping of household_id to income_change\n", " income_change_dict = dict(zip(household_id_household, income_change))\n", @@ -116,15 +117,20 @@ " # Map income change to each person\n", " person_income_change = np.array([income_change_dict.get(hh_id, 0) for hh_id in household_id_person])\n", " \n", - " # Count people who are winners\n", - " people_winning = (person_income_change > 1).sum() # Gained more than $1\n", - " total_people = len(person_income_change)\n", + " # Weighted count of people who are winners (gained more than $1)\n", + " winners_mask = person_income_change > 1\n", + " people_winning = person_weight[winners_mask].sum()\n", + " total_people = person_weight.sum()\n", " \n", " # Calculate percentage\n", " pct_winners = (people_winning / total_people * 100) if total_people > 0 else 0\n", " \n", - " # Average gain for winning households\n", - " avg_gain = income_change[income_change > 1].mean() if (income_change > 1).sum() > 0 else 0\n", + " # Average gain for winning households (weighted)\n", + " winning_hh_mask = income_change > 1\n", + " if winning_hh_mask.sum() > 0:\n", + " avg_gain = np.average(income_change[winning_hh_mask], weights=household_weight[winning_hh_mask])\n", + " else:\n", + " avg_gain = 0\n", " \n", " return {\n", " \"people_winning\": people_winning,\n", @@ -151,7 +157,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -197,7 +203,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -241,7 +247,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -287,7 +293,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -304,22 +310,22 @@ "PA EITC net cost: $221.73M\n", "\n", "==============================WINNERS (POPULATION)==============================\n", - "People gaining income: 18,872 (9.03% of population)\n", - " Average household gain: $298.55\n", + "People gaining income: 3,066,550 (23.20% of population)\n", + "Average gain per household: $298.55\n", "\n", "============================POVERTY IMPACT - OVERALL============================\n", - "Baseline poverty rate: 12.24%\n", - "Reform poverty rate: 11.68%\n", - "Absolute reduction: 0.56%\n", - "Relative reduction: 4.58%\n", - "People lifted from poverty: 43,702\n", + "Baseline poverty rate: 13.54%\n", + "Reform poverty rate: 13.21%\n", + "Absolute reduction: 0.33%\n", + "Relative reduction: 2.44%\n", + "People lifted from poverty: 43,703\n", "\n", "===========================POVERTY IMPACT - CHILDREN============================\n", - "Baseline child poverty rate: 10.96%\n", - "Reform child poverty rate: 10.82%\n", - "Absolute reduction: 0.14%\n", - "Relative reduction: 1.29%\n", - "Children lifted from poverty: 4,276\n", + "Baseline child poverty rate: 11.31%\n", + "Reform child poverty rate: 11.15%\n", + "Absolute reduction: 0.16%\n", + "Relative reduction: 1.43%\n", + "Children lifted from poverty: 4,277\n", "================================================================================\n" ] } @@ -334,8 +340,8 @@ "print(f\"PA EITC net cost: {format_currency(eitc_cost)}\")\n", "\n", "print(f\"\\n{'WINNERS (POPULATION)':=^80}\")\n", - "print(f\"People gaining income: {winners['people_winning']:,} ({winners['pct_winners']:.2f}% of population)\")\n", - "print(f\" Average household gain: ${winners['avg_gain']:,.2f}\")\n", + "print(f\"People gaining income: {winners['people_winning']:,.0f} ({winners['pct_winners']:.2f}% of population)\")\n", + "print(f\"Average gain per household: ${winners['avg_gain']:,.2f}\")\n", "\n", "print(f\"\\n{'POVERTY IMPACT - OVERALL':=^80}\")\n", "print(f\"Baseline poverty rate: {format_percent(baseline_overall_pov['poverty_rate'])}\")\n", @@ -344,7 +350,8 @@ "overall_pov_pct_reduction = (overall_pov_reduction / baseline_overall_pov['poverty_rate'] * 100) if baseline_overall_pov['poverty_rate'] > 0 else 0\n", "print(f\"Absolute reduction: {format_percent(overall_pov_reduction)}\")\n", "print(f\"Relative reduction: {overall_pov_pct_reduction:.2f}%\")\n", - "print(f\"People lifted from poverty: {int(baseline_overall_pov['people_in_poverty'] - reform_overall_pov['people_in_poverty']):,}\")\n", + "people_lifted = baseline_overall_pov['people_in_poverty'] - reform_overall_pov['people_in_poverty']\n", + "print(f\"People lifted from poverty: {people_lifted:,.0f}\")\n", "\n", "print(f\"\\n{'POVERTY IMPACT - CHILDREN':=^80}\")\n", "print(f\"Baseline child poverty rate: {format_percent(baseline_child_pov['poverty_rate'])}\")\n", @@ -353,7 +360,8 @@ "child_pov_pct_reduction = (child_pov_reduction / baseline_child_pov['poverty_rate'] * 100) if baseline_child_pov['poverty_rate'] > 0 else 0\n", "print(f\"Absolute reduction: {format_percent(child_pov_reduction)}\")\n", "print(f\"Relative reduction: {child_pov_pct_reduction:.2f}%\")\n", - "print(f\"Children lifted from poverty: {int(baseline_child_pov['people_in_poverty'] - reform_child_pov['people_in_poverty']):,}\")\n", + "children_lifted = baseline_child_pov['people_in_poverty'] - reform_child_pov['people_in_poverty']\n", + "print(f\"Children lifted from poverty: {children_lifted:,.0f}\")\n", "print(\"=\"*80)" ] }, @@ -366,7 +374,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -378,7 +386,7 @@ "PA EITC REFORM SUMMARY\n", "==============================================================================================================\n", "Scenario PA EITC Match Net Cost Overall Poverty Change (%) Child Poverty Change (%) % Population Winning\n", - " Reform 10% $221.73M 4.58% 1.29% 9.03%\n", + " Reform 10% $221.73M 2.44% 1.43% 23.20%\n", "==============================================================================================================\n", "\n", "✓ Exported to: pa_eitc_reform_results.csv\n" diff --git a/us/states/pa/pa_eitc_reform_results.csv b/us/states/pa/pa_eitc_reform_results.csv index 0c80080..7c4f0b1 100644 --- a/us/states/pa/pa_eitc_reform_results.csv +++ b/us/states/pa/pa_eitc_reform_results.csv @@ -1,2 +1,2 @@ Scenario,PA EITC Match,Net Cost,Overall Poverty Change (%),Child Poverty Change (%),% Population Winning -Reform,10%,$221.73M,4.58%,1.29%,9.03% +Reform,10%,$221.73M,2.44%,1.43%,23.20% From a4104d31f078a5372e59fc8fb7ed91a2e7bc25dc Mon Sep 17 00:00:00 2001 From: David Trimmer Date: Mon, 1 Dec 2025 15:27:00 -0500 Subject: [PATCH 5/5] household winners --- us/states/pa/pa_eitc_reform_analysis.ipynb | 41 ++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/us/states/pa/pa_eitc_reform_analysis.ipynb b/us/states/pa/pa_eitc_reform_analysis.ipynb index 8ff23aa..30b4f24 100644 --- a/us/states/pa/pa_eitc_reform_analysis.ipynb +++ b/us/states/pa/pa_eitc_reform_analysis.ipynb @@ -365,6 +365,47 @@ "print(\"=\"*80)" ] }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "HOUSEHOLDS BENEFITTING FROM PA EITC\n", + "======================================================================\n", + "Households benefitting: 742,696\n", + "Total households: 4,662,650\n", + "Percentage of households: 15.93%\n", + "======================================================================\n" + ] + } + ], + "source": [ + "# Calculate households benefitting (weighted)\n", + "baseline_hh_income = np.array(baseline.calculate(\"household_net_income\", period=2025, map_to=\"household\"))\n", + "reform_hh_income = np.array(reform_sim.calculate(\"household_net_income\", period=2025, map_to=\"household\"))\n", + "household_weight = np.array(baseline.calculate(\"household_weight\", period=2025))\n", + "\n", + "hh_income_change = reform_hh_income - baseline_hh_income\n", + "hh_benefitting_mask = hh_income_change > 1 # Gained more than $1\n", + "\n", + "households_benefitting = household_weight[hh_benefitting_mask].sum()\n", + "total_households = household_weight.sum()\n", + "pct_households_benefitting = (households_benefitting / total_households) * 100\n", + "\n", + "print(\"=\"*70)\n", + "print(\"HOUSEHOLDS BENEFITTING FROM PA EITC\")\n", + "print(\"=\"*70)\n", + "print(f\"Households benefitting: {households_benefitting:,.0f}\")\n", + "print(f\"Total households: {total_households:,.0f}\")\n", + "print(f\"Percentage of households: {pct_households_benefitting:.2f}%\")\n", + "print(\"=\"*70)" + ] + }, { "cell_type": "markdown", "metadata": {},