diff --git a/obbba_district_impacts/Congressional-Hackathon-2025 b/obbba_district_impacts/Congressional-Hackathon-2025 new file mode 160000 index 0000000..3f6d05e --- /dev/null +++ b/obbba_district_impacts/Congressional-Hackathon-2025 @@ -0,0 +1 @@ +Subproject commit 3f6d05e76400c6e396a3a4eddd34a7b3f6919fc3 diff --git a/us/states/pa/data_exploration.ipynb b/us/states/pa/data_exploration.ipynb new file mode 100644 index 0000000..1438223 --- /dev/null +++ b/us/states/pa/data_exploration.ipynb @@ -0,0 +1,397 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PA Dataset Exploration\n", + "\n", + "This notebook explores the Pennsylvania (PA) dataset to understand household counts, income distribution, and demographic characteristics." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "PA_DATASET = \"hf://policyengine/policyengine-us-data/states/PA.h5\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5801ad26ee654449ab3be3dc62d09e8b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "PA.h5: 0%| | 0.00/149M [00:00 0]['household_weight'].sum()\n", + "households_with_1_child = children_per_household[children_per_household['is_child'] == 1]['household_weight'].sum()\n", + "households_with_2_children = children_per_household[children_per_household['is_child'] == 2]['household_weight'].sum()\n", + "households_with_3plus_children = children_per_household[children_per_household['is_child'] >= 3]['household_weight'].sum()\n", + "\n", + "print(f\"\\nHouseholds with children (weighted):\")\n", + "print(f\" Total households with children: {total_households_with_children:,.0f}\")\n", + "print(f\" Households with 1 child: {households_with_1_child:,.0f}\")\n", + "print(f\" Households with 2 children: {households_with_2_children:,.0f}\")\n", + "print(f\" Households with 3+ children: {households_with_3plus_children:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Children by age:\n", + " Total children under 18: 2,597,022\n", + " Children under 6: 799,168\n" + ] + } + ], + "source": [ + "# Check children by age groups\n", + "df = pd.DataFrame({\n", + " \"household_id\": sim.calculate(\"household_id\", map_to=\"person\"),\n", + " \"tax_unit_id\": sim.calculate(\"tax_unit_id\", map_to=\"person\"),\n", + " \"person_id\": sim.calculate(\"person_id\", map_to=\"person\"),\n", + " \"age\": sim.calculate(\"age\", map_to=\"person\"),\n", + " \"person_weight\": sim.calculate(\"person_weight\", map_to=\"person\")\n", + "})\n", + "\n", + "# Filter for children and apply weights\n", + "children_under_18_df = df[df['age'] < 18]\n", + "children_under_6_df = df[df['age'] < 6]\n", + "\n", + "# Calculate weighted totals\n", + "total_children = children_under_18_df['person_weight'].sum()\n", + "children_under_6 = children_under_6_df['person_weight'].sum()\n", + "\n", + "print(f\"\\nChildren by age:\")\n", + "print(f\" Total children under 18: {total_children:,.0f}\")\n", + "print(f\" Children under 6: {children_under_6:,.0f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "============================================================\n", + "PA DATASET SUMMARY - WEIGHTED (Population Estimates)\n", + "============================================================\n", + " Metric Value\n", + " Household count (weighted) 4,662,650\n", + " Person count (weighted) 13,217,679\n", + " Median AGI $73,962\n", + " 75th percentile AGI $169,351\n", + " 90th percentile AGI $404,412\n", + " 95th percentile AGI $511,573\n", + " Max AGI $3,229,514\n", + "Total households with children 1,489,087\n", + " Households with 1 child 720,370\n", + " Households with 2 children 504,238\n", + " Households with 3+ children 264,479\n", + " Total children under 18 2,597,022\n", + " Children under 6 799,168\n", + "============================================================\n", + "\n", + "Summary saved to: pa_dataset_summary_weighted.csv\n" + ] + } + ], + "source": [ + "# Create weighted summary table\n", + "weighted_summary_data = {\n", + " 'Metric': [\n", + " 'Household count (weighted)',\n", + " 'Person count (weighted)',\n", + " 'Median AGI',\n", + " '75th percentile AGI',\n", + " '90th percentile AGI',\n", + " '95th percentile AGI',\n", + " 'Max AGI',\n", + " 'Total households with children',\n", + " 'Households with 1 child',\n", + " 'Households with 2 children',\n", + " 'Households with 3+ children',\n", + " 'Total children under 18',\n", + " 'Children under 6'\n", + " ],\n", + " 'Value': [\n", + " f\"{household_count.sum():,.0f}\",\n", + " f\"{person_count.sum():,.0f}\",\n", + " f\"${agi.median():,.0f}\",\n", + " f\"${agi.quantile(0.75):,.0f}\",\n", + " f\"${agi.quantile(0.90):,.0f}\",\n", + " f\"${agi.quantile(0.95):,.0f}\",\n", + " f\"${agi.max():,.0f}\",\n", + " f\"{total_households_with_children:,.0f}\",\n", + " f\"{households_with_1_child:,.0f}\",\n", + " f\"{households_with_2_children:,.0f}\",\n", + " f\"{households_with_3plus_children:,.0f}\",\n", + " f\"{total_children:,.0f}\",\n", + " f\"{children_under_6:,.0f}\"\n", + " ]\n", + "}\n", + "\n", + "weighted_df = pd.DataFrame(weighted_summary_data)\n", + "\n", + "print(\"\\n\" + \"=\"*60)\n", + "print(\"PA DATASET SUMMARY - WEIGHTED (Population Estimates)\")\n", + "print(\"=\"*60)\n", + "print(weighted_df.to_string(index=False))\n", + "print(\"=\"*60)\n", + "\n", + "# Save table\n", + "weighted_df.to_csv('pa_dataset_summary_weighted.csv', index=False)\n", + "print(\"\\nSummary saved to: pa_dataset_summary_weighted.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "======================================================================\n", + "HOUSEHOLDS WITH $0 INCOME\n", + "======================================================================\n", + "Household count: 368,283\n", + "Percentage of all households: 7.90%\n", + "======================================================================\n" + ] + } + ], + "source": [ + "# Households with $0 income\n", + "agi_hh = np.array(sim.calculate(\"adjusted_gross_income\", period=2025, map_to=\"household\"))\n", + "weights = np.array(sim.calculate(\"household_weight\", period=2025))\n", + "\n", + "zero_income_mask = agi_hh == 0\n", + "zero_income_count = weights[zero_income_mask].sum()\n", + "total_households = weights.sum()\n", + "\n", + "print(\"\\n\" + \"=\"*70)\n", + "print(\"HOUSEHOLDS WITH $0 INCOME\")\n", + "print(\"=\"*70)\n", + "print(f\"Household count: {zero_income_count:,.0f}\")\n", + "print(f\"Percentage of all households: {zero_income_count / total_households * 100:.2f}%\")\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "======================================================================\n", + "HOUSEHOLD COUNTS BY INCOME BRACKET\n", + "======================================================================\n", + "Income Bracket Households % of All Households\n", + " $0-$10k 786,029 16.86%\n", + " $10k-$20k 177,932 3.82%\n", + " $20k-$30k 151,871 3.26%\n", + " $30k-$40k 394,030 8.45%\n", + " $40k-$50k 240,967 5.17%\n", + " $50k-$60k 200,283 4.30%\n", + "======================================================================\n", + "\n", + "Total households in $0-$60k range: 1,951,112\n", + "Percentage of all households in $0-$60k range: 41.85%\n" + ] + } + ], + "source": [ + "# Household counts by income brackets\n", + "income_brackets = [\n", + " (0, 10000, \"$0-$10k\"),\n", + " (10000, 20000, \"$10k-$20k\"),\n", + " (20000, 30000, \"$20k-$30k\"),\n", + " (30000, 40000, \"$30k-$40k\"),\n", + " (40000, 50000, \"$40k-$50k\"),\n", + " (50000, 60000, \"$50k-$60k\")\n", + "]\n", + "\n", + "bracket_data = []\n", + "for lower, upper, label in income_brackets:\n", + " mask = (agi_hh >= lower) & (agi_hh < upper)\n", + " count = weights[mask].sum()\n", + " pct_of_total = (count / total_households) * 100\n", + " \n", + " bracket_data.append({\n", + " \"Income Bracket\": label,\n", + " \"Households\": f\"{count:,.0f}\",\n", + " \"% of All Households\": f\"{pct_of_total:.2f}%\"\n", + " })\n", + "\n", + "income_df = pd.DataFrame(bracket_data)\n", + "\n", + "print(\"\\n\" + \"=\"*70)\n", + "print(\"HOUSEHOLD COUNTS BY INCOME BRACKET\")\n", + "print(\"=\"*70)\n", + "print(income_df.to_string(index=False))\n", + "print(\"=\"*70)\n", + "\n", + "# Total in $0-$60k range\n", + "total_in_range = sum([weights[(agi_hh >= lower) & (agi_hh < upper)].sum() for lower, upper, _ in income_brackets])\n", + "print(f\"\\nTotal households in $0-$60k range: {total_in_range:,.0f}\")\n", + "print(f\"Percentage of all households in $0-$60k range: {total_in_range / total_households * 100:.2f}%\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/us/states/pa/pa_dataset_summary_weighted.csv b/us/states/pa/pa_dataset_summary_weighted.csv new file mode 100644 index 0000000..a81a10c --- /dev/null +++ b/us/states/pa/pa_dataset_summary_weighted.csv @@ -0,0 +1,14 @@ +Metric,Value +Household count (weighted),"4,662,650" +Person count (weighted),"13,217,679" +Median AGI,"$73,962" +75th percentile AGI,"$169,351" +90th percentile AGI,"$404,412" +95th percentile AGI,"$511,573" +Max AGI,"$3,229,514" +Total households with children,"1,489,087" +Households with 1 child,"720,370" +Households with 2 children,"504,238" +Households with 3+ children,"264,479" +Total children under 18,"2,597,022" +Children under 6,"799,168" diff --git a/us/states/pa/pa_eitc_reform_analysis.ipynb b/us/states/pa/pa_eitc_reform_analysis.ipynb new file mode 100644 index 0000000..30b4f24 --- /dev/null +++ b/us/states/pa/pa_eitc_reform_analysis.ipynb @@ -0,0 +1,491 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pennsylvania EITC Reform Analysis (2025)\n", + "\n", + "This notebook analyzes the impact of Pennsylvania's Working Pennsylvanians Tax Credit (EITC).\n", + "\n", + "## Baseline\n", + "- PA EITC is set to 0% (no state EITC)\n", + "\n", + "## Reform (Current Law)\n", + "- PA EITC matches 10% of the federal EITC\n", + "\n", + "## Metrics\n", + "We calculate:\n", + "- Budgetary impact (net cost)\n", + "- Winners (percentage of population affected)\n", + "- Overall poverty impact\n", + "- Child poverty impact" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_us import Microsimulation\n", + "from policyengine_core.reforms import Reform\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "PA_DATASET = \"hf://policyengine/policyengine-us-data/states/PA.h5\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Helper Functions" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def calculate_poverty(sim, period=2025, child_only=False):\n", + " \"\"\"\n", + " Calculate poverty rate and count.\n", + " \n", + " Args:\n", + " sim: Microsimulation object\n", + " period: Year to analyze\n", + " child_only: If True, only count children under 18\n", + " \n", + " Returns:\n", + " poverty_rate: Weighted poverty rate\n", + " people_in_poverty: Weighted count\n", + " \"\"\"\n", + " age = np.array(sim.calculate(\"age\", period=period))\n", + " is_in_poverty = np.array(sim.calculate(\"person_in_poverty\", period=period))\n", + " person_weight = np.array(sim.calculate(\"person_weight\", period=period))\n", + " \n", + " if child_only:\n", + " mask = age < 18\n", + " else:\n", + " mask = np.ones_like(age, dtype=bool)\n", + " \n", + " # Weighted poverty rate\n", + " weighted_in_poverty = (is_in_poverty[mask] * person_weight[mask]).sum()\n", + " weighted_total = person_weight[mask].sum()\n", + " poverty_rate = weighted_in_poverty / weighted_total if weighted_total > 0 else 0\n", + " \n", + " # Weighted count of people in poverty\n", + " people_in_poverty = weighted_in_poverty\n", + " \n", + " return {\n", + " \"poverty_rate\": poverty_rate,\n", + " \"people_in_poverty\": people_in_poverty,\n", + " \"total_people\": weighted_total\n", + " }\n", + "\n", + "def calculate_budgetary_impact(baseline_sim, reform_sim, variable, period=2025):\n", + " \"\"\"\n", + " Calculate the budgetary impact (net cost) of a reform.\n", + " \"\"\"\n", + " baseline_value = baseline_sim.calculate(variable, period=period, map_to=\"household\").sum()\n", + " reform_value = reform_sim.calculate(variable, period=period, map_to=\"household\").sum()\n", + " \n", + " return reform_value - baseline_value\n", + "\n", + "def calculate_winners(baseline_sim, reform_sim, period=2025):\n", + " \"\"\"\n", + " Calculate winners from a reform at the person level (weighted).\n", + " Winners: People in households with higher net income under reform.\n", + " Returns weighted count and percentage of total population.\n", + " \"\"\"\n", + " # Get household-level income change\n", + " baseline_income = np.array(baseline_sim.calculate(\"household_net_income\", period=period, map_to=\"household\"))\n", + " reform_income = np.array(reform_sim.calculate(\"household_net_income\", period=period, map_to=\"household\"))\n", + " household_weight = np.array(baseline_sim.calculate(\"household_weight\", period=period))\n", + " income_change = reform_income - baseline_income\n", + " \n", + " # Get person-level data\n", + " household_id_person = np.array(baseline_sim.calculate(\"household_id\", period=period, map_to=\"person\"))\n", + " household_id_household = np.array(baseline_sim.calculate(\"household_id\", period=period, map_to=\"household\"))\n", + " person_weight = np.array(baseline_sim.calculate(\"person_weight\", period=period))\n", + " \n", + " # Create mapping of household_id to income_change\n", + " income_change_dict = dict(zip(household_id_household, income_change))\n", + " \n", + " # Map income change to each person\n", + " person_income_change = np.array([income_change_dict.get(hh_id, 0) for hh_id in household_id_person])\n", + " \n", + " # Weighted count of people who are winners (gained more than $1)\n", + " winners_mask = person_income_change > 1\n", + " people_winning = person_weight[winners_mask].sum()\n", + " total_people = person_weight.sum()\n", + " \n", + " # Calculate percentage\n", + " pct_winners = (people_winning / total_people * 100) if total_people > 0 else 0\n", + " \n", + " # Average gain for winning households (weighted)\n", + " winning_hh_mask = income_change > 1\n", + " if winning_hh_mask.sum() > 0:\n", + " avg_gain = np.average(income_change[winning_hh_mask], weights=household_weight[winning_hh_mask])\n", + " else:\n", + " avg_gain = 0\n", + " \n", + " return {\n", + " \"people_winning\": people_winning,\n", + " \"total_people\": total_people,\n", + " \"pct_winners\": pct_winners,\n", + " \"avg_gain\": avg_gain\n", + " }\n", + "\n", + "def format_currency(value):\n", + " \"\"\"Format value as currency in millions.\"\"\"\n", + " return f\"${value/1e6:.2f}M\"\n", + "\n", + "def format_percent(value):\n", + " \"\"\"Format value as percentage.\"\"\"\n", + " return f\"{value*100:.2f}%\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define Baseline and Reform" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reform functions defined!\n" + ] + } + ], + "source": [ + "def create_baseline():\n", + " \"\"\"Baseline: PA EITC at 0%\"\"\"\n", + " return Reform.from_dict(\n", + " {\n", + " \"gov.states.pa.tax.income.credits.eitc.match\": {\n", + " \"2025-01-01.2100-12-31\": 0.0\n", + " }\n", + " },\n", + " country_id=\"us\",\n", + " )\n", + "\n", + "def create_reform():\n", + " \"\"\"Reform: PA EITC at 10% (current law)\"\"\"\n", + " return Reform.from_dict(\n", + " {\n", + " \"gov.states.pa.tax.income.credits.eitc.match\": {\n", + " \"2025-01-01.2100-12-31\": 0.1\n", + " }\n", + " },\n", + " country_id=\"us\",\n", + " )\n", + "\n", + "print(\"Reform functions defined!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Simulations" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading baseline (PA EITC at 0%)...\n", + "✓ Baseline loaded\n", + "\n", + "Loading reform (PA EITC at 10%)...\n", + "✓ Reform loaded\n", + "\n", + "============================================================\n", + "All simulations ready!\n", + "============================================================\n" + ] + } + ], + "source": [ + "print(\"Loading baseline (PA EITC at 0%)...\")\n", + "baseline_reform = create_baseline()\n", + "baseline = Microsimulation(dataset=PA_DATASET, reform=baseline_reform)\n", + "print(\"✓ Baseline loaded\")\n", + "\n", + "print(\"\\nLoading reform (PA EITC at 10%)...\")\n", + "reform = create_reform()\n", + "reform_sim = Microsimulation(dataset=PA_DATASET, reform=reform)\n", + "print(\"✓ Reform loaded\")\n", + "\n", + "print(\"\\n\" + \"=\"*60)\n", + "print(\"All simulations ready!\")\n", + "print(\"=\"*60)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Calculate Impacts" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Invalid values for enum StateGroup: ['PA']. These will be encoded as index 0.\n", + "Invalid values for enum StateGroup: ['PA']. These will be encoded as index 0.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✓ All impacts calculated\n" + ] + } + ], + "source": [ + "# Baseline metrics\n", + "baseline_overall_pov = calculate_poverty(baseline, child_only=False)\n", + "baseline_child_pov = calculate_poverty(baseline, child_only=True)\n", + "\n", + "# Reform metrics\n", + "reform_overall_pov = calculate_poverty(reform_sim, child_only=False)\n", + "reform_child_pov = calculate_poverty(reform_sim, child_only=True)\n", + "\n", + "# Budgetary impact\n", + "eitc_cost = calculate_budgetary_impact(baseline, reform_sim, \"pa_eitc\")\n", + "\n", + "# Winners (at person level)\n", + "winners = calculate_winners(baseline, reform_sim)\n", + "\n", + "print(\"✓ All impacts calculated\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Results Summary" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "================================================================================\n", + "PA EITC REFORM IMPACTS (2025)\n", + "Baseline: PA EITC at 0% | Reform: PA EITC at 10% federal match\n", + "================================================================================\n", + "\n", + "================================BUDGETARY IMPACT================================\n", + "PA EITC net cost: $221.73M\n", + "\n", + "==============================WINNERS (POPULATION)==============================\n", + "People gaining income: 3,066,550 (23.20% of population)\n", + "Average gain per household: $298.55\n", + "\n", + "============================POVERTY IMPACT - OVERALL============================\n", + "Baseline poverty rate: 13.54%\n", + "Reform poverty rate: 13.21%\n", + "Absolute reduction: 0.33%\n", + "Relative reduction: 2.44%\n", + "People lifted from poverty: 43,703\n", + "\n", + "===========================POVERTY IMPACT - CHILDREN============================\n", + "Baseline child poverty rate: 11.31%\n", + "Reform child poverty rate: 11.15%\n", + "Absolute reduction: 0.16%\n", + "Relative reduction: 1.43%\n", + "Children lifted from poverty: 4,277\n", + "================================================================================\n" + ] + } + ], + "source": [ + "print(\"\\n\" + \"=\"*80)\n", + "print(\"PA EITC REFORM IMPACTS (2025)\")\n", + "print(\"Baseline: PA EITC at 0% | Reform: PA EITC at 10% federal match\")\n", + "print(\"=\"*80)\n", + "\n", + "print(f\"\\n{'BUDGETARY IMPACT':=^80}\")\n", + "print(f\"PA EITC net cost: {format_currency(eitc_cost)}\")\n", + "\n", + "print(f\"\\n{'WINNERS (POPULATION)':=^80}\")\n", + "print(f\"People gaining income: {winners['people_winning']:,.0f} ({winners['pct_winners']:.2f}% of population)\")\n", + "print(f\"Average gain per household: ${winners['avg_gain']:,.2f}\")\n", + "\n", + "print(f\"\\n{'POVERTY IMPACT - OVERALL':=^80}\")\n", + "print(f\"Baseline poverty rate: {format_percent(baseline_overall_pov['poverty_rate'])}\")\n", + "print(f\"Reform poverty rate: {format_percent(reform_overall_pov['poverty_rate'])}\")\n", + "overall_pov_reduction = baseline_overall_pov['poverty_rate'] - reform_overall_pov['poverty_rate']\n", + "overall_pov_pct_reduction = (overall_pov_reduction / baseline_overall_pov['poverty_rate'] * 100) if baseline_overall_pov['poverty_rate'] > 0 else 0\n", + "print(f\"Absolute reduction: {format_percent(overall_pov_reduction)}\")\n", + "print(f\"Relative reduction: {overall_pov_pct_reduction:.2f}%\")\n", + "people_lifted = baseline_overall_pov['people_in_poverty'] - reform_overall_pov['people_in_poverty']\n", + "print(f\"People lifted from poverty: {people_lifted:,.0f}\")\n", + "\n", + "print(f\"\\n{'POVERTY IMPACT - CHILDREN':=^80}\")\n", + "print(f\"Baseline child poverty rate: {format_percent(baseline_child_pov['poverty_rate'])}\")\n", + "print(f\"Reform child poverty rate: {format_percent(reform_child_pov['poverty_rate'])}\")\n", + "child_pov_reduction = baseline_child_pov['poverty_rate'] - reform_child_pov['poverty_rate']\n", + "child_pov_pct_reduction = (child_pov_reduction / baseline_child_pov['poverty_rate'] * 100) if baseline_child_pov['poverty_rate'] > 0 else 0\n", + "print(f\"Absolute reduction: {format_percent(child_pov_reduction)}\")\n", + "print(f\"Relative reduction: {child_pov_pct_reduction:.2f}%\")\n", + "children_lifted = baseline_child_pov['people_in_poverty'] - reform_child_pov['people_in_poverty']\n", + "print(f\"Children lifted from poverty: {children_lifted:,.0f}\")\n", + "print(\"=\"*80)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "======================================================================\n", + "HOUSEHOLDS BENEFITTING FROM PA EITC\n", + "======================================================================\n", + "Households benefitting: 742,696\n", + "Total households: 4,662,650\n", + "Percentage of households: 15.93%\n", + "======================================================================\n" + ] + } + ], + "source": [ + "# Calculate households benefitting (weighted)\n", + "baseline_hh_income = np.array(baseline.calculate(\"household_net_income\", period=2025, map_to=\"household\"))\n", + "reform_hh_income = np.array(reform_sim.calculate(\"household_net_income\", period=2025, map_to=\"household\"))\n", + "household_weight = np.array(baseline.calculate(\"household_weight\", period=2025))\n", + "\n", + "hh_income_change = reform_hh_income - baseline_hh_income\n", + "hh_benefitting_mask = hh_income_change > 1 # Gained more than $1\n", + "\n", + "households_benefitting = household_weight[hh_benefitting_mask].sum()\n", + "total_households = household_weight.sum()\n", + "pct_households_benefitting = (households_benefitting / total_households) * 100\n", + "\n", + "print(\"=\"*70)\n", + "print(\"HOUSEHOLDS BENEFITTING FROM PA EITC\")\n", + "print(\"=\"*70)\n", + "print(f\"Households benefitting: {households_benefitting:,.0f}\")\n", + "print(f\"Total households: {total_households:,.0f}\")\n", + "print(f\"Percentage of households: {pct_households_benefitting:.2f}%\")\n", + "print(\"=\"*70)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export Results" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "==============================================================================================================\n", + "PA EITC REFORM SUMMARY\n", + "==============================================================================================================\n", + "Scenario PA EITC Match Net Cost Overall Poverty Change (%) Child Poverty Change (%) % Population Winning\n", + " Reform 10% $221.73M 2.44% 1.43% 23.20%\n", + "==============================================================================================================\n", + "\n", + "✓ Exported to: pa_eitc_reform_results.csv\n" + ] + } + ], + "source": [ + "# Calculate poverty changes\n", + "overall_pov_reduction = baseline_overall_pov['poverty_rate'] - reform_overall_pov['poverty_rate']\n", + "overall_pov_pct_reduction = (overall_pov_reduction / baseline_overall_pov['poverty_rate'] * 100) if baseline_overall_pov['poverty_rate'] > 0 else 0\n", + "child_pov_reduction = baseline_child_pov['poverty_rate'] - reform_child_pov['poverty_rate']\n", + "child_pov_pct_reduction = (child_pov_reduction / baseline_child_pov['poverty_rate'] * 100) if baseline_child_pov['poverty_rate'] > 0 else 0\n", + "\n", + "# Create results DataFrame (reform only)\n", + "results = [\n", + " {\n", + " \"Scenario\": \"Reform\",\n", + " \"PA EITC Match\": \"10%\",\n", + " \"Net Cost\": format_currency(eitc_cost),\n", + " \"Overall Poverty Change (%)\": f\"{overall_pov_pct_reduction:.2f}%\",\n", + " \"Child Poverty Change (%)\": f\"{child_pov_pct_reduction:.2f}%\",\n", + " \"% Population Winning\": f\"{winners['pct_winners']:.2f}%\"\n", + " }\n", + "]\n", + "\n", + "df_results = pd.DataFrame(results)\n", + "\n", + "print(\"\\n\" + \"=\"*110)\n", + "print(\"PA EITC REFORM SUMMARY\")\n", + "print(\"=\"*110)\n", + "print(df_results.to_string(index=False))\n", + "print(\"=\"*110)\n", + "\n", + "# Export to CSV\n", + "df_results.to_csv(\"pa_eitc_reform_results.csv\", index=False)\n", + "print(\"\\n✓ Exported to: pa_eitc_reform_results.csv\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/us/states/pa/pa_eitc_reform_results.csv b/us/states/pa/pa_eitc_reform_results.csv new file mode 100644 index 0000000..7c4f0b1 --- /dev/null +++ b/us/states/pa/pa_eitc_reform_results.csv @@ -0,0 +1,2 @@ +Scenario,PA EITC Match,Net Cost,Overall Poverty Change (%),Child Poverty Change (%),% Population Winning +Reform,10%,$221.73M,2.44%,1.43%,23.20% diff --git a/us/states/pa/test_dataset.ipynb b/us/states/pa/test_dataset.ipynb new file mode 100644 index 0000000..09e4470 --- /dev/null +++ b/us/states/pa/test_dataset.ipynb @@ -0,0 +1,62 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "cell-0", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "194e07960bcb4dffbe230453d20cdfd5", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "PA.h5: 0%| | 0.00/149M [00:00