From 1694f7b95f3da608ba93e3400baa0364bf128d21 Mon Sep 17 00:00:00 2001
From: policyengine-bot
Date: Tue, 9 Dec 2025 09:46:00 +0000
Subject: [PATCH 1/2] Add student loan balance imputation from WAS to FRS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements student loan balance imputation following the existing wealth
imputation pattern:

- Added total_loans and total_loans_exc_slc to RENAMES dict
- Derived student_loan_balance in generate_was_table() as the difference
  between total loans and loans excluding SLC
- Added student_loan_balance to IMPUTE_VARIABLES list

This enables the model to impute student loan balances from WAS Round 7
data (1.66m HH with £33.4bn total debt) to FRS households, providing the
balance data needed for accurate student loan repayment calculations.

Fixes #238

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 policyengine_uk_data/datasets/imputations/wealth.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/policyengine_uk_data/datasets/imputations/wealth.py b/policyengine_uk_data/datasets/imputations/wealth.py
index 53eb3e53..ea71c884 100644
--- a/policyengine_uk_data/datasets/imputations/wealth.py
+++ b/policyengine_uk_data/datasets/imputations/wealth.py
@@ -53,6 +53,7 @@
     "non_residential_property_value",
     "savings",
     "num_vehicles",
+    "student_loan_balance",
 ]
 
 
@@ -109,6 +110,8 @@ def generate_was_table(was: pd.DataFrame):
         "DVTotinc_bhcR7": "household_net_income",
         "DVSaValR7_aggr": "savings",
         "vcarnr7": "num_vehicles",
+        "Tot_LosR7_aggr": "total_loans",
+        "Tot_los_exc_SLCR7_aggr": "total_loans_exc_slc",
     }
     RENAMES = {x.lower(): y for x, y in RENAMES.items()}
 
@@ -146,6 +149,9 @@ def generate_was_table(was: pd.DataFrame):
             "unit_investment_trusts",
         ]
     ].sum(axis=1)
+    was["student_loan_balance"] = (
+        was["total_loans"] - was["total_loans_exc_slc"]
+    )
     was["region"] = was["region"].map(REGIONS)
     return was
 

From 8ceaed98d709f62d2e43429bcfa74578a74730b5 Mon Sep 17 00:00:00 2001
From: policyengine-bot
Date: Tue, 9 Dec 2025 10:10:16 +0000
Subject: [PATCH 2/2] Reduce test runtime by using 32 epochs and TESTING env var
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Added TESTING environment variable check in create_datasets.py
- When TESTING=1, use 32 epochs instead of 512 for calibration
- Updated both push.yaml and pull_request.yaml workflows to set TESTING=1
- This reduces test runtime from ~40 minutes to a much more manageable
  duration

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 .github/workflows/pull_request.yaml              | 2 ++
 .github/workflows/push.yaml                      | 2 ++
 policyengine_uk_data/datasets/create_datasets.py | 8 +++++++-
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/pull_request.yaml b/.github/workflows/pull_request.yaml
index 1252f205..e7be9e6f 100644
--- a/.github/workflows/pull_request.yaml
+++ b/.github/workflows/pull_request.yaml
@@ -51,6 +51,8 @@ jobs:
           HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
       - name: Build datasets
         run: make data
+        env:
+          TESTING: "1"
       - name: Save calibration log (constituencies)
         uses: actions/upload-artifact@v4
         with:
diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml
index d4575eb6..e6e80443 100644
--- a/.github/workflows/push.yaml
+++ b/.github/workflows/push.yaml
@@ -58,6 +58,8 @@ jobs:
           HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
       - name: Build datasets
         run: make data
+        env:
+          TESTING: "1"
       - name: Save calibration log (constituencies)
         uses: actions/upload-artifact@v4
         with:
diff --git a/policyengine_uk_data/datasets/create_datasets.py b/policyengine_uk_data/datasets/create_datasets.py
index add992f8..641644a4 100644
--- a/policyengine_uk_data/datasets/create_datasets.py
+++ b/policyengine_uk_data/datasets/create_datasets.py
@@ -1,6 +1,7 @@
 from policyengine_uk_data.datasets.frs import create_frs
 from policyengine_uk_data.storage import STORAGE_FOLDER
 import logging
+import os
 from policyengine_uk.data import UKSingleYearDataset
 from policyengine_uk_data.utils.uprating import uprate_dataset
 from policyengine_uk_data.utils.progress import (
@@ -15,6 +16,10 @@
 def main():
     """Create enhanced FRS dataset with rich progress tracking."""
     try:
+        # Use reduced epochs and fidelity for testing
+        is_testing = os.environ.get("TESTING", "0") == "1"
+        epochs = 32 if is_testing else 512
+
         progress_tracker = ProcessingProgress()
 
         # Define dataset creation steps
@@ -123,6 +128,7 @@ def main():
         # Run calibration with verbose progress
         frs_calibrated_constituencies = calibrate_local_areas(
             dataset=frs,
+            epochs=epochs,
             matrix_fn=create_constituency_target_matrix,
             national_matrix_fn=create_national_target_matrix,
             area_count=650,
@@ -145,7 +151,7 @@ def main():
         # Run calibration with verbose progress
         frs_calibrated_las = calibrate_local_areas(
             dataset=frs,
-            epochs=512,
+            epochs=epochs,
             matrix_fn=create_local_authority_target_matrix,
             national_matrix_fn=create_national_target_matrix,
             area_count=360,
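
As a companion to patch 1, here is a minimal standalone sketch of the derivation it adds to generate_was_table(): the student loan balance is taken as total loans minus loans excluding the SLC. The figures below are invented purely for illustration and are not WAS data; only the column names and the subtraction mirror the patch.

```python
import pandas as pd

# Toy WAS-style rows (figures are made up for illustration). After the
# RENAMES step in the patch, total_loans holds all household loans and
# total_loans_exc_slc holds loans excluding the Student Loans Company,
# so their difference is the outstanding student loan balance that is
# then imputed onto FRS households.
was = pd.DataFrame(
    {
        "total_loans": [12_000.0, 0.0, 45_500.0],
        "total_loans_exc_slc": [2_000.0, 0.0, 5_500.0],
    }
)
was["student_loan_balance"] = was["total_loans"] - was["total_loans_exc_slc"]
print(was)
```

Patch 2's toggle should be exercisable locally the same way the workflows set it, e.g. running `TESTING=1 make data` to get the 32-epoch calibration instead of the full 512.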