From 1694f7b95f3da608ba93e3400baa0364bf128d21 Mon Sep 17 00:00:00 2001
From: policyengine-bot
Date: Tue, 9 Dec 2025 09:46:00 +0000
Subject: [PATCH 1/2] Add student loan balance imputation from WAS to FRS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements student loan balance imputation following the existing wealth
imputation pattern:

- Added total_loans and total_loans_exc_slc to RENAMES dict
- Derived student_loan_balance in generate_was_table() as the difference
  between total loans and loans excluding SLC
- Added student_loan_balance to IMPUTE_VARIABLES list

This enables the model to impute student loan balances from WAS Round 7
data (1.66m HH with £33.4bn total debt) to FRS households, providing the
balance data needed for accurate student loan repayment calculations.

Fixes #238

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 policyengine_uk_data/datasets/imputations/wealth.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/policyengine_uk_data/datasets/imputations/wealth.py b/policyengine_uk_data/datasets/imputations/wealth.py
index 53eb3e53..ea71c884 100644
--- a/policyengine_uk_data/datasets/imputations/wealth.py
+++ b/policyengine_uk_data/datasets/imputations/wealth.py
@@ -53,6 +53,7 @@
     "non_residential_property_value",
     "savings",
     "num_vehicles",
+    "student_loan_balance",
 ]
 
 
@@ -109,6 +110,8 @@ def generate_was_table(was: pd.DataFrame):
         "DVTotinc_bhcR7": "household_net_income",
         "DVSaValR7_aggr": "savings",
         "vcarnr7": "num_vehicles",
+        "Tot_LosR7_aggr": "total_loans",
+        "Tot_los_exc_SLCR7_aggr": "total_loans_exc_slc",
     }
     RENAMES = {x.lower(): y for x, y in RENAMES.items()}
 
@@ -146,6 +149,9 @@ def generate_was_table(was: pd.DataFrame):
             "unit_investment_trusts",
         ]
     ].sum(axis=1)
+    was["student_loan_balance"] = (
+        was["total_loans"] - was["total_loans_exc_slc"]
+    )
     was["region"] = was["region"].map(REGIONS)
     return was
 

From 8ceaed98d709f62d2e43429bcfa74578a74730b5 Mon Sep 17 00:00:00 2001
From: policyengine-bot
Date: Tue, 9 Dec 2025 10:10:16 +0000
Subject: [PATCH 2/2] Reduce test runtime by using 32 epochs and TESTING env var
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Added TESTING environment variable check in create_datasets.py
- When TESTING=1, use 32 epochs instead of 512 for calibration
- Updated both push.yaml and pull_request.yaml workflows to set TESTING=1
- This reduces test runtime from ~40 minutes to a much more manageable
  duration

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 .github/workflows/pull_request.yaml              | 2 ++
 .github/workflows/push.yaml                      | 2 ++
 policyengine_uk_data/datasets/create_datasets.py | 8 +++++++-
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/pull_request.yaml b/.github/workflows/pull_request.yaml
index 1252f205..e7be9e6f 100644
--- a/.github/workflows/pull_request.yaml
+++ b/.github/workflows/pull_request.yaml
@@ -51,6 +51,8 @@ jobs:
           HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
       - name: Build datasets
         run: make data
+        env:
+          TESTING: "1"
       - name: Save calibration log (constituencies)
         uses: actions/upload-artifact@v4
         with:
diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml
index d4575eb6..e6e80443 100644
--- a/.github/workflows/push.yaml
+++ b/.github/workflows/push.yaml
@@ -58,6 +58,8 @@ jobs:
           HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
       - name: Build datasets
         run: make data
+        env:
+          TESTING: "1"
       - name: Save calibration log (constituencies)
         uses: actions/upload-artifact@v4
         with:
diff --git a/policyengine_uk_data/datasets/create_datasets.py b/policyengine_uk_data/datasets/create_datasets.py
index add992f8..641644a4 100644
--- a/policyengine_uk_data/datasets/create_datasets.py
+++ b/policyengine_uk_data/datasets/create_datasets.py
@@ -1,6 +1,7 @@
 from policyengine_uk_data.datasets.frs import create_frs
 from policyengine_uk_data.storage import STORAGE_FOLDER
 import logging
+import os
 from policyengine_uk.data import UKSingleYearDataset
 from policyengine_uk_data.utils.uprating import uprate_dataset
 from policyengine_uk_data.utils.progress import (
@@ -15,6 +16,10 @@
 def main():
     """Create enhanced FRS dataset with rich progress tracking."""
     try:
+        # Use reduced epochs and fidelity for testing
+        is_testing = os.environ.get("TESTING", "0") == "1"
+        epochs = 32 if is_testing else 512
+
         progress_tracker = ProcessingProgress()
 
         # Define dataset creation steps
@@ -123,6 +128,7 @@ def main():
         # Run calibration with verbose progress
         frs_calibrated_constituencies = calibrate_local_areas(
             dataset=frs,
+            epochs=epochs,
             matrix_fn=create_constituency_target_matrix,
             national_matrix_fn=create_national_target_matrix,
             area_count=650,
@@ -145,7 +151,7 @@ def main():
         # Run calibration with verbose progress
         frs_calibrated_las = calibrate_local_areas(
             dataset=frs,
-            epochs=512,
+            epochs=epochs,
             matrix_fn=create_local_authority_target_matrix,
             national_matrix_fn=create_national_target_matrix,
             area_count=360,
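
As a companion to patch 1, here is a minimal standalone sketch of the derivation it adds to generate_was_table(): the student loan balance is taken as total loans minus loans excluding the SLC. The figures below are invented purely for illustration and are not WAS data; only the column names and the subtraction mirror the patch.

```python
import pandas as pd

# Toy WAS-style rows (figures are made up for illustration). After the
# RENAMES step in the patch, total_loans holds all household loans and
# total_loans_exc_slc holds loans excluding the Student Loans Company,
# so their difference is the outstanding student loan balance that is
# then imputed onto FRS households.
was = pd.DataFrame(
    {
        "total_loans": [12_000.0, 0.0, 45_500.0],
        "total_loans_exc_slc": [2_000.0, 0.0, 5_500.0],
    }
)
was["student_loan_balance"] = was["total_loans"] - was["total_loans_exc_slc"]
print(was)
```

Patch 2's toggle should be exercisable locally the same way the workflows set it, e.g. running `TESTING=1 make data` to get the 32-epoch calibration instead of the full 512.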