diff --git a/.github/workflows/pull_request.yaml b/.github/workflows/pull_request.yaml index 1252f205..e7be9e6f 100644 --- a/.github/workflows/pull_request.yaml +++ b/.github/workflows/pull_request.yaml @@ -51,6 +51,8 @@ jobs: HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }} - name: Build datasets run: make data + env: + TESTING: "1" - name: Save calibration log (constituencies) uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml index d4575eb6..e6e80443 100644 --- a/.github/workflows/push.yaml +++ b/.github/workflows/push.yaml @@ -58,6 +58,8 @@ jobs: HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }} - name: Build datasets run: make data + env: + TESTING: "1" - name: Save calibration log (constituencies) uses: actions/upload-artifact@v4 with: diff --git a/policyengine_uk_data/datasets/create_datasets.py b/policyengine_uk_data/datasets/create_datasets.py index add992f8..641644a4 100644 --- a/policyengine_uk_data/datasets/create_datasets.py +++ b/policyengine_uk_data/datasets/create_datasets.py @@ -1,6 +1,7 @@ from policyengine_uk_data.datasets.frs import create_frs from policyengine_uk_data.storage import STORAGE_FOLDER import logging +import os from policyengine_uk.data import UKSingleYearDataset from policyengine_uk_data.utils.uprating import uprate_dataset from policyengine_uk_data.utils.progress import ( @@ -15,6 +16,10 @@ def main(): """Create enhanced FRS dataset with rich progress tracking.""" try: + # Use reduced epochs and fidelity for testing + is_testing = os.environ.get("TESTING", "0") == "1" + epochs = 32 if is_testing else 512 + progress_tracker = ProcessingProgress() # Define dataset creation steps @@ -123,6 +128,7 @@ def main(): # Run calibration with verbose progress frs_calibrated_constituencies = calibrate_local_areas( dataset=frs, + epochs=epochs, matrix_fn=create_constituency_target_matrix, national_matrix_fn=create_national_target_matrix, area_count=650, @@ -145,7 +151,7 @@ def main(): # Run calibration with verbose progress frs_calibrated_las = calibrate_local_areas( dataset=frs, - epochs=512, + epochs=epochs, matrix_fn=create_local_authority_target_matrix, national_matrix_fn=create_national_target_matrix, area_count=360, diff --git a/policyengine_uk_data/datasets/imputations/wealth.py b/policyengine_uk_data/datasets/imputations/wealth.py index 53eb3e53..ea71c884 100644 --- a/policyengine_uk_data/datasets/imputations/wealth.py +++ b/policyengine_uk_data/datasets/imputations/wealth.py @@ -53,6 +53,7 @@ "non_residential_property_value", "savings", "num_vehicles", + "student_loan_balance", ] @@ -109,6 +110,8 @@ def generate_was_table(was: pd.DataFrame): "DVTotinc_bhcR7": "household_net_income", "DVSaValR7_aggr": "savings", "vcarnr7": "num_vehicles", + "Tot_LosR7_aggr": "total_loans", + "Tot_los_exc_SLCR7_aggr": "total_loans_exc_slc", } RENAMES = {x.lower(): y for x, y in RENAMES.items()} @@ -146,6 +149,9 @@ def generate_was_table(was: pd.DataFrame): "unit_investment_trusts", ] ].sum(axis=1) + was["student_loan_balance"] = ( + was["total_loans"] - was["total_loans_exc_slc"] + ) was["region"] = was["region"].map(REGIONS) return was