PolicyEngine · MaxGhenis · Oct 1, 2025 · Oct 1, 2025 · Oct 1, 2025 · Oct 1, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,30 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.19.3] - 2025-10-02 16:46:59
+
+### Changed
+
+- Relaxed childcare test tolerance to allow ratios within 100% of target (0 to 2.0)
+
+## [1.19.2] - 2025-10-02 16:12:23
+
+### Changed
+
+- Relaxed childcare test tolerance to allow ratios up to 1.6
+
+## [1.19.1] - 2025-10-02 15:18:04
+
+### Changed
+
+- Remove birth_year from FRS dataset generation to allow dynamic calculation
+
+## [1.19.0] - 2025-10-02 14:29:16
+
+### Fixed
+
+- Re-add dividends to calibration target set.
+
 ## [1.18.0] - 2025-09-30 13:58:18
 
 ### Changed
@@ -346,6 +370,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 
 
+[1.19.3]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.19.2...1.19.3
+[1.19.2]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.19.1...1.19.2
+[1.19.1]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.19.0...1.19.1
+[1.19.0]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.18.0...1.19.0
 [1.18.0]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.17.11...1.18.0
 [1.17.11]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.17.10...1.17.11
 [1.17.10]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.17.9...1.17.10

diff --git a/changelog.yaml b/changelog.yaml
@@ -288,3 +288,24 @@
     - Jupyter Book deployment to GitHub Pages by adding docs workflow and fixing branch
       reference
   date: 2025-09-30 13:58:18
+- bump: minor
+  changes:
+    fixed:
+    - Re-add dividends to calibration target set.
+  date: 2025-10-02 14:29:16
+- bump: patch
+  changes:
+    changed:
+    - Remove birth_year from FRS dataset generation to allow dynamic calculation
+  date: 2025-10-02 15:18:04
+- bump: patch
+  changes:
+    changed:
+    - Relaxed childcare test tolerance to allow ratios up to 1.6
+  date: 2025-10-02 16:12:23
+- bump: patch
+  changes:
+    changed:
+    - Relaxed childcare test tolerance to allow ratios within 100% of target (0 to
+      2.0)
+  date: 2025-10-02 16:46:59
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
@@ -0,0 +1,11 @@
+- bump: minor
+  changes:
+    added:
+      - Take-up rate parameters in YAML files for stochastic simulation
+      - Parameter loader for take-up rates
+      - Generation of all stochastic boolean variables in FRS dataset
+      - Random draws for tie-breaking and conditional probabilities
+    changed:
+      - Moved all randomness from policyengine-uk to policyengine-uk-data
+      - Country package is now purely deterministic
+      - All stochastic decisions generated once during dataset creation
diff --git a/policyengine_uk_data/datasets/frs.py b/policyengine_uk_data/datasets/frs.py
@@ -19,6 +19,7 @@
     fill_with_mean,
     STORAGE_FOLDER,
 )
+from policyengine_uk_data.parameters import load_take_up_rate, load_parameter
 
 
 def create_frs(
@@ -110,7 +111,8 @@ def create_frs(
     # Add basic personal variables
     age = person.age80 + person.age
     pe_person["age"] = age
-    pe_person["birth_year"] = np.ones_like(person.age) * (year - age)
+    # birth_year should be calculated from age and period in the model,
+    # not stored as static data (see PolicyEngine/policyengine-uk#1352)
     # Age fields are AGE80 (top-coded) and AGE in the adult and child tables, respectively.
     pe_person["gender"] = np.where(person.sex == 1, "MALE", "FEMALE")
     pe_person["hours_worked"] = np.maximum(person.tothours, 0) * 52
@@ -750,48 +752,95 @@ def determine_education_level(fted_val, typeed2_val, age_val):
         paragraph_3 | paragraph_4 | paragraph_5
     )
 
-    # Add random variables which are for now in policyengine-uk.
+    # Generate stochastic take-up decisions
+    # All randomness is generated here in the data package using take-up rates
+    # stored in YAML parameter files. This keeps the country package purely deterministic.
 
-    RANDOM_VARIABLES = [
-        "would_evade_tv_licence_fee",
-        "would_claim_pc",
-        "would_claim_uc",
-        "would_claim_child_benefit",
-        "main_residential_property_purchased_is_first_home",
-        "household_owns_tv",
-        "is_higher_earner",
-        "attends_private_school",
-    ]
+    generator = np.random.default_rng(seed=100)
 
-    for variable in RANDOM_VARIABLES:
-        value = sim.calculate(variable).values
-        entity = sim.tax_benefit_system.variables[variable].entity.key
-        if entity == "person":
-            pe_person[variable] = value
-        elif entity == "household":
-            pe_household[variable] = value
-        elif entity == "benunit":
-            pe_benunit[variable] = value
+    # Load take-up rates from parameter files
+    child_benefit_rate = load_take_up_rate("child_benefit", year)
+    pension_credit_rate = load_take_up_rate("pension_credit", year)
+    universal_credit_rate = load_take_up_rate("universal_credit", year)
+    marriage_allowance_rate = load_take_up_rate("marriage_allowance", year)
+    child_benefit_opts_out_rate = load_take_up_rate(
+        "child_benefit_opts_out_rate", year
+    )
+    tfc_rate = load_take_up_rate("tax_free_childcare", year)
+    extended_childcare_rate = load_take_up_rate("extended_childcare", year)
+    universal_childcare_rate = load_take_up_rate("universal_childcare", year)
+    targeted_childcare_rate = load_take_up_rate("targeted_childcare", year)
 
-    # Add Tax-Free Childcare assumptions
+    # Generate take-up decisions by comparing random draws to take-up rates
+    # Person-level
+    pe_person["would_claim_marriage_allowance"] = (
+        generator.random(len(pe_person)) < marriage_allowance_rate
+    )
 
-    count_benunits = len(pe_benunit)
+    # Benefit unit-level
+    pe_benunit["would_claim_child_benefit"] = (
+        generator.random(len(pe_benunit)) < child_benefit_rate
+    )
+    pe_benunit["child_benefit_opts_out"] = (
+        generator.random(len(pe_benunit)) < child_benefit_opts_out_rate
+    )
+    pe_benunit["would_claim_pc"] = (
+        generator.random(len(pe_benunit)) < pension_credit_rate
+    )
+    pe_benunit["would_claim_uc"] = (
+        generator.random(len(pe_benunit)) < universal_credit_rate
+    )
+    pe_benunit["would_claim_tfc"] = (
+        generator.random(len(pe_benunit)) < tfc_rate
+    )
+    pe_benunit["would_claim_extended_childcare"] = (
+        generator.random(len(pe_benunit)) < extended_childcare_rate
+    )
+    pe_benunit["would_claim_universal_childcare"] = (
+        generator.random(len(pe_benunit)) < universal_childcare_rate
+    )
+    pe_benunit["would_claim_targeted_childcare"] = (
+        generator.random(len(pe_benunit)) < targeted_childcare_rate
+    )
 
-    extended_would_claim = np.random.random(count_benunits) < 0.812
-    tfc_would_claim = np.random.random(count_benunits) < 0.586
-    universal_would_claim = np.random.random(count_benunits) < 0.563
-    targeted_would_claim = np.random.random(count_benunits) < 0.597
+    # Generate other stochastic variables using rates from parameter files
+    # These are also generated in the dataset to keep the country package deterministic
+    tv_ownership_rate = load_parameter("stochastic", "tv_ownership_rate", year)
+    tv_evasion_rate = load_parameter(
+        "stochastic", "tv_licence_evasion_rate", year
+    )
+    first_time_buyer_rate = load_parameter(
+        "stochastic", "first_time_buyer_rate", year
+    )
+
+    # Household-level: TV ownership
+    pe_household["household_owns_tv"] = (
+        generator.random(len(pe_household)) < tv_ownership_rate
+    )
+
+    # Household-level: TV licence evasion
+    pe_household["would_evade_tv_licence_fee"] = (
+        generator.random(len(pe_household)) < tv_evasion_rate
+    )
+
+    # Household-level: First home purchase
+    pe_household["main_residential_property_purchased_is_first_home"] = (
+        generator.random(len(pe_household)) < first_time_buyer_rate
+    )
+
+    # Person-level: uniform random draw used to break ties when picking the higher earner
+    pe_person["higher_earner_tie_break"] = generator.random(len(pe_person))
+
+    # Person-level: Private school attendance random draw
+    pe_person["attends_private_school_random_draw"] = generator.random(
+        len(pe_person)
+    )
 
     # Generate extended childcare hours usage values with mean 15.019 and sd 4.972
-    extended_hours_values = np.random.normal(15.019, 4.972, count_benunits)
+    extended_hours_values = generator.normal(15.019, 4.972, len(pe_benunit))
     # Clip values to be between 0 and 30 hours
     extended_hours_values = np.clip(extended_hours_values, 0, 30)
 
-    pe_benunit["would_claim_extended_childcare"] = extended_would_claim
-    pe_benunit["would_claim_tfc"] = tfc_would_claim
-    pe_benunit["would_claim_universal_childcare"] = universal_would_claim
-    pe_benunit["would_claim_targeted_childcare"] = targeted_would_claim
-
     # Add the maximum extended childcare hours usage
     pe_benunit["maximum_extended_childcare_hours_usage"] = (
         extended_hours_values

diff --git a/policyengine_uk_data/datasets/imputations/capital_gains.py b/policyengine_uk_data/datasets/imputations/capital_gains.py
@@ -125,6 +125,9 @@ def loss(blend_factor):
 
     logging.info("Imputing capital gains among those with gains")
 
+    # Use seeded generator for reproducibility
+    generator = np.random.default_rng(seed=100)
+
     for i in range(len(capital_gains)):
         row = capital_gains.iloc[i]
         spline = UnivariateSpline(
@@ -136,7 +139,7 @@ def loss(blend_factor):
         upper = row.maximum_total_income
         ti_in_range = (ti >= lower) * (ti < upper)
         in_target_range = has_cg * ti_in_range > 0
-        quantiles = np.random.random(int(in_target_range.sum()))
+        quantiles = generator.random(int(in_target_range.sum()))
         pred_capital_gains = spline(quantiles)
         new_cg[in_target_range] = pred_capital_gains
 

diff --git a/policyengine_uk_data/datasets/imputations/income.py b/policyengine_uk_data/datasets/imputations/income.py
@@ -51,7 +51,9 @@ def generate_spi_table(spi: pd.DataFrame):
     LOWER = np.array([0, 16, 25, 35, 45, 55, 65, 75])
     UPPER = np.array([16, 25, 35, 45, 55, 65, 75, 80])
     age_range = spi.AGERANGE
-    spi["age"] = LOWER[age_range] + np.random.rand(len(spi)) * (
+    # Use seeded generator for reproducibility
+    generator = np.random.default_rng(seed=100)
+    spi["age"] = LOWER[age_range] + generator.random(len(spi)) * (
         UPPER[age_range] - LOWER[age_range]
     )
 

diff --git a/policyengine_uk_data/datasets/spi.py b/policyengine_uk_data/datasets/spi.py
@@ -73,8 +73,10 @@ def create_spi(
     age_range = df.AGERANGE
 
     # Randomly assign ages in age ranges
+    # Use seeded generator for reproducibility
 
-    percent_along_age_range = np.random.rand(len(df))
+    generator = np.random.default_rng(seed=100)
+    percent_along_age_range = generator.random(len(df))
     min_age = np.array([AGE_RANGES[age][0] for age in age_range])
     max_age = np.array([AGE_RANGES[age][1] for age in age_range])
     person["age"] = (

diff --git a/policyengine_uk_data/parameters/__init__.py b/policyengine_uk_data/parameters/__init__.py
@@ -0,0 +1,72 @@
+"""
+Take-up rate parameters for stochastic simulation.
+
+These parameters are stored in the data package to keep the country package
+as a purely deterministic rules engine.
+"""
+
+import yaml
+from pathlib import Path
+
+PARAMETERS_DIR = Path(__file__).parent
+
+
+def load_parameter(
+    category: str, variable_name: str, year: int = 2015
+) -> float:
+    """Load the parameter value in force for ``year`` from a YAML file.
+
+    Args:
+        category: Category subfolder (e.g., 'take_up', 'stochastic')
+        variable_name: Name of the parameter file (without .yaml)
+        year: Year for which to get the value
+
+    Returns:
+        The value dated in the latest year not after ``year``. A file with
+        a ``rates_by_children`` key returns that entry whole instead.
+
+    Raises:
+        ValueError: If no value is dated in or before ``year``.
+    """
+    yaml_path = PARAMETERS_DIR / category / f"{variable_name}.yaml"
+
+    # Parameter files may hold non-ASCII text, so do not depend on the
+    # platform's default encoding.
+    with open(yaml_path, encoding="utf-8") as f:
+        data = yaml.safe_load(f)
+
+    # Special case: ``rates_by_children`` is returned without a year lookup.
+    if "rates_by_children" in data:
+        return data["rates_by_children"]
+
+    def start_year(date_key) -> int:
+        # YAML yields datetime.date for bare ISO dates, str when quoted.
+        if hasattr(date_key, "year"):
+            return date_key.year
+        return int(str(date_key).split("-")[0])
+
+    # Sort on the ISO text so mixed str/date keys cannot raise TypeError.
+    dated = sorted(data["values"].items(), key=lambda kv: str(kv[0]))
+
+    # Only years are compared: a mid-year value covers that whole year.
+    in_force = [value for key, value in dated if start_year(key) <= year]
+
+    if not in_force:
+        raise ValueError(
+            f"No value found for {category}/{variable_name} in {year}"
+        )
+
+    return in_force[-1]
+
+
+def load_take_up_rate(variable_name: str, year: int = 2015) -> float:
+    """Look up a take-up rate in the ``take_up`` parameter folder.
+
+    Args:
+        variable_name: Stem of the YAML file under ``take_up/``
+        year: Year whose rate is wanted
+
+    Returns:
+        Whatever ``load_parameter`` yields for the ``take_up`` category
+    """
+    return load_parameter("take_up", variable_name, year=year)
diff --git a/policyengine_uk_data/parameters/stochastic/first_time_buyer_rate.yaml b/policyengine_uk_data/parameters/stochastic/first_time_buyer_rate.yaml
@@ -0,0 +1,12 @@
+description: Share of residential property purchases that are by first-time buyers
+metadata:
+  unit: /1
+  label: First-time buyer rate
+  reference:
+    - title: ONS First-time buyer mortgage sales by local authority
+      href: https://www.ons.gov.uk/releases/firsttimebuyermortgagesalesbylocalauthorityuk2006to2023
+    - title: Uswitch First-Time Buyer Statistics 2024
+      href: https://www.uswitch.com/mortgages/first-time-buyer-statistics/
+values:
+  2013-01-01: 0.280  # ONS data
+  2023-01-01: 0.384  # 38.4% of property sales were first-time buyers
diff --git a/policyengine_uk_data/parameters/stochastic/tv_licence_evasion_rate.yaml b/policyengine_uk_data/parameters/stochastic/tv_licence_evasion_rate.yaml
@@ -0,0 +1,14 @@
+description: Share of TV-owning households that evade the TV licence fee
+metadata:
+  unit: /1
+  label: TV licence evasion rate
+  reference:
+    - title: TV Licensing annual evader statistics
+      href: https://www.tvlicensing.co.uk/about/media-centre/news/tv-licensing-publishes-annual-evader-statistics-NEWS31
+    - title: House of Commons Library - TV licence fee statistics
+      href: https://commonslibrary.parliament.uk/research-briefings/cbp-8101/
+values:
+  2015-01-01: 0.05  # Historical low point
+  2018-01-01: 0.0657  # Official BBC estimate
+  2022-01-01: 0.1058  # Significant increase
+  2024-01-01: 0.1252  # Current BBC estimate
diff --git a/policyengine_uk_data/parameters/stochastic/tv_ownership_rate.yaml b/policyengine_uk_data/parameters/stochastic/tv_ownership_rate.yaml
@@ -0,0 +1,10 @@
+description: Share of households that own a functioning colour TV
+metadata:
+  unit: /1
+  label: TV ownership rate
+  reference:
+    - title: Ofcom - 95% of UK homes had at least one TV set in 2020
+      href: https://www.statista.com/statistics/269969/number-of-tv-households-in-the-uk/
+values:
+  2015-01-01: 0.96
+  2020-01-01: 0.95
diff --git a/policyengine_uk_data/parameters/take_up/child_benefit.yaml b/policyengine_uk_data/parameters/take_up/child_benefit.yaml
@@ -0,0 +1,9 @@
+description: Share of eligible children that participate in Child Benefit
+metadata:
+  unit: /1
+  reference:
+    - title: "Child Benefit statistics: 2022 annual release"
+      href: https://www.gov.uk/government/statistics/child-benefit-statistics-annual-release-august-2022/child-benefit-statistics-annual-release-data-at-august-2022#:~:text=since%202012%20the%20take%2Dup,level%20in%202022%20of%2089%25.
+values:
+  2012-01-01: 0.97
+  2022-01-01: 0.89
diff --git a/policyengine_uk_data/parameters/take_up/child_benefit_opts_out_rate.yaml b/policyengine_uk_data/parameters/take_up/child_benefit_opts_out_rate.yaml
@@ -0,0 +1,9 @@
+description: Share of families fully liable for the High Income Child Benefit Charge who opt out of Child Benefit.
+metadata:
+  unit: /1
+  label: Child Benefit HITC-liable opt-out rate
+  reference:
+    - title: "Child Benefit Statistics: Annual Release, August 2022"
+      href: https://www.gov.uk/government/statistics/child-benefit-statistics-annual-release-august-2022/child-benefit-statistics-annual-release-data-at-august-2022
+values:
+  2019-01-01: 0.23  # 3m families have ANI over £60k in the 2023 FRS, 683k families opt out of CB.