diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29bb..f7d4f4ecd 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: minor + changes: + changed: + - Remove randomness from country package by moving stochastic variable generation to data package. Variables now read pre-computed values from datasets for deterministic, reproducible calculations. diff --git a/policyengine_uk/tests/test_deterministic_variables.py b/policyengine_uk/tests/test_deterministic_variables.py new file mode 100644 index 000000000..0d0889b91 --- /dev/null +++ b/policyengine_uk/tests/test_deterministic_variables.py @@ -0,0 +1,288 @@ +"""Tests for deterministic stochastic variables. + +These tests verify that variables which previously used random() now: +1. Use default values correctly in policy calculator mode (no dataset) +2. Can be set explicitly in situations +3. Produce deterministic results +""" + +import pytest +from policyengine_uk import Simulation + + +class TestDefaultValues: + """Test that stochastic variables have correct default values.""" + + def test_would_claim_child_benefit_defaults_true(self): + sim = Simulation( + situation={ + "people": {"person": {"age": {2024: 30}}}, + "benunits": {"benunit": {"members": ["person"]}}, + "households": {"household": {"members": ["person"]}}, + } + ) + result = sim.calculate("would_claim_child_benefit", 2024) + assert result[0] == True + + def test_would_claim_uc_defaults_true(self): + sim = Simulation( + situation={ + "people": {"person": {"age": {2024: 30}}}, + "benunits": {"benunit": {"members": ["person"]}}, + "households": {"household": {"members": ["person"]}}, + } + ) + result = sim.calculate("would_claim_uc", 2024) + assert result[0] == True + + def test_would_claim_pc_defaults_true(self): + sim = Simulation( + situation={ + "people": {"person": {"age": {2024: 70}}}, + "benunits": {"benunit": {"members": ["person"]}}, + "households": {"household": {"members": ["person"]}}, + } + ) + 
result = sim.calculate("would_claim_pc", 2024) + assert result[0] == True + + def test_household_owns_tv_defaults_true(self): + sim = Simulation( + situation={ + "people": {"person": {"age": {2024: 30}}}, + "benunits": {"benunit": {"members": ["person"]}}, + "households": {"household": {"members": ["person"]}}, + } + ) + result = sim.calculate("household_owns_tv", 2024) + assert result[0] == True + + def test_would_evade_tv_licence_fee_defaults_false(self): + sim = Simulation( + situation={ + "people": {"person": {"age": {2024: 30}}}, + "benunits": {"benunit": {"members": ["person"]}}, + "households": {"household": {"members": ["person"]}}, + } + ) + result = sim.calculate("would_evade_tv_licence_fee", 2024) + assert result[0] == False + + def test_is_disabled_for_benefits_defaults_false(self): + sim = Simulation( + situation={ + "people": {"person": {"age": {2024: 30}}}, + "benunits": {"benunit": {"members": ["person"]}}, + "households": {"household": {"members": ["person"]}}, + } + ) + result = sim.calculate("is_disabled_for_benefits", 2024) + assert result[0] == False + + def test_would_claim_marriage_allowance_defaults_true(self): + sim = Simulation( + situation={ + "people": {"person": {"age": {2024: 30}}}, + "benunits": {"benunit": {"members": ["person"]}}, + "households": {"household": {"members": ["person"]}}, + } + ) + result = sim.calculate("would_claim_marriage_allowance", 2024) + assert result[0] == True + + def test_child_benefit_opts_out_defaults_false(self): + sim = Simulation( + situation={ + "people": {"person": {"age": {2024: 30}}}, + "benunits": {"benunit": {"members": ["person"]}}, + "households": {"household": {"members": ["person"]}}, + } + ) + result = sim.calculate("child_benefit_opts_out", 2024) + assert result[0] == False + + +class TestExplicitOverrides: + """Test that stochastic variables can be set explicitly.""" + + def test_would_claim_child_benefit_can_be_set_false(self): + sim = Simulation( + situation={ + "people": {"person": 
{"age": {2024: 30}}}, + "benunits": { + "benunit": { + "members": ["person"], + "would_claim_child_benefit": {2024: False}, + } + }, + "households": {"household": {"members": ["person"]}}, + } + ) + result = sim.calculate("would_claim_child_benefit", 2024) + assert result[0] == False + + def test_would_claim_uc_can_be_set_false(self): + sim = Simulation( + situation={ + "people": {"person": {"age": {2024: 30}}}, + "benunits": { + "benunit": { + "members": ["person"], + "would_claim_uc": {2024: False}, + } + }, + "households": {"household": {"members": ["person"]}}, + } + ) + result = sim.calculate("would_claim_uc", 2024) + assert result[0] == False + + def test_household_owns_tv_can_be_set_false(self): + sim = Simulation( + situation={ + "people": {"person": {"age": {2024: 30}}}, + "benunits": {"benunit": {"members": ["person"]}}, + "households": { + "household": { + "members": ["person"], + "household_owns_tv": {2024: False}, + } + }, + } + ) + result = sim.calculate("household_owns_tv", 2024) + assert result[0] == False + + +class TestIsHigherEarner: + """Test deterministic tie-breaking for is_higher_earner.""" + + def test_higher_income_wins(self): + """Person with higher income should be the higher earner.""" + sim = Simulation( + situation={ + "people": { + "person1": { + "age": {2024: 30}, + "employment_income": {2024: 60000}, + }, + "person2": { + "age": {2024: 40}, + "employment_income": {2024: 50000}, + }, + }, + "benunits": {"benunit": {"members": ["person1", "person2"]}}, + "households": { + "household": {"members": ["person1", "person2"]} + }, + } + ) + result = sim.calculate("is_higher_earner", 2024) + # person1 has higher income, should be True + assert result[0] == True + # person2 has lower income, should be False + assert result[1] == False + + def test_same_income_older_wins(self): + """With same income, older person should be the higher earner.""" + sim = Simulation( + situation={ + "people": { + "person1": { + "age": {2024: 40}, + 
"employment_income": {2024: 50000}, + }, + "person2": { + "age": {2024: 30}, + "employment_income": {2024: 50000}, + }, + }, + "benunits": {"benunit": {"members": ["person1", "person2"]}}, + "households": { + "household": {"members": ["person1", "person2"]} + }, + } + ) + result = sim.calculate("is_higher_earner", 2024) + # person1 is older, should win the tie + assert result[0] == True + # person2 is younger, should lose the tie + assert result[1] == False + + def test_is_deterministic(self): + """Same inputs should always produce same outputs.""" + situation = { + "people": { + "person1": { + "age": {2024: 35}, + "employment_income": {2024: 50000}, + }, + "person2": { + "age": {2024: 35}, + "employment_income": {2024: 50000}, + }, + }, + "benunits": {"benunit": {"members": ["person1", "person2"]}}, + "households": {"household": {"members": ["person1", "person2"]}}, + } + + results = [] + for _ in range(3): + sim = Simulation(situation=situation) + results.append(tuple(sim.calculate("is_higher_earner", 2024))) + + # All results should be identical + assert results[0] == results[1] == results[2] + + +class TestDeterminism: + """Test that calculations are deterministic across runs.""" + + def test_child_benefit_is_deterministic(self): + """Child benefit calculation should be deterministic.""" + situation = { + "people": { + "adult": {"age": {2024: 30}}, + "child": {"age": {2024: 5}}, + }, + "benunits": { + "benunit": { + "members": ["adult", "child"], + "would_claim_child_benefit": {2024: True}, + } + }, + "households": {"household": {"members": ["adult", "child"]}}, + } + + results = [] + for _ in range(3): + sim = Simulation(situation=situation) + results.append(float(sim.calculate("child_benefit", 2024)[0])) + + assert results[0] == results[1] == results[2] + + def test_marriage_allowance_is_deterministic(self): + """Marriage allowance should be deterministic.""" + situation = { + "people": { + "person1": { + "age": {2024: 35}, + "marital_status": {2024: 
"MARRIED"}, + "employment_income": {2024: 20000}, + }, + "person2": { + "age": {2024: 35}, + "marital_status": {2024: "MARRIED"}, + "employment_income": {2024: 50000}, + }, + }, + "benunits": {"benunit": {"members": ["person1", "person2"]}}, + "households": {"household": {"members": ["person1", "person2"]}}, + } + + results = [] + for _ in range(3): + sim = Simulation(situation=situation) + results.append(tuple(sim.calculate("marriage_allowance", 2024))) + + assert results[0] == results[1] == results[2] diff --git a/policyengine_uk/variables/contrib/labour/attends_private_school.py b/policyengine_uk/variables/contrib/labour/attends_private_school.py index 88971d517..2301ed41f 100644 --- a/policyengine_uk/variables/contrib/labour/attends_private_school.py +++ b/policyengine_uk/variables/contrib/labour/attends_private_school.py @@ -13,6 +13,21 @@ def interpolate_percentile(param, percentile): return v1 + (v2 - v1) * (percentile - p1) / (p2 - p1) +class attends_private_school_random_draw(Variable): + label = "Private school attendance random draw" + documentation = ( + "Random draw for determining private school attendance. " + "Generated stochastically in the dataset." 
+ ) + entity = Person + definition_period = YEAR + value_type = float + + # No formula - when in dataset, OpenFisca uses dataset value automatically + # For policy calculator (non-dataset), defaults to 0.5 + default_value = 0.5 + + class attends_private_school(Variable): label = "attends private school" entity = Person @@ -76,6 +91,8 @@ def formula(person, period, parameters): * is_child ) - value = random(person) < p_attends_private_school + # Use pre-generated random draw from dataset instead of calling random() + random_draw = person("attends_private_school_random_draw", period) + value = random_draw < p_attends_private_school return value diff --git a/policyengine_uk/variables/gov/dcms/bbc/tv_licence/would_evade_tv_licence_fee.py b/policyengine_uk/variables/gov/dcms/bbc/tv_licence/would_evade_tv_licence_fee.py index fdf426e12..61be08139 100644 --- a/policyengine_uk/variables/gov/dcms/bbc/tv_licence/would_evade_tv_licence_fee.py +++ b/policyengine_uk/variables/gov/dcms/bbc/tv_licence/would_evade_tv_licence_fee.py @@ -4,12 +4,13 @@ class would_evade_tv_licence_fee(Variable): label = "Would evade TV licence fee" documentation = ( - "Whether this household would unlawfully evade the TV licence fee." + "Whether this household would unlawfully evade the TV licence fee. " + "Generated stochastically in the dataset using evasion rates." 
) entity = Household definition_period = YEAR value_type = bool - def formula(household, period, parameters): - evasion_rate = parameters(period).gov.dcms.bbc.tv_licence.evasion_rate - return random(household) <= evasion_rate + # No formula - when in dataset, OpenFisca uses dataset value automatically + # For policy calculator (non-dataset), defaults to False + default_value = False diff --git a/policyengine_uk/variables/gov/dwp/pension_credit/would_claim.py b/policyengine_uk/variables/gov/dwp/pension_credit/would_claim.py index 48f9b020d..2c043b038 100644 --- a/policyengine_uk/variables/gov/dwp/pension_credit/would_claim.py +++ b/policyengine_uk/variables/gov/dwp/pension_credit/would_claim.py @@ -3,27 +3,14 @@ class would_claim_pc(Variable): label = "Would claim Pension Credit" + documentation = ( + "Whether this benefit unit would claim Pension Credit if eligible. " + "Generated stochastically in the dataset using take-up rates." + ) entity = BenUnit definition_period = YEAR value_type = bool - def formula(benunit, period, parameters): - reported_pc = add(benunit, period, ["pension_credit_reported"]) > 0 - claims_all_entitled_benefits = benunit( - "claims_all_entitled_benefits", period - ) - baseline = benunit("baseline_pension_credit_entitlement", period) > 0 - eligible = benunit("pension_credit_entitlement", period) > 0 - takeup_rate = parameters(period).gov.dwp.pension_credit.takeup - return select( - [ - reported_pc | claims_all_entitled_benefits, - ~baseline & eligible, - True, - ], - [ - True, - random(benunit) < takeup_rate, - False, - ], - ) + # No formula - when in dataset, OpenFisca uses dataset value automatically + # For policy calculator (non-dataset), defaults to True + default_value = True diff --git a/policyengine_uk/variables/gov/dwp/universal_credit/would_claim_uc.py b/policyengine_uk/variables/gov/dwp/universal_credit/would_claim_uc.py index 137d23b39..de0beecfe 100644 --- a/policyengine_uk/variables/gov/dwp/universal_credit/would_claim_uc.py 
+++ b/policyengine_uk/variables/gov/dwp/universal_credit/would_claim_uc.py @@ -6,16 +6,11 @@ class would_claim_uc(Variable): entity = BenUnit label = "Would claim Universal Credit" documentation = ( - "Whether this family would claim the Universal Credit if eligible" + "Whether this family would claim the Universal Credit if eligible. " + "Generated stochastically in the dataset using take-up rates." ) definition_period = YEAR - def formula(benunit, period, parameters): - takes_up = ( - random(benunit) - < parameters(period).gov.dwp.universal_credit.takeup_rate - ) - is_in_microsimulation = benunit.simulation.dataset is not None - if is_in_microsimulation: - return takes_up - return True + # No formula - when in dataset, OpenFisca uses dataset value automatically + # For policy calculator (non-dataset), defaults to True + default_value = True diff --git a/policyengine_uk/variables/gov/hmrc/child_benefit_opts_out.py b/policyengine_uk/variables/gov/hmrc/child_benefit_opts_out.py index a7526c47b..3de1c91c1 100644 --- a/policyengine_uk/variables/gov/hmrc/child_benefit_opts_out.py +++ b/policyengine_uk/variables/gov/hmrc/child_benefit_opts_out.py @@ -4,24 +4,14 @@ class child_benefit_opts_out(Variable): label = "opts out of Child Benefit" documentation = ( - "Whether this family would opt out of receiving Child Benefit payments" + "Whether this family would opt out of receiving Child Benefit " + "payments. Generated stochastically in the dataset using opt-out " + "rates." 
) entity = BenUnit definition_period = YEAR value_type = bool - def formula(benunit, period, parameters): - if benunit.simulation.dataset is not None: - ani = benunit.members("adjusted_net_income", period) - hmrc = parameters(period).gov.hmrc - cb_hitc = hmrc.income_tax.charges.CB_HITC - cb = hmrc.child_benefit - in_phase_out = ani > cb_hitc.phase_out_end - return where( - benunit.any(in_phase_out), - random(benunit) < cb.opt_out_rate, - False, - ) - else: - # If we're not in a microsimulation, assume the family would not opt out - return False + # No formula - when in dataset, OpenFisca uses dataset value automatically + # For policy calculator (non-dataset), defaults to False + default_value = False diff --git a/policyengine_uk/variables/gov/hmrc/income_tax/allowances/marriage_allowance.py b/policyengine_uk/variables/gov/hmrc/income_tax/allowances/marriage_allowance.py index 642bc7fa6..3408b320a 100644 --- a/policyengine_uk/variables/gov/hmrc/income_tax/allowances/marriage_allowance.py +++ b/policyengine_uk/variables/gov/hmrc/income_tax/allowances/marriage_allowance.py @@ -1,6 +1,21 @@ from policyengine_uk.model_api import * +class would_claim_marriage_allowance(Variable): + label = "Would claim Marriage Allowance" + documentation = ( + "Whether this person would claim Marriage Allowance if eligible. " + "Generated stochastically in the dataset using take-up rates." 
+ ) + entity = Person + definition_period = YEAR + value_type = bool + + # No formula - when in dataset, OpenFisca uses dataset value automatically + # For policy calculator (non-dataset), defaults to True + default_value = True + + class marriage_allowance(Variable): value_type = float entity = Person @@ -19,7 +34,6 @@ def formula(person, period, parameters): "partners_unused_personal_allowance", period ) allowances = parameters(period).gov.hmrc.income_tax.allowances - takeup_rate = allowances.marriage_allowance.takeup_rate capped_percentage = allowances.marriage_allowance.max max_amount = allowances.personal_allowance.amount * capped_percentage amount_if_eligible_pre_rounding = min_(transferable_amount, max_amount) @@ -29,4 +43,6 @@ def formula(person, period, parameters): np.ceil(amount_if_eligible_pre_rounding / rounding_increment) * rounding_increment ) - return eligible * amount_if_eligible * (random(person) < takeup_rate) + # Use pre-generated take-up decision from dataset + would_claim = person("would_claim_marriage_allowance", period) + return eligible * amount_if_eligible * would_claim diff --git a/policyengine_uk/variables/gov/hmrc/would_claim_child_benefit.py b/policyengine_uk/variables/gov/hmrc/would_claim_child_benefit.py index 08403d7bd..9a84f521a 100644 --- a/policyengine_uk/variables/gov/hmrc/would_claim_child_benefit.py +++ b/policyengine_uk/variables/gov/hmrc/would_claim_child_benefit.py @@ -4,19 +4,13 @@ class would_claim_child_benefit(Variable): label = "Would claim Child Benefit" documentation = ( - "Whether this benefit unit would claim Child Benefit if eligible" + "Whether this benefit unit would claim Child Benefit if eligible. " + "Generated stochastically in the dataset using take-up rates." 
) entity = BenUnit definition_period = YEAR value_type = bool - def formula(benunit, period, parameters): - claims_all_entitled_benefits = benunit( - "claims_all_entitled_benefits", period - ) - takeup_rate = parameters(period).gov.hmrc.child_benefit.takeup - overall_p = takeup_rate.overall - random_takeup = (random(benunit) < overall_p) & ~benunit( - "child_benefit_opts_out", period - ) - return claims_all_entitled_benefits | random_takeup + # No formula - when in dataset, OpenFisca uses dataset value automatically + # For policy calculator (non-dataset), defaults to True + default_value = True diff --git a/policyengine_uk/variables/household/consumption/main_residential_property_purchased_is_first_home.py b/policyengine_uk/variables/household/consumption/main_residential_property_purchased_is_first_home.py index b49c5b4fe..1e67130f5 100644 --- a/policyengine_uk/variables/household/consumption/main_residential_property_purchased_is_first_home.py +++ b/policyengine_uk/variables/household/consumption/main_residential_property_purchased_is_first_home.py @@ -3,22 +3,15 @@ class main_residential_property_purchased_is_first_home(Variable): label = "Residential property bought is first home" - documentation = "Whether the residential property bought this year as a main residence was as a first-time buyer." + documentation = ( + "Whether the residential property bought this year as a main residence " + "was as a first-time buyer. Generated stochastically in the dataset " + "using first-time buyer rates by age." 
+ ) entity = Household definition_period = YEAR value_type = bool - unit = GBP - def formula(household, period, parameters): - residential_sd = parameters( - period - ).gov.hmrc.stamp_duty.statistics.residential.household - age = household.sum( - household.members("is_household_head", period) - * household.members("age", period) - ) - percentage_claiming_ftbr = ( - residential_sd.first_time_buyers_relief.calc(age) - / residential_sd.transactions_by_age.calc(age) - ) - return random(household) < percentage_claiming_ftbr + # No formula - when in dataset, OpenFisca uses dataset value automatically + # For policy calculator (non-dataset), defaults to False + default_value = False diff --git a/policyengine_uk/variables/household/demographic/household_owns_tv.py b/policyengine_uk/variables/household/demographic/household_owns_tv.py index 6d22f2344..447c4a857 100644 --- a/policyengine_uk/variables/household/demographic/household_owns_tv.py +++ b/policyengine_uk/variables/household/demographic/household_owns_tv.py @@ -3,13 +3,14 @@ class household_owns_tv(Variable): label = "Owns a TV" - documentation = "Whether this household owns a functioning colour TV." + documentation = ( + "Whether this household owns a functioning colour TV. " + "Generated stochastically in the dataset using TV ownership rates." 
+ ) entity = Household definition_period = YEAR value_type = bool - def formula(household, period, parameters): - percent_owning_tv = parameters( - period - ).gov.dcms.bbc.tv_licence.tv_ownership - return random(household) <= percent_owning_tv + # No formula - when in dataset, OpenFisca uses dataset value automatically + # For policy calculator (non-dataset), defaults to True + default_value = True diff --git a/policyengine_uk/variables/household/demographic/is_disabled_for_benefits.py b/policyengine_uk/variables/household/demographic/is_disabled_for_benefits.py index 0fdbb51a1..d5b4a7786 100644 --- a/policyengine_uk/variables/household/demographic/is_disabled_for_benefits.py +++ b/policyengine_uk/variables/household/demographic/is_disabled_for_benefits.py @@ -5,25 +5,13 @@ class is_disabled_for_benefits(Variable): value_type = bool entity = Person label = "Has a disability" - documentation = "Whether this person is disabled for benefits purposes" + documentation = ( + "Whether this person is disabled for benefits purposes. " + "In dataset mode, determined by reported DLA/PIP claims." + ) definition_period = YEAR reference = "Child Tax Credit Regulations 2002 s. 
8" - def formula(person, period, parameters): - QUALIFYING_BENEFITS = [ - "dla", - "pip", - ] - - p_claims_lcwra_if_on_pip_dla = 0.8 - p_claims_lcwra_if_not_on_pip_dla = 0.13 - - random_seed = random(person) - - on_qual_benefits = add(person, period, QUALIFYING_BENEFITS) > 0 - - return np.where( - on_qual_benefits, - random_seed < p_claims_lcwra_if_on_pip_dla, - random_seed < p_claims_lcwra_if_not_on_pip_dla, - ) + # No formula - when in dataset, OpenFisca uses dataset value automatically + # For policy calculator (non-dataset), defaults to False + default_value = False diff --git a/policyengine_uk/variables/household/demographic/is_higher_earner.py b/policyengine_uk/variables/household/demographic/is_higher_earner.py index 353b6fad8..43f3cc25f 100644 --- a/policyengine_uk/variables/household/demographic/is_higher_earner.py +++ b/policyengine_uk/variables/household/demographic/is_higher_earner.py @@ -9,8 +9,10 @@ class is_higher_earner(Variable): def formula(person, period, parameters): income = person("adjusted_net_income", period) - # Add noise to incomes in order to avoid ties - return ( - person.get_rank(person.benunit, -income + random(person) * 1e-2) - == 0 - ) + # Deterministic tie-break: equal incomes rank the older person first. + age = person("age", period) + # Combine in float64. NOTE(review): assumes the framework stores floats + # as float32, where age * 1e-6 is below the rounding step of typical + # incomes and the tie-break would be silently dropped; please confirm. + criteria = -income.astype(float) - age.astype(float) * 1e-6 + return person.get_rank(person.benunit, criteria) == 0