diff --git a/soepy/simulate/initial_states.py b/soepy/simulate/initial_states.py index 305f17e..066ee4b 100644 --- a/soepy/simulate/initial_states.py +++ b/soepy/simulate/initial_states.py @@ -1,123 +1,58 @@ import numpy as np import pandas as pd -from soepy.shared.experience_stock import exp_years_to_stock -from soepy.shared.experience_stock import get_pt_increment -from soepy.shared.experience_stock import stock_to_exp_years -from soepy.shared.numerical_integration import draw_zero_one_distributed_shocks - - -def prepare_simulation_data( - model_params, - model_spec, - prob_educ_level, - prob_child_age, - prob_partner_present, - prob_exp_pt, - prob_exp_ft, - biased_exp, -): - """Draw initial conditions and precompute grid objects for simulation.""" - - initial_educ_level = np.random.choice( - model_spec.num_educ_levels, model_spec.num_agents_sim, p=prob_educ_level - ) - - initial_period = np.asarray(model_spec.educ_years)[initial_educ_level] - - initial_child_age = np.full(model_spec.num_agents_sim, np.nan) - initial_partner = np.full(model_spec.num_agents_sim, np.nan) - initial_exp_pt = np.full(model_spec.num_agents_sim, np.nan) - initial_exp_ft = np.full(model_spec.num_agents_sim, np.nan) - - for educ_level in range(model_spec.num_educ_levels): - mask = initial_educ_level == educ_level - - initial_child_age[mask] = np.random.choice( - list(range(-1, model_spec.child_age_init_max + 1)), - mask.sum(), - p=prob_child_age[educ_level], - ) - - initial_partner[mask] = np.random.binomial( - size=mask.sum(), - n=1, - p=prob_partner_present[educ_level], - ) - initial_exp_pt[mask] = np.random.choice( - list(range(0, model_spec.init_exp_max + 1)), - mask.sum(), - p=prob_exp_pt[educ_level], - ) - - initial_exp_ft[mask] = np.random.choice( - list(range(0, model_spec.init_exp_max + 1)), - mask.sum(), - p=prob_exp_ft[educ_level], - ) - - # Combine stocks - pt_increment = get_pt_increment( - model_params=model_params, - educ_level=initial_educ_level, - child_age=initial_child_age, - biased_exp=biased_exp, - ) - - total_years = initial_exp_pt * pt_increment + initial_exp_ft - - initial_exp_stock = exp_years_to_stock( - exp_years=total_years, - period=0, - init_exp_max=model_spec.init_exp_max, - pt_increment=pt_increment, - ) - - lagged_choice = lagged_choice_initial(initial_exp_years=total_years) - - unobserved_type = np.random.choice( - np.arange(model_spec.num_types), - model_spec.num_agents_sim, - p=model_params.type_shares, - ) - - draws_sim = draw_zero_one_distributed_shocks( - model_spec.seed_sim, model_spec.num_periods, model_spec.num_agents_sim - ) - draws_sim = draws_sim * float(model_params.shock_sd) - - # prob_exp_pt/prob_exp_ft are passed in from simulate.py and originate from the - # same legacy share files as prob_exp_years. - initial_states = pd.DataFrame( - { - "Identifier": np.arange(model_spec.num_agents_sim, dtype=int), - "Period": initial_period.astype(int), - "Education_Level": initial_educ_level.astype(int), - "Lagged_Choice": lagged_choice.astype(int), - "Experience_Part_Time": initial_exp_pt.astype(int), - "Experience_Full_Time": initial_exp_ft.astype(int), - "Experience_Stock": initial_exp_stock.astype(float), - "Type": unobserved_type.astype(int), - "Age_Youngest_Child": initial_child_age.astype(int), - "Partner_Indicator": initial_partner.astype(int), - } - ) - - return initial_states, draws_sim - - -def lagged_choice_initial(initial_exp_years): - """Determine initial lagged choice from total experience. - - Rule (per project decision): - - lagged_choice = 2 (full-time) if initial experience years > 1 - - otherwise lagged_choice = 0 - """ - - lagged_choice = np.zeros_like(initial_exp_years, dtype=int) - lagged_choice[initial_exp_years > 1] = 2 - # Check if initial years is a float if so, assign 1 part-time - int_exp = initial_exp_years.astype(int) - is_float = np.abs(initial_exp_years - int_exp) > 1e-8 - lagged_choice[is_float] = 1 - return lagged_choice +from soepy.shared.constants_and_indices import NUM_CHOICES + + +def validate_initial_states(initial_states, model_spec): + required_columns = [ + "Identifier", + "Period", + "Education_Level", + "Lagged_Choice", + "Experience_Part_Time", + "Experience_Full_Time", + "Type", + "Age_Youngest_Child", + "Partner_Indicator", + ] + missing = [col for col in required_columns if col not in initial_states.columns] + if missing: + raise ValueError(f"Initial states missing columns: {missing}") + + if initial_states[required_columns].isna().any().any(): + raise ValueError("Initial states contain missing values.") + + identifiers = initial_states["Identifier"].to_numpy() + unique_ids = np.unique(identifiers) + if len(unique_ids) != len(identifiers): + raise ValueError("Initial state identifiers must be unique.") + + if not np.issubdtype(unique_ids.dtype, np.integer): + raise ValueError("Initial state identifiers must be integers.") + + expected_ids = np.arange(len(unique_ids), dtype=unique_ids.dtype) + if not np.array_equal(np.sort(unique_ids), expected_ids): + raise ValueError("Initial state identifiers must be consecutive from 0.") + + # if (initial_states["Period"] < 0).any() or ( + # initial_states["Period"] >= model_spec.num_periods + # ).any(): + # raise ValueError("Initial state periods out of bounds.") + + if (initial_states["Education_Level"] < 0).any() or ( + initial_states["Education_Level"] >= model_spec.num_educ_levels + ).any(): + raise ValueError("Initial state education levels out of bounds.") + + if (initial_states["Type"] < 0).any() or ( + initial_states["Type"] >= model_spec.num_types + ).any(): + raise ValueError("Initial state types out of bounds.") + + if (initial_states["Lagged_Choice"] < 0).any() or ( + initial_states["Lagged_Choice"] >= NUM_CHOICES + ).any(): + raise ValueError("Initial state lagged choices out of bounds.") + + return initial_states diff --git a/soepy/simulate/simulate_auxiliary.py b/soepy/simulate/simulate_auxiliary.py index 3ddada2..9bb6db4 100644 --- a/soepy/simulate/simulate_auxiliary.py +++ b/soepy/simulate/simulate_auxiliary.py @@ -3,10 +3,12 @@ from soepy.shared.constants_and_indices import HOURS from soepy.shared.constants_and_indices import NUM_CHOICES +from soepy.shared.experience_stock import exp_years_to_stock from soepy.shared.experience_stock import get_pt_increment from soepy.shared.experience_stock import next_stock from soepy.shared.non_employment import calc_erziehungsgeld from soepy.shared.non_employment import calculate_non_employment_consumption_resources +from soepy.shared.numerical_integration import draw_zero_one_distributed_shocks from soepy.shared.wages import calculate_log_wage from soepy.simulate.constants_sim import DATA_FORMATS_SIM from soepy.simulate.constants_sim import DATA_FORMATS_SPARSE @@ -14,7 +16,7 @@ from soepy.simulate.constants_sim import LABELS_DATA_SPARSE from soepy.simulate.constants_sim import STATE_LABELS_SIM from soepy.simulate.income_sim import calculate_employment_consumption_resources -from soepy.simulate.initial_states import prepare_simulation_data +from soepy.simulate.initial_states import validate_initial_states def pyth_simulate( @@ -26,33 +28,47 @@ def pyth_simulate( covariates, non_consumption_utilities, child_age_update_rule, - prob_educ_level, - prob_child_age, - prob_partner_present, - prob_exp_pt, - prob_exp_ft, prob_child, prob_partner, + *, biased_exp, + initial_states, data_sparse=False, ): """Simulate agent histories under the continuous-experience model.""" np.random.seed(model_spec.seed_sim) + initial_states = validate_initial_states(initial_states, model_spec) + num_agents_sim = initial_states["Identifier"].nunique() + emaxs = np.asarray(emaxs) non_consumption_utilities = np.asarray(non_consumption_utilities) - initial_states, draws_sim = prepare_simulation_data( + pt_increment = get_pt_increment( model_params=model_params, - model_spec=model_spec, - prob_educ_level=prob_educ_level, - prob_child_age=prob_child_age, - prob_partner_present=prob_partner_present, - prob_exp_pt=prob_exp_pt, - prob_exp_ft=prob_exp_ft, - biased_exp=biased_exp, + educ_level=initial_states["Education_Level"].to_numpy(), + child_age=initial_states["Age_Youngest_Child"].to_numpy(), + biased_exp=False, + ) + total_years = ( + initial_states["Experience_Part_Time"].to_numpy() * pt_increment + + initial_states["Experience_Full_Time"].to_numpy() + ) + initial_states = initial_states.copy() + initial_states["Experience_Stock"] = exp_years_to_stock( + exp_years=total_years, + period=initial_states["Period"].to_numpy(), + init_exp_max=model_spec.init_exp_max, + pt_increment=pt_increment, + ).astype(float) + + draws_sim = draw_zero_one_distributed_shocks( + model_spec.seed_sim, + model_spec.num_periods, + num_agents_sim, ) + draws_sim = draws_sim * float(model_params.shock_sd) data_list = simulate_agents_over_periods( model_spec=model_spec, @@ -161,7 +177,8 @@ def simulate_agents_over_periods( ) ) - wages = np.exp(log_wage_agents + draws_sim[period, : len(current_states)]) + identifiers = current_states.iloc[:, state_col["Identifier"]].to_numpy() + wages = np.exp(log_wage_agents + draws_sim[period, identifiers]) wages = wages * float(model_spec.elasticity_scale) female_income = wages[:, None] * HOURS[None, 1:] diff --git a/soepy/simulate/simulate_python.py b/soepy/simulate/simulate_python.py index d2b6480..fd6c284 100644 --- a/soepy/simulate/simulate_python.py +++ b/soepy/simulate/simulate_python.py @@ -1,13 +1,10 @@ from functools import partial -from soepy.exogenous_processes.children import gen_prob_child_init_age_vector from soepy.exogenous_processes.children import gen_prob_child_vector -from soepy.exogenous_processes.education import gen_prob_educ_level_vector -from soepy.exogenous_processes.experience import gen_prob_init_exp_component_vector from soepy.exogenous_processes.partner import gen_prob_partner -from soepy.exogenous_processes.partner import gen_prob_partner_present_vector from soepy.pre_processing.model_processing import read_model_params_init from soepy.pre_processing.model_processing import read_model_spec_init +from soepy.simulate.initial_states import validate_initial_states from soepy.simulate.simulate_auxiliary import pyth_simulate from soepy.solve.create_state_space import create_state_space_objects from soepy.solve.solve_python import get_solve_function @@ -16,6 +13,8 @@ def simulate( model_params_init_file_name, model_spec_init_file_name, + *, + initial_states, biased_exp=True, data_sparse=False, ): @@ -28,7 +27,11 @@ def simulate( data_sparse=data_sparse, ) - return simulate_func(model_params_init_file_name, model_spec_init_file_name) + return simulate_func( + model_params_init_file_name, + model_spec_init_file_name, + initial_states=initial_states, + ) def get_simulate_func( @@ -42,19 +45,6 @@ def get_simulate_func( model_params_df, model_params = read_model_params_init(model_params_init_file_name) model_spec = read_model_spec_init(model_spec_init_file_name, model_params_df) - prob_educ_level = gen_prob_educ_level_vector(model_spec) - prob_child_age = gen_prob_child_init_age_vector(model_spec) - prob_partner_present = gen_prob_partner_present_vector(model_spec) - - prob_exp_pt = gen_prob_init_exp_component_vector( - model_spec, - model_spec.pt_exp_shares_file_name, - ) - prob_exp_ft = gen_prob_init_exp_component_vector( - model_spec, - model_spec.ft_exp_shares_file_name, - ) - prob_child = gen_prob_child_vector(model_spec) prob_partner = gen_prob_partner(model_spec) @@ -77,7 +67,10 @@ def get_simulate_func( ) def simulate_func( - model_params_init_file_name_inner, model_spec_init_file_name_inner + model_params_init_file_name_inner, + model_spec_init_file_name_inner, + *, + initial_states, ): model_params_df_inner, model_params_inner = read_model_params_init( model_params_init_file_name_inner @@ -89,6 +82,9 @@ def simulate_func( non_consumption_utilities, emaxs = solve_func(model_params_inner) + initial_states_validated = validate_initial_states( + initial_states, model_spec_inner + ) df = pyth_simulate( model_params=model_params_inner, model_spec=model_spec_inner, @@ -98,14 +94,10 @@ def simulate_func( covariates=covariates, non_consumption_utilities=non_consumption_utilities, child_age_update_rule=child_age_update_rule, - prob_educ_level=prob_educ_level, - prob_child_age=prob_child_age, - prob_partner_present=prob_partner_present, - prob_exp_pt=prob_exp_pt, - prob_exp_ft=prob_exp_ft, prob_child=prob_child, prob_partner=prob_partner, biased_exp=False, + initial_states=initial_states_validated, data_sparse=data_sparse, ).set_index(["Identifier", "Period"]) @@ -120,17 +112,13 @@ def partiable_simulate( indexer, covariates, child_age_update_rule, - prob_educ_level, - prob_child_age, - prob_partner_present, - prob_exp_years, - prob_exp_pt, - prob_exp_ft, prob_child, prob_partner, data_sparse, model_params_init_file_name, model_spec_init_file_name, + *, + initial_states, ): # Read in model specification from yaml file model_params_df, model_params = read_model_params_init(model_params_init_file_name) @@ -141,6 +129,7 @@ def partiable_simulate( non_consumption_utilities, emaxs = solve_func(model_params) # Simulate agents experiences according to parameters in the model specification + initial_states_validated = validate_initial_states(initial_states, model_spec) df = pyth_simulate( model_params, model_spec, @@ -150,14 +139,10 @@ def partiable_simulate( covariates, non_consumption_utilities, child_age_update_rule, - prob_educ_level, - prob_child_age, - prob_partner_present=prob_partner_present, - prob_exp_pt=prob_exp_pt, - prob_exp_ft=prob_exp_ft, prob_child=prob_child, prob_partner=prob_partner, biased_exp=False, + initial_states=initial_states_validated, data_sparse=data_sparse, ).set_index(["Identifier", "Period"]) diff --git a/soepy/test/resources/initial_states.py b/soepy/test/resources/initial_states.py new file mode 100644 index 0000000..42d58ae --- /dev/null +++ b/soepy/test/resources/initial_states.py @@ -0,0 +1,133 @@ +import numpy as np +import pandas as pd + +from soepy.exogenous_processes.children import gen_prob_child_init_age_vector +from soepy.exogenous_processes.education import gen_prob_educ_level_vector +from soepy.exogenous_processes.experience import gen_prob_init_exp_component_vector +from soepy.exogenous_processes.partner import gen_prob_partner_present_vector +from soepy.pre_processing.model_processing import read_model_params_init +from soepy.pre_processing.model_processing import read_model_spec_init +from soepy.shared.experience_stock import get_pt_increment + + +def _lagged_choice_initial(initial_exp_years): + lagged_choice = np.zeros_like(initial_exp_years, dtype=int) + lagged_choice[initial_exp_years > 1] = 2 + int_exp = initial_exp_years.astype(int) + is_float = np.abs(initial_exp_years - int_exp) > 1e-8 + lagged_choice[is_float] = 1 + return lagged_choice + + +def create_initial_states_from_probs( + model_params, + model_spec, + *, + prob_educ_level, + prob_child_age, + prob_partner_present, + prob_exp_pt, + prob_exp_ft, +): + np.random.seed(model_spec.seed_sim) + + initial_educ_level = np.random.choice( + model_spec.num_educ_levels, + model_spec.num_agents_sim, + p=prob_educ_level, + ) + + initial_period = np.asarray(model_spec.educ_years)[initial_educ_level] + + initial_child_age = np.full(model_spec.num_agents_sim, np.nan) + initial_partner = np.full(model_spec.num_agents_sim, np.nan) + initial_exp_pt = np.full(model_spec.num_agents_sim, np.nan) + initial_exp_ft = np.full(model_spec.num_agents_sim, np.nan) + + for educ_level in range(model_spec.num_educ_levels): + mask = initial_educ_level == educ_level + + initial_child_age[mask] = np.random.choice( + list(range(-1, model_spec.child_age_init_max + 1)), + mask.sum(), + p=prob_child_age[educ_level], + ) + + initial_partner[mask] = np.random.binomial( + size=mask.sum(), + n=1, + p=prob_partner_present[educ_level], + ) + initial_exp_pt[mask] = np.random.choice( + list(range(0, model_spec.init_exp_max + 1)), + mask.sum(), + p=prob_exp_pt[educ_level], + ) + + initial_exp_ft[mask] = np.random.choice( + list(range(0, model_spec.init_exp_max + 1)), + mask.sum(), + p=prob_exp_ft[educ_level], + ) + + pt_increment = get_pt_increment( + model_params=model_params, + educ_level=initial_educ_level, + child_age=initial_child_age, + biased_exp=False, + ) + total_years = initial_exp_pt * pt_increment + initial_exp_ft + lagged_choice = _lagged_choice_initial(total_years) + + unobserved_type = np.random.choice( + np.arange(model_spec.num_types), + model_spec.num_agents_sim, + p=model_params.type_shares, + ) + + initial_states = pd.DataFrame( + { + "Identifier": np.arange(model_spec.num_agents_sim, dtype=int), + "Period": initial_period.astype(int), + "Education_Level": initial_educ_level.astype(int), + "Lagged_Choice": lagged_choice.astype(int), + "Experience_Part_Time": initial_exp_pt.astype(int), + "Experience_Full_Time": initial_exp_ft.astype(int), + "Type": unobserved_type.astype(int), + "Age_Youngest_Child": initial_child_age.astype(int), + "Partner_Indicator": initial_partner.astype(int), + } + ) + + return initial_states + + +def create_initial_states( + *, + model_params_init_file_name, + model_spec_init_file_name, +): + model_params_df, model_params = read_model_params_init(model_params_init_file_name) + model_spec = read_model_spec_init(model_spec_init_file_name, model_params_df) + + prob_educ_level = gen_prob_educ_level_vector(model_spec) + prob_child_age = gen_prob_child_init_age_vector(model_spec) + prob_partner_present = gen_prob_partner_present_vector(model_spec) + prob_exp_pt = gen_prob_init_exp_component_vector( + model_spec, + model_spec.pt_exp_shares_file_name, + ) + prob_exp_ft = gen_prob_init_exp_component_vector( + model_spec, + model_spec.ft_exp_shares_file_name, + ) + + return create_initial_states_from_probs( + model_params=model_params, + model_spec=model_spec, + prob_educ_level=prob_educ_level, + prob_child_age=prob_child_age, + prob_partner_present=prob_partner_present, + prob_exp_pt=prob_exp_pt, + prob_exp_ft=prob_exp_ft, + ) diff --git a/soepy/test/resources/regression_vault.soepy.pkl b/soepy/test/resources/regression_vault.soepy.pkl index 0988d0f..e4ded74 100644 Binary files a/soepy/test/resources/regression_vault.soepy.pkl and b/soepy/test/resources/regression_vault.soepy.pkl differ diff --git a/soepy/test/resources/update_vault.py b/soepy/test/resources/update_vault.py index 6e6db2e..1c86cdb 100644 --- a/soepy/test/resources/update_vault.py +++ b/soepy/test/resources/update_vault.py @@ -2,9 +2,16 @@ import jax.numpy as jnp +from soepy.exogenous_processes.children import gen_prob_child_init_age_vector +from soepy.exogenous_processes.education import gen_prob_educ_level_vector +from soepy.exogenous_processes.experience import gen_prob_init_exp_component_vector +from soepy.exogenous_processes.partner import gen_prob_partner_present_vector +from soepy.pre_processing.model_processing import read_model_params_init +from soepy.pre_processing.model_processing import read_model_spec_init from soepy.simulate.simulate_python import simulate from soepy.soepy_config import TEST_RESOURCES_DIR from soepy.test.resources.aux_funcs import cleanup +from soepy.test.resources.initial_states import create_initial_states_from_probs def update_sim_objectes(): @@ -44,9 +51,39 @@ def update_sim_objectes(): # Sort index after modifications random_model_params_df = random_model_params_df.sort_index() - calculated_df_sim = simulate(random_model_params_df, model_spec_init_dict) + model_params_df, model_params = read_model_params_init(random_model_params_df) + model_spec = read_model_spec_init(model_spec_init_dict, model_params_df) + + prob_educ_level = gen_prob_educ_level_vector(model_spec) + prob_child_age = gen_prob_child_init_age_vector(model_spec) + prob_partner_present = gen_prob_partner_present_vector(model_spec) + prob_exp_pt = gen_prob_init_exp_component_vector( + model_spec, model_spec.pt_exp_shares_file_name + ) + prob_exp_ft = gen_prob_init_exp_component_vector( + model_spec, model_spec.ft_exp_shares_file_name + ) + + initial_states = create_initial_states_from_probs( + model_params=model_params, + model_spec=model_spec, + prob_educ_level=prob_educ_level, + prob_child_age=prob_child_age, + prob_partner_present=prob_partner_present, + prob_exp_pt=prob_exp_pt, + prob_exp_ft=prob_exp_ft, + ) + + calculated_df_sim = simulate( + random_model_params_df, + model_spec_init_dict, + initial_states=initial_states, + ) unbiased_calc_df = simulate( - random_model_params_df, model_spec_init_dict, biased_exp=False + random_model_params_df, + model_spec_init_dict, + initial_states=initial_states, + biased_exp=False, ) vault[i] = ( diff --git a/soepy/test/test_childless.py b/soepy/test/test_childless.py index 4031df3..e60a8af 100644 --- a/soepy/test/test_childless.py +++ b/soepy/test/test_childless.py @@ -17,6 +17,7 @@ from soepy.soepy_config import TEST_RESOURCES_DIR from soepy.solve.create_state_space import create_state_space_objects from soepy.solve.solve_python import pyth_solve +from soepy.test.resources.initial_states import create_initial_states_from_probs @pytest.fixture(scope="module") @@ -132,6 +133,16 @@ def input_data(): ) # Simulate + initial_states = create_initial_states_from_probs( + model_params=model_params, + model_spec=model_spec, + prob_educ_level=prob_educ_level, + prob_child_age=prob_child_age, + prob_partner_present=prob_partner_present, + prob_exp_pt=prob_exp_pt, + prob_exp_ft=prob_exp_ft, + ) + calculated_df = pyth_simulate( model_params=model_params, model_spec=model_spec, @@ -141,14 +152,10 @@ def input_data(): covariates=covariates, non_consumption_utilities=non_consumption_utilities, child_age_update_rule=child_age_update_rule, - prob_educ_level=prob_educ_level, - prob_child_age=prob_child_age, - prob_partner_present=prob_partner_present, - prob_exp_pt=prob_exp_pt, prob_child=prob_child, - prob_exp_ft=prob_exp_ft, prob_partner=prob_partner, biased_exp=False, + initial_states=initial_states, data_sparse=False, ) diff --git a/soepy/test/test_expectation.py b/soepy/test/test_expectation.py index e447e2a..c02f831 100644 --- a/soepy/test/test_expectation.py +++ b/soepy/test/test_expectation.py @@ -3,6 +3,7 @@ from soepy.simulate.simulate_python import simulate from soepy.test.random_init import random_init from soepy.test.resources.aux_funcs import cleanup +from soepy.test.resources.initial_states import create_initial_states def test_simulation_func_exp(): @@ -25,9 +26,14 @@ def test_simulation_func_exp(): model_params_df = pd.read_pickle("test.soepy.pkl") + initial_states = create_initial_states( + model_params_init_file_name=model_params_df, + model_spec_init_file_name="test.soepy.yml", + ) calculated_df_false = simulate( model_params_init_file_name=model_params_df, model_spec_init_file_name="test.soepy.yml", + initial_states=initial_states, biased_exp=False, ) @@ -42,6 +48,7 @@ def test_simulation_func_exp(): calculated_df_true = simulate( model_params_init_file_name=model_params_df, model_spec_init_file_name="test.soepy.yml", + initial_states=initial_states, biased_exp=True, ) diff --git a/soepy/test/test_get_simulate.py b/soepy/test/test_get_simulate.py index c648a47..9bfba00 100644 --- a/soepy/test/test_get_simulate.py +++ b/soepy/test/test_get_simulate.py @@ -5,6 +5,7 @@ from soepy.simulate.simulate_python import simulate from soepy.test.random_init import random_init from soepy.test.resources.aux_funcs import cleanup +from soepy.test.resources.initial_states import create_initial_states def test_simulation_func(): @@ -26,9 +27,15 @@ def test_simulation_func(): } random_init(constr) + initial_states = create_initial_states( + model_params_init_file_name="test.soepy.pkl", + model_spec_init_file_name="test.soepy.yml", + ) + df_sim = simulate( model_params_init_file_name="test.soepy.pkl", model_spec_init_file_name="test.soepy.yml", + initial_states=initial_states, ) simulate_func = get_simulate_func( model_params_init_file_name="test.soepy.pkl", @@ -37,6 +44,7 @@ def test_simulation_func(): df_partial_sim = simulate_func( model_params_init_file_name_inner="test.soepy.pkl", model_spec_init_file_name_inner="test.soepy.yml", + initial_states=initial_states, ) pd.testing.assert_series_equal( diff --git a/soepy/test/test_quadrature.py b/soepy/test/test_quadrature.py index 38df0c8..1e4e7a1 100644 --- a/soepy/test/test_quadrature.py +++ b/soepy/test/test_quadrature.py @@ -17,6 +17,7 @@ from soepy.solve.create_state_space import create_state_space_objects from soepy.solve.solve_python import pyth_solve from soepy.test.resources.aux_funcs import create_disc_sum_av_utility +from soepy.test.resources.initial_states import create_initial_states_from_probs @pytest.fixture(scope="module") @@ -102,6 +103,16 @@ def input_data(): ) # Simulate + initial_states = create_initial_states_from_probs( + model_params=model_params, + model_spec=model_spec, + prob_educ_level=prob_educ_level, + prob_child_age=prob_child_age, + prob_partner_present=prob_partner_present, + prob_exp_pt=prob_exp_pt, + prob_exp_ft=prob_exp_ft, + ) + calculated_df = pyth_simulate( model_params=model_params, model_spec=model_spec, @@ -111,14 +122,10 @@ def input_data(): covariates=covariates, non_consumption_utilities=non_consumption_utilities, child_age_update_rule=child_age_update_rule, - prob_educ_level=prob_educ_level, - prob_child_age=prob_child_age, - prob_partner_present=prob_partner_present, - prob_exp_pt=prob_exp_pt, - prob_exp_ft=prob_exp_ft, prob_child=prob_child, prob_partner=prob_partner, biased_exp=False, + initial_states=initial_states, ) out[name] = create_disc_sum_av_utility( diff --git a/soepy/test/test_regression.py b/soepy/test/test_regression.py index 1e26c36..fdc9ce8 100644 --- a/soepy/test/test_regression.py +++ b/soepy/test/test_regression.py @@ -19,6 +19,7 @@ from soepy.solve.create_state_space import create_state_space_objects from soepy.solve.solve_python import pyth_solve from soepy.test.resources.aux_funcs import cleanup +from soepy.test.resources.initial_states import create_initial_states_from_probs CASES_TEST = random.sample(range(0, 100), 10) @@ -131,6 +132,16 @@ def test_pyth_simulate(input_vault, test_id): ) # Simulate + initial_states = create_initial_states_from_probs( + model_params=model_params, + model_spec=model_spec, + prob_educ_level=prob_educ_level, + prob_child_age=prob_child_age, + prob_partner_present=prob_partner_present, + prob_exp_pt=prob_exp_pt, + prob_exp_ft=prob_exp_ft, + ) + calculated_df = pyth_simulate( model_params=model_params, model_spec=model_spec, @@ -140,14 +151,10 @@ def test_pyth_simulate(input_vault, test_id): covariates=covariates, non_consumption_utilities=non_consumption_utilities, child_age_update_rule=child_age_update_rule, - prob_educ_level=prob_educ_level, - prob_child_age=prob_child_age, - prob_partner_present=prob_partner_present, - prob_exp_pt=prob_exp_pt, - prob_exp_ft=prob_exp_ft, prob_child=prob_child, prob_partner=prob_partner, biased_exp=False, + initial_states=initial_states, ) pd.testing.assert_series_equal( @@ -186,7 +193,41 @@ def test_simulation_func(input_vault, test_id): exog_partner_arrival_info.to_pickle("test.soepy.partner.arrival.pkl") exog_partner_separation_info.to_pickle("test.soepy.partner.separation.pkl") - calculated_df = simulate(random_model_params_df, model_spec_init_dict) + model_params_df, model_params = read_model_params_init( + model_params_init_file_name=random_model_params_df + ) + model_spec = read_model_spec_init( + model_spec_init_dict=model_spec_init_dict, + model_params=model_params_df, + ) + + prob_educ_level = gen_prob_educ_level_vector(model_spec=model_spec) + prob_child_age = gen_prob_child_init_age_vector(model_spec=model_spec) + prob_partner_present = gen_prob_partner_present_vector(model_spec=model_spec) + prob_exp_pt = gen_prob_init_exp_component_vector( + model_spec=model_spec, + model_spec_exp_file_key=model_spec.pt_exp_shares_file_name, + ) + prob_exp_ft = gen_prob_init_exp_component_vector( + model_spec=model_spec, + model_spec_exp_file_key=model_spec.ft_exp_shares_file_name, + ) + + initial_states = create_initial_states_from_probs( + model_params=model_params, + model_spec=model_spec, + prob_educ_level=prob_educ_level, + prob_child_age=prob_child_age, + prob_partner_present=prob_partner_present, + prob_exp_pt=prob_exp_pt, + prob_exp_ft=prob_exp_ft, + ) + + calculated_df = simulate( + random_model_params_df, + model_spec_init_dict, + initial_states=initial_states, + ) pd.testing.assert_series_equal( expected_df.loc[DATA_LABLES_CHECK], @@ -224,8 +265,41 @@ def test_simulation_func_unbiased(input_vault, test_id): exog_partner_arrival_info.to_pickle("test.soepy.partner.arrival.pkl") exog_partner_separation_info.to_pickle("test.soepy.partner.separation.pkl") + model_params_df, model_params = read_model_params_init( + model_params_init_file_name=random_model_params_df + ) + model_spec = read_model_spec_init( + model_spec_init_dict=model_spec_init_dict, + model_params=model_params_df, + ) + + prob_educ_level = gen_prob_educ_level_vector(model_spec=model_spec) + prob_child_age = gen_prob_child_init_age_vector(model_spec=model_spec) + prob_partner_present = gen_prob_partner_present_vector(model_spec=model_spec) + prob_exp_pt = gen_prob_init_exp_component_vector( + model_spec=model_spec, + model_spec_exp_file_key=model_spec.pt_exp_shares_file_name, + ) + prob_exp_ft = gen_prob_init_exp_component_vector( + model_spec=model_spec, + model_spec_exp_file_key=model_spec.ft_exp_shares_file_name, + ) + + initial_states = create_initial_states_from_probs( + model_params=model_params, + model_spec=model_spec, + prob_educ_level=prob_educ_level, + prob_child_age=prob_child_age, + prob_partner_present=prob_partner_present, + prob_exp_pt=prob_exp_pt, + prob_exp_ft=prob_exp_ft, + ) + calculated_df = simulate( - random_model_params_df, model_spec_init_dict, biased_exp=False + random_model_params_df, + model_spec_init_dict, + initial_states=initial_states, + biased_exp=False, ) pd.testing.assert_series_equal( @@ -264,8 +338,41 @@ def test_simulation_func_data_sparse(input_vault, test_id): exog_partner_arrival_info.to_pickle("test.soepy.partner.arrival.pkl") exog_partner_separation_info.to_pickle("test.soepy.partner.separation.pkl") + model_params_df, model_params = read_model_params_init( + model_params_init_file_name=random_model_params_df + ) + model_spec = read_model_spec_init( + model_spec_init_dict=model_spec_init_dict, + model_params=model_params_df, + ) + + prob_educ_level = gen_prob_educ_level_vector(model_spec=model_spec) + prob_child_age = gen_prob_child_init_age_vector(model_spec=model_spec) + prob_partner_present = gen_prob_partner_present_vector(model_spec=model_spec) + prob_exp_pt = gen_prob_init_exp_component_vector( + model_spec=model_spec, + model_spec_exp_file_key=model_spec.pt_exp_shares_file_name, + ) + prob_exp_ft = gen_prob_init_exp_component_vector( + model_spec=model_spec, + model_spec_exp_file_key=model_spec.ft_exp_shares_file_name, + ) + + initial_states = create_initial_states_from_probs( + model_params=model_params, + model_spec=model_spec, + prob_educ_level=prob_educ_level, + prob_child_age=prob_child_age, + prob_partner_present=prob_partner_present, + prob_exp_pt=prob_exp_pt, + prob_exp_ft=prob_exp_ft, + ) + calculated_df = simulate( - random_model_params_df, model_spec_init_dict, data_sparse=True + random_model_params_df, + model_spec_init_dict, + initial_states=initial_states, + data_sparse=True, ) pd.testing.assert_series_equal( diff --git a/soepy/test/test_single_woman.py b/soepy/test/test_single_woman.py index ee0095d..ecd9a86 100644 --- a/soepy/test/test_single_woman.py +++ b/soepy/test/test_single_woman.py @@ -17,6 +17,7 @@ from soepy.solve.create_state_space import create_state_space_objects from soepy.solve.solve_python import pyth_solve from soepy.test.resources.aux_funcs import create_disc_sum_av_utility +from soepy.test.resources.initial_states import create_initial_states_from_probs @pytest.fixture(scope="module") @@ -98,6 +99,16 @@ def input_data(): ) # Simulate + initial_states = create_initial_states_from_probs( + model_params=model_params, + model_spec=model_spec, + prob_educ_level=prob_educ_level, + prob_child_age=prob_child_age, + prob_partner_present=prob_partner_present, + prob_exp_pt=prob_exp_pt, + prob_exp_ft=prob_exp_ft, + ) + calculated_df = pyth_simulate( model_params=model_params, model_spec=model_spec, @@ -107,14 +118,10 @@ def input_data(): covariates=covariates, non_consumption_utilities=non_consumption_utilities, child_age_update_rule=child_age_update_rule, - prob_educ_level=prob_educ_level, - prob_child_age=prob_child_age, - prob_partner_present=prob_partner_present, - prob_exp_pt=prob_exp_pt, - prob_exp_ft=prob_exp_ft, prob_child=prob_child, prob_partner=prob_partner, biased_exp=False, + initial_states=initial_states, ) out[name] = create_disc_sum_av_utility( @@ -124,7 +131,7 @@ def input_data(): # Check if really all are single at any time assert (calculated_df["Male_Wages"] == 0).all() - out["regression_disc_sum"] = -0.10864927638945981 + out["regression_disc_sum"] = -0.10947236852045482 return out diff --git a/soepy/test/test_unit.py b/soepy/test/test_unit.py index 913c70c..08bacf2 100644 --- a/soepy/test/test_unit.py +++ b/soepy/test/test_unit.py @@ -2,6 +2,7 @@ from soepy.simulate.simulate_python import simulate from soepy.test.random_init import random_init +from soepy.test.resources.initial_states import create_initial_states def test_unit_nan(): @@ -22,9 +23,14 @@ def test_unit_nan(): "NUM_DRAWS_EMAX": 20, } random_init(constr) + initial_states = create_initial_states( + model_params_init_file_name="test.soepy.pkl", + model_spec_init_file_name="test.soepy.yml", + ) df = simulate( model_params_init_file_name="test.soepy.pkl", model_spec_init_file_name="test.soepy.yml", + initial_states=initial_states, ).reset_index() for educ_level, educ_years in enumerate(constr["EDUC_YEARS"]): @@ -51,9 +57,14 @@ def test_no_children_no_exp(): } random_init(constr) + initial_states = create_initial_states( + model_params_init_file_name="test.soepy.pkl", + model_spec_init_file_name="test.soepy.yml", + ) df = simulate( model_params_init_file_name="test.soepy.pkl", model_spec_init_file_name="test.soepy.yml", + initial_states=initial_states, ).reset_index() # If the initial child-age distribution is degenerate at -1, then at period 0 all @@ -85,9 +96,14 @@ def test_unit_data_frame_shape(): } random_init(constr) + initial_states = create_initial_states( + model_params_init_file_name="test.soepy.pkl", + model_spec_init_file_name="test.soepy.yml", + ) df = simulate( model_params_init_file_name="test.soepy.pkl", model_spec_init_file_name="test.soepy.yml", + initial_states=initial_states, ).reset_index() counts = [df[df["Education_Level"] == i]["Identifier"].nunique() for i in [0, 1, 2]] diff --git a/soepy/test/test_validation_childless.py b/soepy/test/test_validation_childless.py index 151abb1..b1825f2 100644 --- a/soepy/test/test_validation_childless.py +++ b/soepy/test/test_validation_childless.py @@ -17,6 +17,7 @@ from soepy.soepy_config import TEST_RESOURCES_DIR from soepy.solve.create_state_space import create_state_space_objects from soepy.solve.solve_python import pyth_solve +from soepy.test.resources.initial_states import create_initial_states_from_probs @pytest.fixture(scope="module") @@ -114,6 +115,16 @@ def input_data(): ) # Simulate + initial_states = create_initial_states_from_probs( + model_params=model_params, + model_spec=model_spec, + prob_educ_level=prob_educ_level, + prob_child_age=prob_child_age, + prob_partner_present=prob_partner_present, + prob_exp_pt=prob_exp_pt, + prob_exp_ft=prob_exp_ft, + ) + calculated_df = pyth_simulate( model_params, model_spec, @@ -123,14 +134,10 @@ def input_data(): covariates, non_consumption_utilities, child_age_update_rule, - prob_educ_level, - prob_child_age, - prob_partner_present, - prob_exp_pt, - prob_exp_ft, prob_child, prob_partner, biased_exp=False, + initial_states=initial_states, ) out[name] = calculated_df diff --git a/soepy/test/test_value_function_simulation_consistency.py b/soepy/test/test_value_function_simulation_consistency.py index f0b9584..2de66d7 100644 --- a/soepy/test/test_value_function_simulation_consistency.py +++ b/soepy/test/test_value_function_simulation_consistency.py @@ -3,6 +3,7 @@ from soepy.simulate.simulate_python import get_simulate_func from soepy.test.resources.aux_funcs import cleanup +from soepy.test.resources.initial_states import create_initial_states def _write_minimal_exog_files(*, num_periods: int) -> None: @@ -186,9 +187,14 @@ def test_value_function_matches_mean_realized_discounted_sum(): biased_exp=True, data_sparse=False, ) + initial_states = create_initial_states( + model_params_init_file_name=model_params_df, + model_spec_init_file_name=model_spec_init_dict, + ) df = simulate_func( model_params_init_file_name_inner=model_params_df, model_spec_init_file_name_inner=model_spec_init_dict, + initial_states=initial_states, ) # Compute realized discounted sum of chosen flow utility per agent.