Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 55 additions & 120 deletions soepy/simulate/initial_states.py
Original file line number Diff line number Diff line change
@@ -1,123 +1,58 @@
import numpy as np
import pandas as pd

from soepy.shared.experience_stock import exp_years_to_stock
from soepy.shared.experience_stock import get_pt_increment
from soepy.shared.experience_stock import stock_to_exp_years
from soepy.shared.numerical_integration import draw_zero_one_distributed_shocks


def prepare_simulation_data(
model_params,
model_spec,
prob_educ_level,
prob_child_age,
prob_partner_present,
prob_exp_pt,
prob_exp_ft,
biased_exp,
):
"""Draw initial conditions and precompute grid objects for simulation."""

initial_educ_level = np.random.choice(
model_spec.num_educ_levels, model_spec.num_agents_sim, p=prob_educ_level
)

initial_period = np.asarray(model_spec.educ_years)[initial_educ_level]

initial_child_age = np.full(model_spec.num_agents_sim, np.nan)
initial_partner = np.full(model_spec.num_agents_sim, np.nan)
initial_exp_pt = np.full(model_spec.num_agents_sim, np.nan)
initial_exp_ft = np.full(model_spec.num_agents_sim, np.nan)

for educ_level in range(model_spec.num_educ_levels):
mask = initial_educ_level == educ_level

initial_child_age[mask] = np.random.choice(
list(range(-1, model_spec.child_age_init_max + 1)),
mask.sum(),
p=prob_child_age[educ_level],
)

initial_partner[mask] = np.random.binomial(
size=mask.sum(),
n=1,
p=prob_partner_present[educ_level],
)
initial_exp_pt[mask] = np.random.choice(
list(range(0, model_spec.init_exp_max + 1)),
mask.sum(),
p=prob_exp_pt[educ_level],
)

initial_exp_ft[mask] = np.random.choice(
list(range(0, model_spec.init_exp_max + 1)),
mask.sum(),
p=prob_exp_ft[educ_level],
)

# Combine stocks
pt_increment = get_pt_increment(
model_params=model_params,
educ_level=initial_educ_level,
child_age=initial_child_age,
biased_exp=biased_exp,
)

total_years = initial_exp_pt * pt_increment + initial_exp_ft

initial_exp_stock = exp_years_to_stock(
exp_years=total_years,
period=0,
init_exp_max=model_spec.init_exp_max,
pt_increment=pt_increment,
)

lagged_choice = lagged_choice_initial(initial_exp_years=total_years)

unobserved_type = np.random.choice(
np.arange(model_spec.num_types),
model_spec.num_agents_sim,
p=model_params.type_shares,
)

draws_sim = draw_zero_one_distributed_shocks(
model_spec.seed_sim, model_spec.num_periods, model_spec.num_agents_sim
)
draws_sim = draws_sim * float(model_params.shock_sd)

# prob_exp_pt/prob_exp_ft are passed in from simulate.py and originate from the
# same legacy share files as prob_exp_years.
initial_states = pd.DataFrame(
{
"Identifier": np.arange(model_spec.num_agents_sim, dtype=int),
"Period": initial_period.astype(int),
"Education_Level": initial_educ_level.astype(int),
"Lagged_Choice": lagged_choice.astype(int),
"Experience_Part_Time": initial_exp_pt.astype(int),
"Experience_Full_Time": initial_exp_ft.astype(int),
"Experience_Stock": initial_exp_stock.astype(float),
"Type": unobserved_type.astype(int),
"Age_Youngest_Child": initial_child_age.astype(int),
"Partner_Indicator": initial_partner.astype(int),
}
)

return initial_states, draws_sim


def lagged_choice_initial(initial_exp_years):
"""Determine initial lagged choice from total experience.

Rule (per project decision):
- lagged_choice = 2 (full-time) if initial experience years > 1
- otherwise lagged_choice = 0
"""

lagged_choice = np.zeros_like(initial_exp_years, dtype=int)
lagged_choice[initial_exp_years > 1] = 2
# Check if initial years is a float if so, assign 1 part-time
int_exp = initial_exp_years.astype(int)
is_float = np.abs(initial_exp_years - int_exp) > 1e-8
lagged_choice[is_float] = 1
return lagged_choice
from soepy.shared.constants_and_indices import NUM_CHOICES


def validate_initial_states(initial_states, model_spec):
required_columns = [
"Identifier",
"Period",
"Education_Level",
"Lagged_Choice",
"Experience_Part_Time",
"Experience_Full_Time",
"Type",
"Age_Youngest_Child",
"Partner_Indicator",
]
missing = [col for col in required_columns if col not in initial_states.columns]
if missing:
raise ValueError(f"Initial states missing columns: {missing}")

if initial_states[required_columns].isna().any().any():
raise ValueError("Initial states contain missing values.")

identifiers = initial_states["Identifier"].to_numpy()
unique_ids = np.unique(identifiers)
if len(unique_ids) != len(identifiers):
raise ValueError("Initial state identifiers must be unique.")

if not np.issubdtype(unique_ids.dtype, np.integer):
raise ValueError("Initial state identifiers must be integers.")

expected_ids = np.arange(len(unique_ids), dtype=unique_ids.dtype)
if not np.array_equal(np.sort(unique_ids), expected_ids):
raise ValueError("Initial state identifiers must be consecutive from 0.")

# if (initial_states["Period"] < 0).any() or (
# initial_states["Period"] >= model_spec.num_periods
# ).any():
# raise ValueError("Initial state periods out of bounds.")

if (initial_states["Education_Level"] < 0).any() or (
initial_states["Education_Level"] >= model_spec.num_educ_levels
).any():
raise ValueError("Initial state education levels out of bounds.")

if (initial_states["Type"] < 0).any() or (
initial_states["Type"] >= model_spec.num_types
).any():
raise ValueError("Initial state types out of bounds.")

if (initial_states["Lagged_Choice"] < 0).any() or (
initial_states["Lagged_Choice"] >= NUM_CHOICES
).any():
raise ValueError("Initial state lagged choices out of bounds.")

return initial_states
47 changes: 32 additions & 15 deletions soepy/simulate/simulate_auxiliary.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,20 @@

from soepy.shared.constants_and_indices import HOURS
from soepy.shared.constants_and_indices import NUM_CHOICES
from soepy.shared.experience_stock import exp_years_to_stock
from soepy.shared.experience_stock import get_pt_increment
from soepy.shared.experience_stock import next_stock
from soepy.shared.non_employment import calc_erziehungsgeld
from soepy.shared.non_employment import calculate_non_employment_consumption_resources
from soepy.shared.numerical_integration import draw_zero_one_distributed_shocks
from soepy.shared.wages import calculate_log_wage
from soepy.simulate.constants_sim import DATA_FORMATS_SIM
from soepy.simulate.constants_sim import DATA_FORMATS_SPARSE
from soepy.simulate.constants_sim import DATA_LABLES_SIM
from soepy.simulate.constants_sim import LABELS_DATA_SPARSE
from soepy.simulate.constants_sim import STATE_LABELS_SIM
from soepy.simulate.income_sim import calculate_employment_consumption_resources
from soepy.simulate.initial_states import prepare_simulation_data
from soepy.simulate.initial_states import validate_initial_states


def pyth_simulate(
Expand All @@ -26,33 +28,47 @@ def pyth_simulate(
covariates,
non_consumption_utilities,
child_age_update_rule,
prob_educ_level,
prob_child_age,
prob_partner_present,
prob_exp_pt,
prob_exp_ft,
prob_child,
prob_partner,
*,
biased_exp,
initial_states,
data_sparse=False,
):
"""Simulate agent histories under the continuous-experience model."""

np.random.seed(model_spec.seed_sim)

initial_states = validate_initial_states(initial_states, model_spec)
num_agents_sim = initial_states["Identifier"].nunique()

emaxs = np.asarray(emaxs)
non_consumption_utilities = np.asarray(non_consumption_utilities)

initial_states, draws_sim = prepare_simulation_data(
pt_increment = get_pt_increment(
model_params=model_params,
model_spec=model_spec,
prob_educ_level=prob_educ_level,
prob_child_age=prob_child_age,
prob_partner_present=prob_partner_present,
prob_exp_pt=prob_exp_pt,
prob_exp_ft=prob_exp_ft,
biased_exp=biased_exp,
educ_level=initial_states["Education_Level"].to_numpy(),
child_age=initial_states["Age_Youngest_Child"].to_numpy(),
biased_exp=False,
)
total_years = (
initial_states["Experience_Part_Time"].to_numpy() * pt_increment
+ initial_states["Experience_Full_Time"].to_numpy()
)
initial_states = initial_states.copy()
initial_states["Experience_Stock"] = exp_years_to_stock(
exp_years=total_years,
period=initial_states["Period"].to_numpy(),
init_exp_max=model_spec.init_exp_max,
pt_increment=pt_increment,
).astype(float)

draws_sim = draw_zero_one_distributed_shocks(
model_spec.seed_sim,
model_spec.num_periods,
num_agents_sim,
)
draws_sim = draws_sim * float(model_params.shock_sd)

data_list = simulate_agents_over_periods(
model_spec=model_spec,
Expand Down Expand Up @@ -161,7 +177,8 @@ def simulate_agents_over_periods(
)
)

wages = np.exp(log_wage_agents + draws_sim[period, : len(current_states)])
identifiers = current_states.iloc[:, state_col["Identifier"]].to_numpy()
wages = np.exp(log_wage_agents + draws_sim[period, identifiers])
wages = wages * float(model_spec.elasticity_scale)

female_income = wages[:, None] * HOURS[None, 1:]
Expand Down
Loading
Loading