From 6657544d2b99316652dfd306d7d7f1edfa7fb0cb Mon Sep 17 00:00:00 2001 From: thewati Date: Tue, 9 Dec 2025 15:59:56 +0200 Subject: [PATCH 01/10] . --- .../clinics/ResourceFile_ClinicConfigurations/Default.csv | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/resources/healthsystem/human_resources/clinics/ResourceFile_ClinicConfigurations/Default.csv b/resources/healthsystem/human_resources/clinics/ResourceFile_ClinicConfigurations/Default.csv index 88c9a3cb73..871f162935 100644 --- a/resources/healthsystem/human_resources/clinics/ResourceFile_ClinicConfigurations/Default.csv +++ b/resources/healthsystem/human_resources/clinics/ResourceFile_ClinicConfigurations/Default.csv @@ -1 +1,3 @@ -Facility_ID,Officer_Type_Code,GenericClinic +version https://git-lfs.github.com/spec/v1 +oid sha256:cd312903ff50d5233d81075b1f38e7879b8933e3ad7067d52c696e4f37e51eac +size 44 From 6e1f1921e7904e95fedeacbfe411d02cde0eb0bb Mon Sep 17 00:00:00 2001 From: Precious29-web Date: Wed, 21 Jan 2026 15:30:04 +0200 Subject: [PATCH 02/10] initial commit --- src/tlo/methods/cardio_metabolic_disorders.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tlo/methods/cardio_metabolic_disorders.py b/src/tlo/methods/cardio_metabolic_disorders.py index f2ad9c75f1..29bcbb0861 100644 --- a/src/tlo/methods/cardio_metabolic_disorders.py +++ b/src/tlo/methods/cardio_metabolic_disorders.py @@ -1739,6 +1739,7 @@ def apply(self, person_id, squeeze_factor): class HSI_CardioMetabolicDisorders_Refill_Medication(HSI_Event, IndividualScopeEventMixin): + #This is an HSI for medication refill """ This is a Health System Interaction Event in which a person seeks a refill prescription of medication. The next refill of medication is also scheduled. 
From 41fe3ae25332fa5804ad0fcd9558a8ccb4408c6e Mon Sep 17 00:00:00 2001 From: Precious29-web Date: Thu, 22 Jan 2026 15:57:03 +0200 Subject: [PATCH 03/10] herbal medication linear models --- src/tlo/methods/enhanced_lifestyle.py | 103 +++++++++++++++++++++++++- 1 file changed, 99 insertions(+), 4 deletions(-) diff --git a/src/tlo/methods/enhanced_lifestyle.py b/src/tlo/methods/enhanced_lifestyle.py index d9164f04fd..5b5f41b297 100644 --- a/src/tlo/methods/enhanced_lifestyle.py +++ b/src/tlo/methods/enhanced_lifestyle.py @@ -280,9 +280,32 @@ def __init__(self, name=None): ), "fsw_transition": Parameter( Types.REAL, "proportion of sex workers that stop being a sex worker each year" - ) + ), + "init_p_herbal_medication_use_in_rural": Parameter( + Types.REAL, "proportion of people in rural areas that use herbal medication" + ), + "init_p_herbal_medication_use_in_urban": Parameter( + Types.REAL, "proportion of people in urban areas that use herbal medication" + ), + 'r_start_herbal_medication_rural': Parameter( + Types.REAL, + 'probability per 3 months of starting herbal medication in rural areas' + ), + 'r_start_herbal_medication_urban': Parameter( + Types.REAL, + 'probability per 3 months of starting herbal medication in urban areas' + ), + 'r_stop_herbal_medication_rural': Parameter( + Types.REAL, + 'probability per 3 months of stopping herbal medication in rural areas' + ), + 'r_stop_herbal_medication_urban': Parameter( + Types.REAL, + 'probability per 3 months of stopping herbal medication in urban areas' + ), } + # Properties of individuals that this module provides. # Again each has a name, type and description. In addition, properties may be marked # as optional if they can be undefined for a given individual. 
@@ -337,7 +360,9 @@ def __init__(self, name=None): 'li_date_acquire_clean_drinking_water': Property(Types.DATE, 'date acquire clean drinking water'), 'li_date_acquire_non_wood_burn_stove': Property(Types.DATE, 'date acquire non-wood burning stove'), "li_is_sexworker": Property(Types.BOOL, "Is the person a sex worker"), - "li_is_circ": Property(Types.BOOL, "Is the person circumcised if they are male (False for all females)"), + "li_is_circ": Property(Types.BOOL, "Is the person circumcised if they are male (False for all females)" + ), + 'li_herbal_medication_use': Property( Types.BOOL, 'whether someone uses herbal medication or not'), } def read_parameters(self, resourcefilepath: Optional[Path] = None): @@ -425,7 +450,7 @@ def on_birth(self, mother_id, child_id): df.at[child_id, 'li_is_circ'] = ( self.rng.rand() < self.parameters['proportion_of_men_that_are_assumed_to_be_circumcised_at_birth'] ) - + df.at[child_id, 'herbal_medication_use'] = False class EduPropertyInitialiser: """ a class that will initialise education property in the population dataframe. 
it is mimicing the @@ -632,7 +657,12 @@ def __init__(self, module): 'li_is_sexworker': { 'init': self.female_sex_workers(), 'update': self.female_sex_workers() - } + }, + 'li_herbal_medication': { + 'init': self.herbal_medication_linear_model(), + 'update': self.update_herbal_medication_property_linear_model(), + }, + } def is_edu_dictionary_empty(self): @@ -1174,6 +1204,26 @@ def handle_male_circumcision_prop(self, df, rng=None, **externals) -> pd.Series: male_circ_lm = LinearModel.custom(handle_male_circumcision_prop, parameters=self.params) return male_circ_lm + def herbal_medication_linear_model(self) -> LinearModel: + """Assign herbal medication use based on rural and urban""" + + def predict_herbal_use(self, df, rng=None, **externals) -> pd.Series: + p = self.parameters + # Probability depends ONLY on li_urban + prob = pd.Series( + np.where( + df.li_urban, + p['init_p_herbal_medication_use_in_urban'], + p['init_p_herbal_medication_use_in_rural'], + ), + index=df.index, + dtype=float + ) + rnd = rng.random_sample(len(df)) + return rnd < prob + + return LinearModel.custom(predict_herbal_use, parameters=self.params) + # --------------------- LINEAR MODELS FOR UPDATING POPULATION PROPERTIES ------------------------------ # # todo: make exposed to campaign `_property` reflect index of individuals who have transitioned @@ -1886,6 +1936,51 @@ def handle_bmi_transitions(self, df, rng=None, **externals) -> pd.Series: return bmi_lm + def update_herbal_medication_property_linear_model(self) -> LinearModel: + """ + Update herbal medication use over time. + Transitions depend only on rural/urban residence. 
+ """ + + def handle_herbal_medication_transitions(self, df, rng=None, **externals) -> pd.Series: + p = self.parameters + # Copy current state + herbal_trans = df.li_herbal_medication.copy() + # ------------------------- + # START herbal medication + # ------------------------- + not_using = df.index[df.is_alive & ~df.li_herbal_medication] + eff_p_start = pd.Series( + np.where( + df.loc[not_using, 'li_urban'], + p['r_start_herbal_medication_urban'], + p['r_start_herbal_medication_rural'], + ), + index=not_using, + dtype=float + ) + will_start = rng.random_sample(len(not_using)) < eff_p_start + herbal_trans.loc[not_using[will_start]] = True + # ------------------------- + # STOP herbal medication + # ------------------------- + using = df.index[df.is_alive & df.li_herbal_medication] + eff_p_stop = pd.Series( + np.where( + df.loc[using, 'li_urban'], + p['r_stop_herbal_medication_urban'], + p['r_stop_herbal_medication_rural'], + ), + index=using, + dtype=float + ) + will_stop = rng.random_sample(len(using)) < eff_p_stop + herbal_trans.loc[using[will_stop]] = False + return herbal_trans + + return LinearModel.custom(handle_herbal_medication_transitions, parameters=self.params) + + class LifestyleEvent(RegularEvent, PopulationScopeEventMixin): """ Regular event that updates all lifestyle properties for population From 28f65a5218d427bb8e353e1b926f6a67ca1c26d1 Mon Sep 17 00:00:00 2001 From: Precious29-web Date: Fri, 23 Jan 2026 10:01:07 +0200 Subject: [PATCH 04/10] parameters for herbal medication use and analysis --- .../parameter_values.csv | 4 ++-- .../enhanced_lifestyle_analyses.py | 4 +++- src/tlo/methods/enhanced_lifestyle.py | 14 ++++++++++++-- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/resources/ResourceFile_Lifestyle_Enhanced/parameter_values.csv b/resources/ResourceFile_Lifestyle_Enhanced/parameter_values.csv index ec6b23fa88..8deb89e75e 100644 --- a/resources/ResourceFile_Lifestyle_Enhanced/parameter_values.csv +++ 
b/resources/ResourceFile_Lifestyle_Enhanced/parameter_values.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1b551597e808189e2eb3729d74d050df020f79a30446fccf2a417aacc280567 -size 3973 +oid sha256:b0a664c45d45c524c70835fb38a65b41cc95265d178e2ac90de5dd1a52172fe1 +size 4335 diff --git a/src/scripts/enhanced_lifestyle_analyses/enhanced_lifestyle_analyses.py b/src/scripts/enhanced_lifestyle_analyses/enhanced_lifestyle_analyses.py index b8f2de5a55..3379b12d83 100644 --- a/src/scripts/enhanced_lifestyle_analyses/enhanced_lifestyle_analyses.py +++ b/src/scripts/enhanced_lifestyle_analyses/enhanced_lifestyle_analyses.py @@ -65,6 +65,8 @@ def __init__(self, logs=None, path: Optional[str] = None): "per urban or rural"), "li_is_circ": PlotDescriptor("Male circumcision", "Sum of all males"), "li_is_sexworker": PlotDescriptor("sex workers", "Sum of all females aged between 15-49"), + "li_herbal_medication": PlotDescriptor("herbal medication use", "Sum of all individuals per urban or rural" + ), } # A dictionary to map properties and their description. 
Useful when setting plot legend @@ -84,7 +86,7 @@ def __init__(self, logs=None, path: Optional[str] = None): # define all properties that are categorised by rural or urban in addition to age and sex self.cat_by_rural_urban_props = ['li_wealth', 'li_bmi', 'li_low_ex', 'li_ex_alc', 'li_wood_burn_stove', 'li_unimproved_sanitation', - 'li_no_clean_drinking_water'] + 'li_no_clean_drinking_water','li_herbal_medication'] # date-stamp to label log files and any other outputs self.datestamp: str = datetime.date.today().strftime("__%Y_%m_%d") diff --git a/src/tlo/methods/enhanced_lifestyle.py b/src/tlo/methods/enhanced_lifestyle.py index 5b5f41b297..7045b7694d 100644 --- a/src/tlo/methods/enhanced_lifestyle.py +++ b/src/tlo/methods/enhanced_lifestyle.py @@ -362,7 +362,7 @@ def __init__(self, name=None): "li_is_sexworker": Property(Types.BOOL, "Is the person a sex worker"), "li_is_circ": Property(Types.BOOL, "Is the person circumcised if they are male (False for all females)" ), - 'li_herbal_medication_use': Property( Types.BOOL, 'whether someone uses herbal medication or not'), + 'li_herbal_medication': Property( Types.BOOL, 'whether someone uses herbal medication or not'), } def read_parameters(self, resourcefilepath: Optional[Path] = None): @@ -2074,7 +2074,17 @@ def apply(self, population): key=_property, data=flatten_multi_index_series_into_dict_for_logging(data) ) - + #Herbal Medication Use + herbal_summary = ( + df.loc[df.is_alive] + .groupby(['li_urban', 'sex', 'li_herbal_medication']) + .size() + .rename('count') + ) + logger.info( + key='li_herbal_medication', + data=flatten_multi_index_series_into_dict_for_logging(herbal_summary) + ) # ---------------------- log properties associated with WASH under_5 = df.is_alive & (df.age_years < 5) between_5_and_15 = df.is_alive & (df.age_years.between(5, 15)) From f790e05e3f9466e552f5ef69474edef8796e9f44 Mon Sep 17 00:00:00 2001 From: thewati Date: Fri, 23 Jan 2026 15:48:12 +0200 Subject: [PATCH 05/10] fix on_birth 
initialisation --- src/tlo/methods/enhanced_lifestyle.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/tlo/methods/enhanced_lifestyle.py b/src/tlo/methods/enhanced_lifestyle.py index 7045b7694d..3e76c66a44 100644 --- a/src/tlo/methods/enhanced_lifestyle.py +++ b/src/tlo/methods/enhanced_lifestyle.py @@ -305,7 +305,6 @@ def __init__(self, name=None): ), } - # Properties of individuals that this module provides. # Again each has a name, type and description. In addition, properties may be marked # as optional if they can be undefined for a given individual. @@ -361,15 +360,15 @@ def __init__(self, name=None): 'li_date_acquire_non_wood_burn_stove': Property(Types.DATE, 'date acquire non-wood burning stove'), "li_is_sexworker": Property(Types.BOOL, "Is the person a sex worker"), "li_is_circ": Property(Types.BOOL, "Is the person circumcised if they are male (False for all females)" - ), - 'li_herbal_medication': Property( Types.BOOL, 'whether someone uses herbal medication or not'), + ), + 'li_herbal_medication': Property(Types.BOOL, 'whether someone uses herbal medication or not'), } def read_parameters(self, resourcefilepath: Optional[Path] = None): p = self.parameters dataframes = read_csv_files(resourcefilepath / 'ResourceFile_Lifestyle_Enhanced', - files=["parameter_values", "urban_rural_by_district"], - ) + files=["parameter_values", "urban_rural_by_district"], + ) self.load_parameters_from_dataframe(dataframes["parameter_values"]) p['init_p_urban'] = ( dataframes["urban_rural_by_district"].drop( @@ -450,7 +449,8 @@ def on_birth(self, mother_id, child_id): df.at[child_id, 'li_is_circ'] = ( self.rng.rand() < self.parameters['proportion_of_men_that_are_assumed_to_be_circumcised_at_birth'] ) - df.at[child_id, 'herbal_medication_use'] = False + df.at[child_id, 'li_herbal_medication'] = df.at[_id_inherit_from, 'li_herbal_medication'] + class EduPropertyInitialiser: """ a class that will initialise education property in the 
population dataframe. it is mimicing the @@ -862,14 +862,14 @@ def init_marital_status(self, df, rng=None, **externals) -> pd.Series: p = self.parameters li_mar_stat_dtype = df.li_mar_stat.dtype - mar_stat = pd.Series(data=1, index=df.index, dtype=li_mar_stat_dtype ) + mar_stat = pd.Series(data=1, index=df.index, dtype=li_mar_stat_dtype) # select individuals of different age category age_ranges = [(15, 20), (20, 30), (30, 40), (40, 50), (50, 60), (60, np.inf)] for lower_age, upper_age in age_ranges: subpopulation = df.index[ df.age_years.between(lower_age, upper_age, inclusive="left") & df.is_alive - ] + ] parameters_key = ( f"init_dist_mar_stat_age{lower_age}{upper_age}" if upper_age != np.inf else @@ -1935,7 +1935,6 @@ def handle_bmi_transitions(self, df, rng=None, **externals) -> pd.Series: bmi_lm = LinearModel.custom(handle_bmi_transitions, parameters=self.params) return bmi_lm - def update_herbal_medication_property_linear_model(self) -> LinearModel: """ Update herbal medication use over time. 
From 440ab134953fbe87d4d9cb63e6e545fc7fd9c92c Mon Sep 17 00:00:00 2001 From: thewati Date: Fri, 23 Jan 2026 16:32:09 +0200 Subject: [PATCH 06/10] analysis and logging edited --- .../enhanced_lifestyle_analyses.py | 520 +++++++++++++++--- src/tlo/methods/enhanced_lifestyle.py | 32 +- 2 files changed, 467 insertions(+), 85 deletions(-) diff --git a/src/scripts/enhanced_lifestyle_analyses/enhanced_lifestyle_analyses.py b/src/scripts/enhanced_lifestyle_analyses/enhanced_lifestyle_analyses.py index 3379b12d83..8e63ae73f4 100644 --- a/src/scripts/enhanced_lifestyle_analyses/enhanced_lifestyle_analyses.py +++ b/src/scripts/enhanced_lifestyle_analyses/enhanced_lifestyle_analyses.py @@ -14,6 +14,7 @@ from tlo import Date, Simulation, logging from tlo.analysis.utils import parse_log_file, unflatten_flattened_multi_index_in_logging from tlo.methods import demography, enhanced_lifestyle, simplified_births +import numpy as np def add_footnote(fig: plt.Figure, footnote: str): @@ -66,7 +67,7 @@ def __init__(self, logs=None, path: Optional[str] = None): "li_is_circ": PlotDescriptor("Male circumcision", "Sum of all males"), "li_is_sexworker": PlotDescriptor("sex workers", "Sum of all females aged between 15-49"), "li_herbal_medication": PlotDescriptor("herbal medication use", "Sum of all individuals per urban or rural" - ), + ), } # A dictionary to map properties and their description. 
Useful when setting plot legend @@ -86,7 +87,7 @@ def __init__(self, logs=None, path: Optional[str] = None): # define all properties that are categorised by rural or urban in addition to age and sex self.cat_by_rural_urban_props = ['li_wealth', 'li_bmi', 'li_low_ex', 'li_ex_alc', 'li_wood_burn_stove', 'li_unimproved_sanitation', - 'li_no_clean_drinking_water','li_herbal_medication'] + 'li_no_clean_drinking_water', 'li_herbal_medication'] # date-stamp to label log files and any other outputs self.datestamp: str = datetime.date.today().strftime("__%Y_%m_%d") @@ -218,6 +219,66 @@ def plot_categorical_properties_by_gender(self, li_property: str): plt.savefig(self.outputpath / (li_property + self.datestamp + '.png'), format='png') plt.close(fig=fig) # close figure after saving it to avoid opening multiple figures + # def plot_non_categorical_properties_by_gender(self, li_property: str): + # """ a function to plot non-categorical properties of lifestyle module grouped by gender + # + # :param li_property: any other non-categorical property defined in lifestyle module """ + # + # # set y-axis limit. + # y_lim: float = 0.8 + # if li_property in ['li_no_access_handwashing', 'li_high_salt', 'li_wood_burn_stove', 'li_in_ed']: + # y_lim = 1.0 + # + # if li_property in ['li_tob', 'li_ex_alc']: + # y_lim = 0.3 + # + # # plot for male circumcision and female sex workers + # if li_property in ['li_is_circ', 'li_is_sexworker']: + # self.male_circumcision_and_sex_workers_plot(li_property) + # + # else: + # col: int = 0 # counter for indexing purposes + # # create subplots + # fig, axes = plt.subplots(nrows=2 if li_property in self.cat_by_rural_urban_props + # or li_property == 'li_ed_lev' else 1, ncols=2, figsize=(10, 5)) + # for gender, desc in self.gender_des.items(): + # + # df_dict = dict() + # if li_property in self.cat_by_rural_urban_props: + # _row: int = 0 # row counter + # _rows_counter: int = 0 # a counter for plotting. 
setting rows + # for _key, _value in self._rural_urban_state.items(): + # df_dict[f'{gender}_{_value}_{_row}'] = self.dfs[li_property][_key][gender]["True"].sum( + # axis=1) / self.dfs[li_property][_key][gender].sum(axis=1) + # _row += 1 + # + # else: + # df = self.dfs[li_property].reorder_levels([0, 2, 1, 3], axis=1) if li_property == 'li_in_ed' \ + # else self.dfs[li_property] + # df_dict[gender] = df[gender]["True"].sum(axis=1) / df[gender].sum(axis=1) + # for _key in df_dict.keys(): + # # do plotting + # ax = df_dict[_key].plot(kind='bar', stacked=True, + # ax=axes[int(_key.split("_")[-1]), col] if + # li_property in self.cat_by_rural_urban_props else axes[col], + # ylim=(0, y_lim), + # legend=None, + # color='darkturquoise', + # title=f"{_key.split('_')[1]} {desc} {self.en_props[li_property].label}" + # if li_property in self.cat_by_rural_urban_props + # else f"{desc} {self.en_props[li_property].label}", + # ylabel=f"{self.en_props[li_property].label} proportions", xlabel="Year" + # ) + # self.custom_axis_formatter(df_dict[_key], ax) + # # increase counter + # col += 1 + # fig.legend([self.en_props[li_property].label], loc='lower left', bbox_to_anchor=(0.75, 0.8)) + # # save and display plots for property categories by gender + # add_footnote(fig, f'{self.en_props[li_property].per_gender_footnote}') + # fig.tight_layout() + # plt.savefig(self.outputpath / (li_property + self.datestamp + '.png'), format='png') + # plt.close(fig=fig) # close figure after saving it to avoid opening multiple figures + def plot_non_categorical_properties_by_gender(self, li_property: str): """ a function to plot non-categorical properties of lifestyle module grouped by gender @@ -234,49 +295,203 @@ def plot_non_categorical_properties_by_gender(self, li_property: str): # plot for male circumcision and female sex workers if li_property in ['li_is_circ', 'li_is_sexworker']: self.male_circumcision_and_sex_workers_plot(li_property) + return # Exit early for these properties + + # Check 
the actual structure of the DataFrame + df_property = self.dfs[li_property] + first_level_values = df_property.columns.get_level_values(0).unique() + + # Determine the structure + has_gender_first = all(v in ['F', 'M'] for v in first_level_values) + has_urban_rural_first = all( + str(v).lower() in ['true', 'false'] or v in [True, False] for v in first_level_values) + + print(f"DEBUG for {li_property}:") + print(f" First level values: {list(first_level_values)}") + print(f" has_gender_first: {has_gender_first}") + print(f" has_urban_rural_first: {has_urban_rural_first}") + + # Special handling for li_in_ed which has different structure + if li_property == 'li_in_ed': + # li_in_ed has structure [gender][li_wealth][li_in_ed][age_years] + # We need to aggregate across wealth levels and ages + col: int = 0 + fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 5)) + axes = np.array([[axes[0], axes[1]]]) - else: - col: int = 0 # counter for indexing purposes - # create subplots - fig, axes = plt.subplots(nrows=2 if li_property in self.cat_by_rural_urban_props - or li_property == 'li_ed_lev' else 1, ncols=2, figsize=(10, 5)) for gender, desc in self.gender_des.items(): - - df_dict = dict() - if li_property in self.cat_by_rural_urban_props: - _row: int = 0 # row counter - _rows_counter: int = 0 # a counter for plotting. 
setting rows - for _key, _value in self._rural_urban_state.items(): - df_dict[f'{gender}_{_value}_{_row}'] = self.dfs[li_property][_key][gender]["True"].sum( - axis=1) / self.dfs[li_property][_key][gender].sum(axis=1) - _row += 1 - - else: - df = self.dfs[li_property].reorder_levels([0, 2, 1, 3], axis=1) if li_property == 'li_in_ed' \ - else self.dfs[li_property] - df_dict[gender] = df[gender]["True"].sum(axis=1) / df[gender].sum(axis=1) - for _key in df_dict.keys(): - # do plotting - ax = df_dict[_key].plot(kind='bar', stacked=True, - ax=axes[int(_key.split("_")[-1]), col] if - li_property in self.cat_by_rural_urban_props else axes[col], - ylim=(0, y_lim), - legend=None, - color='darkturquoise', - title=f"{_key.split('_')[1]} {desc} {self.en_props[li_property].label}" - if li_property in self.cat_by_rural_urban_props - else f"{desc} {self.en_props[li_property].label}", - ylabel=f"{self.en_props[li_property].label} proportions", xlabel="Year" - ) - self.custom_axis_formatter(df_dict[_key], ax) - # increase counter + # Get all columns for this gender + gender_cols = df_property[gender] + # Sum across all wealth levels, ages, and True/False + total_in_education = pd.Series(0, index=df_property.index) + total_population = pd.Series(0, index=df_property.index) + + # Iterate through all columns for this gender + for col_name in gender_cols.columns: + # col_name is a tuple like ('1', 'True', '10') + wealth_level, in_ed_status, age = col_name + data_series = gender_cols[col_name] + + total_population += data_series + if in_ed_status == 'True' or in_ed_status == True: + total_in_education += data_series + + proportion = total_in_education / total_population.replace(0, np.nan) + proportion = proportion.fillna(0) + + ax = proportion.plot(kind='bar', stacked=True, + ax=axes[0, col], + ylim=(0, y_lim), + legend=None, + color='darkturquoise', + title=f"{desc} {self.en_props[li_property].label}", + ylabel=f"{self.en_props[li_property].label} proportions", + xlabel="Year" + ) + 
self.custom_axis_formatter(proportion, ax) col += 1 + fig.legend([self.en_props[li_property].label], loc='lower left', bbox_to_anchor=(0.75, 0.8)) - # save and display plots for property categories by gender add_footnote(fig, f'{self.en_props[li_property].per_gender_footnote}') fig.tight_layout() plt.savefig(self.outputpath / (li_property + self.datestamp + '.png'), format='png') - plt.close(fig=fig) # close figure after saving it to avoid opening multiple figures + plt.close(fig=fig) + return + + # For other properties, determine plotting structure + if has_urban_rural_first: + # Structure: [urban/rural][gender][property][age_range] + fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 5)) + col = 0 + + for gender, desc in self.gender_des.items(): + df_dict = {} + row = 0 + + for urban_key, urban_desc in self._rural_urban_state.items(): + # Convert key to match what's in the DataFrame + if urban_key == 'True' and True in first_level_values: + df_key = True + elif urban_key == 'False' and False in first_level_values: + df_key = False + elif urban_key in first_level_values: + df_key = urban_key + elif str(urban_key) in [str(v) for v in first_level_values]: + for v in first_level_values: + if str(v) == str(urban_key): + df_key = v + break + else: + print(f"WARNING: Urban key {urban_key} not found in {li_property}") + df_dict[f'{gender}_{urban_desc}_{row}'] = pd.Series( + [0] * len(df_property.index), index=df_property.index + ) + row += 1 + continue + + # Get data for this urban/rural, gender + try: + gender_data = df_property[df_key][gender] + + # Find the True column + true_col = None + if "True" in gender_data: + true_col = gender_data["True"] + elif True in gender_data: + true_col = gender_data[True] + elif 'True' in gender_data: + true_col = gender_data['True'] + + if true_col is not None: + total = gender_data.sum(axis=1) + proportion = true_col.sum(axis=1) / total.replace(0, np.nan) + proportion = proportion.fillna(0) + 
df_dict[f'{gender}_{urban_desc}_{row}'] = proportion + else: + print(f"WARNING: No True column found for {li_property}[{df_key}][{gender}]") + df_dict[f'{gender}_{urban_desc}_{row}'] = pd.Series( + [0] * len(df_property.index), index=df_property.index + ) + except Exception as e: + print(f"ERROR processing {li_property}[{df_key}][{gender}]: {e}") + df_dict[f'{gender}_{urban_desc}_{row}'] = pd.Series( + [0] * len(df_property.index), index=df_property.index + ) + + row += 1 + + # Plot for this gender + for plot_key, plot_data in df_dict.items(): + plot_row = int(plot_key.split('_')[-1]) + ax = plot_data.plot(kind='bar', stacked=True, + ax=axes[plot_row, col], + ylim=(0, y_lim), + legend=None, + color='darkturquoise', + title=f"{plot_key.split('_')[1]} {desc} {self.en_props[li_property].label}", + ylabel=f"{self.en_props[li_property].label} proportions", + xlabel="Year" + ) + self.custom_axis_formatter(plot_data, ax) + + col += 1 + + else: + # Structure: [gender][property][age_range] or other + fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 5)) + axes = np.array([[axes[0], axes[1]]]) + col = 0 + + for gender, desc in self.gender_des.items(): + try: + if gender in df_property: + gender_data = df_property[gender] + + # Find the True column + true_col = None + if "True" in gender_data: + true_col = gender_data["True"] + elif True in gender_data: + true_col = gender_data[True] + elif 'True' in gender_data: + true_col = gender_data['True'] + + if true_col is not None: + total = gender_data.sum(axis=1) + proportion = true_col.sum(axis=1) / total.replace(0, np.nan) + proportion = proportion.fillna(0) + else: + # If no True column, check if property values are directly accessible + print(f"DEBUG: No True column for {li_property}[{gender}]. 
Checking structure...") + print(f" Columns: {list(gender_data.columns)[:5]}...") + # For properties like li_urban where the property itself is the value + # We might need to handle this differently + proportion = pd.Series([0] * len(df_property.index), index=df_property.index) + else: + print(f"WARNING: Gender {gender} not found in {li_property}") + proportion = pd.Series([0] * len(df_property.index), index=df_property.index) + + except Exception as e: + print(f"ERROR processing {li_property} for {gender}: {e}") + proportion = pd.Series([0] * len(df_property.index), index=df_property.index) + + ax = proportion.plot(kind='bar', stacked=True, + ax=axes[0, col], + ylim=(0, y_lim), + legend=None, + color='darkturquoise', + title=f"{desc} {self.en_props[li_property].label}", + ylabel=f"{self.en_props[li_property].label} proportions", + xlabel="Year" + ) + self.custom_axis_formatter(proportion, ax) + col += 1 + + fig.legend([self.en_props[li_property].label], loc='lower left', bbox_to_anchor=(0.75, 0.8)) + add_footnote(fig, f'{self.en_props[li_property].per_gender_footnote}') + fig.tight_layout() + plt.savefig(self.outputpath / (li_property + self.datestamp + '.png'), format='png') + plt.close(fig=fig) def display_all_categorical_and_non_categorical_plots_by_gender(self): """ a function to display plots for both categorical and non-categorical properties grouped by gender """ @@ -383,6 +598,69 @@ def plot_categorical_properties_by_age_group(self, li_property: str): plt.savefig(self.outputpath / (li_property + self.datestamp + '.png'), format='png') plt.close(fig=fig) # close figure after saving it to avoid opening multiple figures + # def plot_non_categorical_properties_by_age_group(self, li_property): + # """ plot all non-categorical properties by age group """ + # # select logs from the latest year. 
In this case we are selecting year 2021 + # y_lim: float = 1.0 + # if li_property in ['li_is_sexworker']: + # y_lim = 0.040 + # + # all_logs_df = self.dfs[li_property] + # mask = (all_logs_df.index > pd.to_datetime('2021-01-01')) & (all_logs_df.index <= pd.to_datetime('2022-01-01')) + # self.dfs[li_property] = self.dfs[li_property].loc[mask] + # + # # create subplots + # fig, axes = plt.subplots(nrows=2 if li_property in self.cat_by_rural_urban_props or li_property == + # 'li_in_ed' else 1, + # figsize=(10, 5), sharex=True) + # + # df_dict = dict() + # if li_property == 'li_in_ed' or li_property in self.cat_by_rural_urban_props: + # _col: int = 0 # column counter + # key_value_desc = self.wealth_desc.items() if li_property == 'li_in_ed' else \ + # self._rural_urban_state.items() + # for _key, _value in key_value_desc: + # temp_df = pd.DataFrame() + # for _bool_value in ['True', 'False']: + # if li_property == 'li_in_ed': + # temp_df[_bool_value] = self.dfs[li_property]['M'][_key][_bool_value].sum(axis=0) + \ + # self.dfs[li_property]['F'][_key][_bool_value].sum(axis=0) + # + # else: + # temp_df[_bool_value] = self.dfs[li_property][_key]['M'][_bool_value].sum(axis=0) + \ + # self.dfs[li_property][_key]['F'][_bool_value].sum(axis=0) + # + # df_dict[f'{_value}_{_col}'] = temp_df['True'] / temp_df.sum(axis=1) + # _col += 1 + # + # else: + # plot_df = pd.DataFrame() + # for _bool_value in ['True', 'False']: + # plot_df[_bool_value] = self.dfs[li_property]['M'][_bool_value].sum(axis=0) + \ + # self.dfs[li_property]['F'][_bool_value].sum(axis=0) + # + # df_dict['non_urban_1'] = plot_df['True'] / plot_df.sum(axis=1) + # + # for _key in df_dict.keys(): + # # do plotting + # df_dict[_key].plot(kind='bar', stacked=True, + # ax=axes[int(_key.split("_")[-1])] if + # li_property in self.cat_by_rural_urban_props or li_property == 'li_in_ed' else axes, + # ylim=(0, y_lim), + # legend=None, + # color='darkturquoise', + # title=f"{self.en_props[li_property].label} by age group in 
2021, {_key.split('_')[0]}" + # if li_property in self.cat_by_rural_urban_props or li_property == 'li_in_ed' else + # f"{self.en_props[li_property].label} by age group in 2021", + # ylabel=f"{self.en_props[li_property].label} proportions", xlabel="Year" + # ) + # + # fig.legend([self.en_props[li_property].label], loc='lower left', bbox_to_anchor=(0.8, 0.7)) + # add_footnote(fig, f'{self.en_props[li_property].per_age_group_footnote}') + # fig.tight_layout() + # plt.savefig(self.outputpath / (li_property + self.datestamp + '.png'), format='png') + # plt.close(fig=fig) # close figure after saving it to avoid opening multiple figures + def plot_non_categorical_properties_by_age_group(self, li_property): """ plot all non-categorical properties by age group """ # select logs from the latest year. In this case we are selecting year 2021 @@ -394,51 +672,143 @@ def plot_non_categorical_properties_by_age_group(self, li_property): mask = (all_logs_df.index > pd.to_datetime('2021-01-01')) & (all_logs_df.index <= pd.to_datetime('2022-01-01')) self.dfs[li_property] = self.dfs[li_property].loc[mask] - # create subplots - fig, axes = plt.subplots(nrows=2 if li_property in self.cat_by_rural_urban_props or li_property == - 'li_in_ed' else 1, - figsize=(10, 5), sharex=True) + # Check the actual structure + df_property = self.dfs[li_property] + first_level_values = df_property.columns.get_level_values(0).unique() - df_dict = dict() - if li_property == 'li_in_ed' or li_property in self.cat_by_rural_urban_props: - _col: int = 0 # column counter - key_value_desc = self.wealth_desc.items() if li_property == 'li_in_ed' else \ - self._rural_urban_state.items() - for _key, _value in key_value_desc: + # Determine if it has urban/rural structure + has_urban_rural_structure = False + for val in first_level_values: + if str(val).lower() in ['true', 'false'] or val in [True, False]: + has_urban_rural_structure = True + break + + # Initialize df_dict + df_dict = {} + + # Special handling for 
li_in_ed + if li_property == 'li_in_ed': + # li_in_ed has special structure [gender][li_wealth][li_in_ed][age_years] + fig, axes = plt.subplots(nrows=2, figsize=(10, 5), sharex=True) + + _col = 0 + for wealth_key, wealth_desc in self.wealth_desc.items(): temp_df = pd.DataFrame() for _bool_value in ['True', 'False']: - if li_property == 'li_in_ed': - temp_df[_bool_value] = self.dfs[li_property]['M'][_key][_bool_value].sum(axis=0) + \ - self.dfs[li_property]['F'][_key][_bool_value].sum(axis=0) + try: + m_data = df_property['M'][wealth_key][_bool_value].sum(axis=0) + f_data = df_property['F'][wealth_key][_bool_value].sum(axis=0) + temp_df[_bool_value] = m_data + f_data + except KeyError: + # Try with boolean True/False + bool_val = True if _bool_value == 'True' else False + m_data = df_property['M'][wealth_key][bool_val].sum(axis=0) + f_data = df_property['F'][wealth_key][bool_val].sum(axis=0) + temp_df[_bool_value] = m_data + f_data + + df_dict[f'{wealth_desc}_{_col}'] = temp_df['True'] / temp_df.sum(axis=1).replace(0, np.nan) + _col += 1 - else: - temp_df[_bool_value] = self.dfs[li_property][_key]['M'][_bool_value].sum(axis=0) + \ - self.dfs[li_property][_key]['F'][_bool_value].sum(axis=0) + elif has_urban_rural_structure and li_property in self.cat_by_rural_urban_props: + # Has urban/rural structure + fig, axes = plt.subplots(nrows=2, figsize=(10, 5), sharex=True) + + _col = 0 + for urban_key, urban_desc in self._rural_urban_state.items(): + # Find the matching key in the DataFrame + df_key = None + for val in first_level_values: + if str(val).lower() == str(urban_key).lower() or \ + (urban_key == 'True' and val is True) or \ + (urban_key == 'False' and val is False): + df_key = val + break + + if df_key is None: + print(f"WARNING: Could not find urban key {urban_key} for {li_property}") + # Create empty series with correct index (age groups) + age_groups = sorted(df_property.columns.get_level_values(-1).unique()) + df_dict[f'{urban_desc}_{_col}'] = pd.Series([0] * 
len(age_groups), index=age_groups) + _col += 1 + continue - df_dict[f'{_value}_{_col}'] = temp_df['True'] / temp_df.sum(axis=1) + temp_df = pd.DataFrame() + for _bool_value in ['True', 'False']: + try: + m_data = df_property[df_key]['M'][_bool_value].sum(axis=0) + f_data = df_property[df_key]['F'][_bool_value].sum(axis=0) + temp_df[_bool_value] = m_data + f_data + except KeyError: + # Try with boolean True/False + bool_val = True if _bool_value == 'True' else False + m_data = df_property[df_key]['M'][bool_val].sum(axis=0) + f_data = df_property[df_key]['F'][bool_val].sum(axis=0) + temp_df[_bool_value] = m_data + f_data + + proportion = temp_df['True'] / temp_df.sum(axis=1).replace(0, np.nan) + df_dict[f'{urban_desc}_{_col}'] = proportion.fillna(0) _col += 1 else: + # No urban/rural structure or not in cat_by_rural_urban_props + fig, axes = plt.subplots(nrows=1, figsize=(10, 5), sharex=True) + plot_df = pd.DataFrame() for _bool_value in ['True', 'False']: - plot_df[_bool_value] = self.dfs[li_property]['M'][_bool_value].sum(axis=0) + \ - self.dfs[li_property]['F'][_bool_value].sum(axis=0) - - df_dict['non_urban_1'] = plot_df['True'] / plot_df.sum(axis=1) - - for _key in df_dict.keys(): - # do plotting - df_dict[_key].plot(kind='bar', stacked=True, - ax=axes[int(_key.split("_")[-1])] if - li_property in self.cat_by_rural_urban_props or li_property == 'li_in_ed' else axes, - ylim=(0, y_lim), - legend=None, - color='darkturquoise', - title=f"{self.en_props[li_property].label} by age group in 2021, {_key.split('_')[0]}" - if li_property in self.cat_by_rural_urban_props or li_property == 'li_in_ed' else - f"{self.en_props[li_property].label} by age group in 2021", - ylabel=f"{self.en_props[li_property].label} proportions", xlabel="Year" - ) + try: + m_data = df_property['M'][_bool_value].sum(axis=0) + f_data = df_property['F'][_bool_value].sum(axis=0) + plot_df[_bool_value] = m_data + f_data + except KeyError: + # Try with boolean True/False + bool_val = True if 
_bool_value == 'True' else False + m_data = df_property['M'][bool_val].sum(axis=0) + f_data = df_property['F'][bool_val].sum(axis=0) + plot_df[_bool_value] = m_data + f_data + + proportion = plot_df['True'] / plot_df.sum(axis=1).replace(0, np.nan) + df_dict['all'] = proportion.fillna(0) + + # Plotting - Check if we have data to plot + if not df_dict: + print(f"WARNING: No data to plot for {li_property}") + plt.close(fig=fig) + return + + # Convert axes to array if needed for consistent indexing + if not isinstance(axes, np.ndarray): + axes = np.array([axes]) + + # Plot each item in df_dict + for i, (_key, plot_data) in enumerate(df_dict.items()): + # Determine which axis to use + if len(df_dict) > 1 and len(axes) > 1: + ax = axes[i] + else: + ax = axes[0] if isinstance(axes, np.ndarray) else axes + + # Ensure plot_data is a Series with proper index + if isinstance(plot_data, pd.Series): + # Sort by index if it's numeric-like + try: + plot_data = plot_data.sort_index(key=lambda x: pd.to_numeric(x, errors='ignore')) + except: + pass + else: + # Convert to Series if it's not + plot_data = pd.Series(plot_data) + + plot_data.plot(kind='bar', + ax=ax, + ylim=(0, y_lim), + legend=None, + color='darkturquoise', + title=f"{self.en_props[li_property].label} by age group in 2021, {_key.split('_')[0]}" + if len(df_dict) > 1 else + f"{self.en_props[li_property].label} by age group in 2021", + ylabel=f"{self.en_props[li_property].label} proportions", + xlabel="Age Group" + ) fig.legend([self.en_props[li_property].label], loc='lower left', bbox_to_anchor=(0.8, 0.7)) add_footnote(fig, f'{self.en_props[li_property].per_age_group_footnote}') @@ -509,8 +879,8 @@ def run(): # Basic arguments required for the simulation start_date = Date(2010, 1, 1) - end_date = Date(2050, 1, 1) - pop_size = 20000 + end_date = Date(2015, 1, 1) + pop_size = 5000 # Path to the resource files used by the disease and intervention methods resourcefilepath = './resources' diff --git 
a/src/tlo/methods/enhanced_lifestyle.py b/src/tlo/methods/enhanced_lifestyle.py index 3e76c66a44..43995ed048 100644 --- a/src/tlo/methods/enhanced_lifestyle.py +++ b/src/tlo/methods/enhanced_lifestyle.py @@ -2031,6 +2031,18 @@ def apply(self, population): for _property in all_lm_keys: if _property in log_by_age_15up: + if _property == 'li_herbal_medication': + # Log li_herbal_medication like other properties that are categorized by rural/urban + data = grouped_counts_with_all_combinations( + df.loc[df.is_alive], + ["li_urban", "sex", "li_herbal_medication", "age_range"] + ) + logger.info( + key=_property, + data=flatten_multi_index_series_into_dict_for_logging(data) + ) + continue + if _property in cat_by_rural_urban_props: data = grouped_counts_with_all_combinations( df.loc[df.is_alive & (df.age_years >= 15)], @@ -2074,16 +2086,16 @@ def apply(self, population): data=flatten_multi_index_series_into_dict_for_logging(data) ) #Herbal Medication Use - herbal_summary = ( - df.loc[df.is_alive] - .groupby(['li_urban', 'sex', 'li_herbal_medication']) - .size() - .rename('count') - ) - logger.info( - key='li_herbal_medication', - data=flatten_multi_index_series_into_dict_for_logging(herbal_summary) - ) + # herbal_summary = ( + # df.loc[df.is_alive] + # .groupby(['li_urban', 'sex', 'li_herbal_medication']) + # .size() + # .rename('count') + # ) + # logger.info( + # key='li_herbal_medication', + # data=flatten_multi_index_series_into_dict_for_logging(herbal_summary) + # ) # ---------------------- log properties associated with WASH under_5 = df.is_alive & (df.age_years < 5) between_5_and_15 = df.is_alive & (df.age_years.between(5, 15)) From d35074f270f5970d00861ee1bd0a695f3a640bb0 Mon Sep 17 00:00:00 2001 From: Precious29-web Date: Mon, 26 Jan 2026 14:20:00 +0200 Subject: [PATCH 07/10] herbal medication plots --- .../enhanced_lifestyle_analyses.py | 516 +++++++++++++++--- src/tlo/methods/enhanced_lifestyle.py | 31 +- 2 files changed, 453 insertions(+), 94 deletions(-) diff 
--git a/src/scripts/enhanced_lifestyle_analyses/enhanced_lifestyle_analyses.py b/src/scripts/enhanced_lifestyle_analyses/enhanced_lifestyle_analyses.py index 3379b12d83..41435d3fed 100644 --- a/src/scripts/enhanced_lifestyle_analyses/enhanced_lifestyle_analyses.py +++ b/src/scripts/enhanced_lifestyle_analyses/enhanced_lifestyle_analyses.py @@ -14,6 +14,7 @@ from tlo import Date, Simulation, logging from tlo.analysis.utils import parse_log_file, unflatten_flattened_multi_index_in_logging from tlo.methods import demography, enhanced_lifestyle, simplified_births +import numpy as np def add_footnote(fig: plt.Figure, footnote: str): @@ -66,7 +67,7 @@ def __init__(self, logs=None, path: Optional[str] = None): "li_is_circ": PlotDescriptor("Male circumcision", "Sum of all males"), "li_is_sexworker": PlotDescriptor("sex workers", "Sum of all females aged between 15-49"), "li_herbal_medication": PlotDescriptor("herbal medication use", "Sum of all individuals per urban or rural" - ), + ), } # A dictionary to map properties and their description. 
Useful when setting plot legend @@ -86,7 +87,7 @@ def __init__(self, logs=None, path: Optional[str] = None): # define all properties that are categorised by rural or urban in addition to age and sex self.cat_by_rural_urban_props = ['li_wealth', 'li_bmi', 'li_low_ex', 'li_ex_alc', 'li_wood_burn_stove', 'li_unimproved_sanitation', - 'li_no_clean_drinking_water','li_herbal_medication'] + 'li_no_clean_drinking_water', 'li_herbal_medication'] # date-stamp to label log files and any other outputs self.datestamp: str = datetime.date.today().strftime("__%Y_%m_%d") @@ -218,6 +219,66 @@ def plot_categorical_properties_by_gender(self, li_property: str): plt.savefig(self.outputpath / (li_property + self.datestamp + '.png'), format='png') plt.close(fig=fig) # close figure after saving it to avoid opening multiple figures + # def plot_non_categorical_properties_by_gender(self, li_property: str): + # """ a function to plot non-categorical properties of lifestyle module grouped by gender + # + # :param li_property: any other non-categorical property defined in lifestyle module """ + # + # # set y-axis limit. + # y_lim: float = 0.8 + # if li_property in ['li_no_access_handwashing', 'li_high_salt', 'li_wood_burn_stove', 'li_in_ed']: + # y_lim = 1.0 + # + # if li_property in ['li_tob', 'li_ex_alc']: + # y_lim = 0.3 + # + # # plot for male circumcision and female sex workers + # if li_property in ['li_is_circ', 'li_is_sexworker']: + # self.male_circumcision_and_sex_workers_plot(li_property) + # + # else: + # col: int = 0 # counter for indexing purposes + # # create subplots + # fig, axes = plt.subplots(nrows=2 if li_property in self.cat_by_rural_urban_props + # or li_property == 'li_ed_lev' else 1, ncols=2, figsize=(10, 5)) + # for gender, desc in self.gender_des.items(): + # + # df_dict = dict() + # if li_property in self.cat_by_rural_urban_props: + # _row: int = 0 # row counter + # _rows_counter: int = 0 # a counter for plotting. 
setting rows + # for _key, _value in self._rural_urban_state.items(): + # df_dict[f'{gender}_{_value}_{_row}'] = self.dfs[li_property][_key][gender]["True"].sum( + # axis=1) / self.dfs[li_property][_key][gender].sum(axis=1) + # _row += 1 + # + # else: + # df = self.dfs[li_property].reorder_levels([0, 2, 1, 3], axis=1) if li_property == 'li_in_ed' \ + # else self.dfs[li_property] + # df_dict[gender] = df[gender]["True"].sum(axis=1) / df[gender].sum(axis=1) + # for _key in df_dict.keys(): + # # do plotting + # ax = df_dict[_key].plot(kind='bar', stacked=True, + # ax=axes[int(_key.split("_")[-1]), col] if + # li_property in self.cat_by_rural_urban_props else axes[col], + # ylim=(0, y_lim), + # legend=None, + # color='darkturquoise', + # title=f"{_key.split('_')[1]} {desc} {self.en_props[li_property].label}" + # if li_property in self.cat_by_rural_urban_props + # else f"{desc} {self.en_props[li_property].label}", + # ylabel=f"{self.en_props[li_property].label} proportions", xlabel="Year" + # ) + # self.custom_axis_formatter(df_dict[_key], ax) + # # increase counter + # col += 1 + # fig.legend([self.en_props[li_property].label], loc='lower left', bbox_to_anchor=(0.75, 0.8)) + # # save and display plots for property categories by gender + # add_footnote(fig, f'{self.en_props[li_property].per_gender_footnote}') + # fig.tight_layout() + # plt.savefig(self.outputpath / (li_property + self.datestamp + '.png'), format='png') + # plt.close(fig=fig) # close figure after saving it to avoid opening multiple figures + def plot_non_categorical_properties_by_gender(self, li_property: str): """ a function to plot non-categorical properties of lifestyle module grouped by gender @@ -234,49 +295,203 @@ def plot_non_categorical_properties_by_gender(self, li_property: str): # plot for male circumcision and female sex workers if li_property in ['li_is_circ', 'li_is_sexworker']: self.male_circumcision_and_sex_workers_plot(li_property) + return # Exit early for these properties + + # Check 
the actual structure of the DataFrame + df_property = self.dfs[li_property] + first_level_values = df_property.columns.get_level_values(0).unique() + + # Determine the structure + has_gender_first = all(v in ['F', 'M'] for v in first_level_values) + has_urban_rural_first = all( + str(v).lower() in ['true', 'false'] or v in [True, False] for v in first_level_values) + + print(f"DEBUG for {li_property}:") + print(f" First level values: {list(first_level_values)}") + print(f" has_gender_first: {has_gender_first}") + print(f" has_urban_rural_first: {has_urban_rural_first}") + + # Special handling for li_in_ed which has different structure + if li_property == 'li_in_ed': + # li_in_ed has structure [gender][li_wealth][li_in_ed][age_years] + # We need to aggregate across wealth levels and ages + col: int = 0 + fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 5)) + axes = np.array([[axes[0], axes[1]]]) - else: - col: int = 0 # counter for indexing purposes - # create subplots - fig, axes = plt.subplots(nrows=2 if li_property in self.cat_by_rural_urban_props - or li_property == 'li_ed_lev' else 1, ncols=2, figsize=(10, 5)) for gender, desc in self.gender_des.items(): - - df_dict = dict() - if li_property in self.cat_by_rural_urban_props: - _row: int = 0 # row counter - _rows_counter: int = 0 # a counter for plotting. 
setting rows - for _key, _value in self._rural_urban_state.items(): - df_dict[f'{gender}_{_value}_{_row}'] = self.dfs[li_property][_key][gender]["True"].sum( - axis=1) / self.dfs[li_property][_key][gender].sum(axis=1) - _row += 1 - - else: - df = self.dfs[li_property].reorder_levels([0, 2, 1, 3], axis=1) if li_property == 'li_in_ed' \ - else self.dfs[li_property] - df_dict[gender] = df[gender]["True"].sum(axis=1) / df[gender].sum(axis=1) - for _key in df_dict.keys(): - # do plotting - ax = df_dict[_key].plot(kind='bar', stacked=True, - ax=axes[int(_key.split("_")[-1]), col] if - li_property in self.cat_by_rural_urban_props else axes[col], - ylim=(0, y_lim), - legend=None, - color='darkturquoise', - title=f"{_key.split('_')[1]} {desc} {self.en_props[li_property].label}" - if li_property in self.cat_by_rural_urban_props - else f"{desc} {self.en_props[li_property].label}", - ylabel=f"{self.en_props[li_property].label} proportions", xlabel="Year" - ) - self.custom_axis_formatter(df_dict[_key], ax) - # increase counter + # Get all columns for this gender + gender_cols = df_property[gender] + # Sum across all wealth levels, ages, and True/False + total_in_education = pd.Series(0, index=df_property.index) + total_population = pd.Series(0, index=df_property.index) + + # Iterate through all columns for this gender + for col_name in gender_cols.columns: + # col_name is a tuple like ('1', 'True', '10') + wealth_level, in_ed_status, age = col_name + data_series = gender_cols[col_name] + + total_population += data_series + if in_ed_status == 'True' or in_ed_status == True: + total_in_education += data_series + + proportion = total_in_education / total_population.replace(0, np.nan) + proportion = proportion.fillna(0) + + ax = proportion.plot(kind='bar', stacked=True, + ax=axes[0, col], + ylim=(0, y_lim), + legend=None, + color='darkturquoise', + title=f"{desc} {self.en_props[li_property].label}", + ylabel=f"{self.en_props[li_property].label} proportions", + xlabel="Year" + ) + 
self.custom_axis_formatter(proportion, ax) col += 1 + fig.legend([self.en_props[li_property].label], loc='lower left', bbox_to_anchor=(0.75, 0.8)) - # save and display plots for property categories by gender add_footnote(fig, f'{self.en_props[li_property].per_gender_footnote}') fig.tight_layout() plt.savefig(self.outputpath / (li_property + self.datestamp + '.png'), format='png') - plt.close(fig=fig) # close figure after saving it to avoid opening multiple figures + plt.close(fig=fig) + return + + # For other properties, determine plotting structure + if has_urban_rural_first: + # Structure: [urban/rural][gender][property][age_range] + fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 5)) + col = 0 + + for gender, desc in self.gender_des.items(): + df_dict = {} + row = 0 + + for urban_key, urban_desc in self._rural_urban_state.items(): + # Convert key to match what's in the DataFrame + if urban_key == 'True' and True in first_level_values: + df_key = True + elif urban_key == 'False' and False in first_level_values: + df_key = False + elif urban_key in first_level_values: + df_key = urban_key + elif str(urban_key) in [str(v) for v in first_level_values]: + for v in first_level_values: + if str(v) == str(urban_key): + df_key = v + break + else: + print(f"WARNING: Urban key {urban_key} not found in {li_property}") + df_dict[f'{gender}_{urban_desc}_{row}'] = pd.Series( + [0] * len(df_property.index), index=df_property.index + ) + row += 1 + continue + + # Get data for this urban/rural, gender + try: + gender_data = df_property[df_key][gender] + + # Find the True column + true_col = None + if "True" in gender_data: + true_col = gender_data["True"] + elif True in gender_data: + true_col = gender_data[True] + elif 'True' in gender_data: + true_col = gender_data['True'] + + if true_col is not None: + total = gender_data.sum(axis=1) + proportion = true_col.sum(axis=1) / total.replace(0, np.nan) + proportion = proportion.fillna(0) + 
df_dict[f'{gender}_{urban_desc}_{row}'] = proportion + else: + print(f"WARNING: No True column found for {li_property}[{df_key}][{gender}]") + df_dict[f'{gender}_{urban_desc}_{row}'] = pd.Series( + [0] * len(df_property.index), index=df_property.index + ) + except Exception as e: + print(f"ERROR processing {li_property}[{df_key}][{gender}]: {e}") + df_dict[f'{gender}_{urban_desc}_{row}'] = pd.Series( + [0] * len(df_property.index), index=df_property.index + ) + + row += 1 + + # Plot for this gender + for plot_key, plot_data in df_dict.items(): + plot_row = int(plot_key.split('_')[-1]) + ax = plot_data.plot(kind='bar', stacked=True, + ax=axes[plot_row, col], + ylim=(0, y_lim), + legend=None, + color='darkturquoise', + title=f"{plot_key.split('_')[1]} {desc} {self.en_props[li_property].label}", + ylabel=f"{self.en_props[li_property].label} proportions", + xlabel="Year" + ) + self.custom_axis_formatter(plot_data, ax) + + col += 1 + + else: + # Structure: [gender][property][age_range] or other + fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 5)) + axes = np.array([[axes[0], axes[1]]]) + col = 0 + + for gender, desc in self.gender_des.items(): + try: + if gender in df_property: + gender_data = df_property[gender] + + # Find the True column + true_col = None + if "True" in gender_data: + true_col = gender_data["True"] + elif True in gender_data: + true_col = gender_data[True] + elif 'True' in gender_data: + true_col = gender_data['True'] + + if true_col is not None: + total = gender_data.sum(axis=1) + proportion = true_col.sum(axis=1) / total.replace(0, np.nan) + proportion = proportion.fillna(0) + else: + # If no True column, check if property values are directly accessible + print(f"DEBUG: No True column for {li_property}[{gender}]. 
Checking structure...") + print(f" Columns: {list(gender_data.columns)[:5]}...") + # For properties like li_urban where the property itself is the value + # We might need to handle this differently + proportion = pd.Series([0] * len(df_property.index), index=df_property.index) + else: + print(f"WARNING: Gender {gender} not found in {li_property}") + proportion = pd.Series([0] * len(df_property.index), index=df_property.index) + + except Exception as e: + print(f"ERROR processing {li_property} for {gender}: {e}") + proportion = pd.Series([0] * len(df_property.index), index=df_property.index) + + ax = proportion.plot(kind='bar', stacked=True, + ax=axes[0, col], + ylim=(0, y_lim), + legend=None, + color='darkturquoise', + title=f"{desc} {self.en_props[li_property].label}", + ylabel=f"{self.en_props[li_property].label} proportions", + xlabel="Year" + ) + self.custom_axis_formatter(proportion, ax) + col += 1 + + fig.legend([self.en_props[li_property].label], loc='lower left', bbox_to_anchor=(0.75, 0.8)) + add_footnote(fig, f'{self.en_props[li_property].per_gender_footnote}') + fig.tight_layout() + plt.savefig(self.outputpath / (li_property + self.datestamp + '.png'), format='png') + plt.close(fig=fig) def display_all_categorical_and_non_categorical_plots_by_gender(self): """ a function to display plots for both categorical and non-categorical properties grouped by gender """ @@ -383,6 +598,69 @@ def plot_categorical_properties_by_age_group(self, li_property: str): plt.savefig(self.outputpath / (li_property + self.datestamp + '.png'), format='png') plt.close(fig=fig) # close figure after saving it to avoid opening multiple figures + # def plot_non_categorical_properties_by_age_group(self, li_property): + # """ plot all non-categorical properties by age group """ + # # select logs from the latest year. 
In this case we are selecting year 2021 + # y_lim: float = 1.0 + # if li_property in ['li_is_sexworker']: + # y_lim = 0.040 + # + # all_logs_df = self.dfs[li_property] + # mask = (all_logs_df.index > pd.to_datetime('2021-01-01')) & (all_logs_df.index <= pd.to_datetime('2022-01-01')) + # self.dfs[li_property] = self.dfs[li_property].loc[mask] + # + # # create subplots + # fig, axes = plt.subplots(nrows=2 if li_property in self.cat_by_rural_urban_props or li_property == + # 'li_in_ed' else 1, + # figsize=(10, 5), sharex=True) + # + # df_dict = dict() + # if li_property == 'li_in_ed' or li_property in self.cat_by_rural_urban_props: + # _col: int = 0 # column counter + # key_value_desc = self.wealth_desc.items() if li_property == 'li_in_ed' else \ + # self._rural_urban_state.items() + # for _key, _value in key_value_desc: + # temp_df = pd.DataFrame() + # for _bool_value in ['True', 'False']: + # if li_property == 'li_in_ed': + # temp_df[_bool_value] = self.dfs[li_property]['M'][_key][_bool_value].sum(axis=0) + \ + # self.dfs[li_property]['F'][_key][_bool_value].sum(axis=0) + # + # else: + # temp_df[_bool_value] = self.dfs[li_property][_key]['M'][_bool_value].sum(axis=0) + \ + # self.dfs[li_property][_key]['F'][_bool_value].sum(axis=0) + # + # df_dict[f'{_value}_{_col}'] = temp_df['True'] / temp_df.sum(axis=1) + # _col += 1 + # + # else: + # plot_df = pd.DataFrame() + # for _bool_value in ['True', 'False']: + # plot_df[_bool_value] = self.dfs[li_property]['M'][_bool_value].sum(axis=0) + \ + # self.dfs[li_property]['F'][_bool_value].sum(axis=0) + # + # df_dict['non_urban_1'] = plot_df['True'] / plot_df.sum(axis=1) + # + # for _key in df_dict.keys(): + # # do plotting + # df_dict[_key].plot(kind='bar', stacked=True, + # ax=axes[int(_key.split("_")[-1])] if + # li_property in self.cat_by_rural_urban_props or li_property == 'li_in_ed' else axes, + # ylim=(0, y_lim), + # legend=None, + # color='darkturquoise', + # title=f"{self.en_props[li_property].label} by age group in 
2021, {_key.split('_')[0]}" + # if li_property in self.cat_by_rural_urban_props or li_property == 'li_in_ed' else + # f"{self.en_props[li_property].label} by age group in 2021", + # ylabel=f"{self.en_props[li_property].label} proportions", xlabel="Year" + # ) + # + # fig.legend([self.en_props[li_property].label], loc='lower left', bbox_to_anchor=(0.8, 0.7)) + # add_footnote(fig, f'{self.en_props[li_property].per_age_group_footnote}') + # fig.tight_layout() + # plt.savefig(self.outputpath / (li_property + self.datestamp + '.png'), format='png') + # plt.close(fig=fig) # close figure after saving it to avoid opening multiple figures + def plot_non_categorical_properties_by_age_group(self, li_property): """ plot all non-categorical properties by age group """ # select logs from the latest year. In this case we are selecting year 2021 @@ -394,51 +672,143 @@ def plot_non_categorical_properties_by_age_group(self, li_property): mask = (all_logs_df.index > pd.to_datetime('2021-01-01')) & (all_logs_df.index <= pd.to_datetime('2022-01-01')) self.dfs[li_property] = self.dfs[li_property].loc[mask] - # create subplots - fig, axes = plt.subplots(nrows=2 if li_property in self.cat_by_rural_urban_props or li_property == - 'li_in_ed' else 1, - figsize=(10, 5), sharex=True) + # Check the actual structure + df_property = self.dfs[li_property] + first_level_values = df_property.columns.get_level_values(0).unique() - df_dict = dict() - if li_property == 'li_in_ed' or li_property in self.cat_by_rural_urban_props: - _col: int = 0 # column counter - key_value_desc = self.wealth_desc.items() if li_property == 'li_in_ed' else \ - self._rural_urban_state.items() - for _key, _value in key_value_desc: + # Determine if it has urban/rural structure + has_urban_rural_structure = False + for val in first_level_values: + if str(val).lower() in ['true', 'false'] or val in [True, False]: + has_urban_rural_structure = True + break + + # Initialize df_dict + df_dict = {} + + # Special handling for 
li_in_ed + if li_property == 'li_in_ed': + # li_in_ed has special structure [gender][li_wealth][li_in_ed][age_years] + fig, axes = plt.subplots(nrows=2, figsize=(10, 5), sharex=True) + + _col = 0 + for wealth_key, wealth_desc in self.wealth_desc.items(): temp_df = pd.DataFrame() for _bool_value in ['True', 'False']: - if li_property == 'li_in_ed': - temp_df[_bool_value] = self.dfs[li_property]['M'][_key][_bool_value].sum(axis=0) + \ - self.dfs[li_property]['F'][_key][_bool_value].sum(axis=0) + try: + m_data = df_property['M'][wealth_key][_bool_value].sum(axis=0) + f_data = df_property['F'][wealth_key][_bool_value].sum(axis=0) + temp_df[_bool_value] = m_data + f_data + except KeyError: + # Try with boolean True/False + bool_val = True if _bool_value == 'True' else False + m_data = df_property['M'][wealth_key][bool_val].sum(axis=0) + f_data = df_property['F'][wealth_key][bool_val].sum(axis=0) + temp_df[_bool_value] = m_data + f_data + + df_dict[f'{wealth_desc}_{_col}'] = temp_df['True'] / temp_df.sum(axis=1).replace(0, np.nan) + _col += 1 - else: - temp_df[_bool_value] = self.dfs[li_property][_key]['M'][_bool_value].sum(axis=0) + \ - self.dfs[li_property][_key]['F'][_bool_value].sum(axis=0) + elif has_urban_rural_structure and li_property in self.cat_by_rural_urban_props: + # Has urban/rural structure + fig, axes = plt.subplots(nrows=2, figsize=(10, 5), sharex=True) + + _col = 0 + for urban_key, urban_desc in self._rural_urban_state.items(): + # Find the matching key in the DataFrame + df_key = None + for val in first_level_values: + if str(val).lower() == str(urban_key).lower() or \ + (urban_key == 'True' and val is True) or \ + (urban_key == 'False' and val is False): + df_key = val + break + + if df_key is None: + print(f"WARNING: Could not find urban key {urban_key} for {li_property}") + # Create empty series with correct index (age groups) + age_groups = sorted(df_property.columns.get_level_values(-1).unique()) + df_dict[f'{urban_desc}_{_col}'] = pd.Series([0] * 
len(age_groups), index=age_groups) + _col += 1 + continue - df_dict[f'{_value}_{_col}'] = temp_df['True'] / temp_df.sum(axis=1) + temp_df = pd.DataFrame() + for _bool_value in ['True', 'False']: + try: + m_data = df_property[df_key]['M'][_bool_value].sum(axis=0) + f_data = df_property[df_key]['F'][_bool_value].sum(axis=0) + temp_df[_bool_value] = m_data + f_data + except KeyError: + # Try with boolean True/False + bool_val = True if _bool_value == 'True' else False + m_data = df_property[df_key]['M'][bool_val].sum(axis=0) + f_data = df_property[df_key]['F'][bool_val].sum(axis=0) + temp_df[_bool_value] = m_data + f_data + + proportion = temp_df['True'] / temp_df.sum(axis=1).replace(0, np.nan) + df_dict[f'{urban_desc}_{_col}'] = proportion.fillna(0) _col += 1 else: + # No urban/rural structure or not in cat_by_rural_urban_props + fig, axes = plt.subplots(nrows=1, figsize=(10, 5), sharex=True) + plot_df = pd.DataFrame() for _bool_value in ['True', 'False']: - plot_df[_bool_value] = self.dfs[li_property]['M'][_bool_value].sum(axis=0) + \ - self.dfs[li_property]['F'][_bool_value].sum(axis=0) - - df_dict['non_urban_1'] = plot_df['True'] / plot_df.sum(axis=1) - - for _key in df_dict.keys(): - # do plotting - df_dict[_key].plot(kind='bar', stacked=True, - ax=axes[int(_key.split("_")[-1])] if - li_property in self.cat_by_rural_urban_props or li_property == 'li_in_ed' else axes, - ylim=(0, y_lim), - legend=None, - color='darkturquoise', - title=f"{self.en_props[li_property].label} by age group in 2021, {_key.split('_')[0]}" - if li_property in self.cat_by_rural_urban_props or li_property == 'li_in_ed' else - f"{self.en_props[li_property].label} by age group in 2021", - ylabel=f"{self.en_props[li_property].label} proportions", xlabel="Year" - ) + try: + m_data = df_property['M'][_bool_value].sum(axis=0) + f_data = df_property['F'][_bool_value].sum(axis=0) + plot_df[_bool_value] = m_data + f_data + except KeyError: + # Try with boolean True/False + bool_val = True if 
_bool_value == 'True' else False + m_data = df_property['M'][bool_val].sum(axis=0) + f_data = df_property['F'][bool_val].sum(axis=0) + plot_df[_bool_value] = m_data + f_data + + proportion = plot_df['True'] / plot_df.sum(axis=1).replace(0, np.nan) + df_dict['all'] = proportion.fillna(0) + + # Plotting - Check if we have data to plot + if not df_dict: + print(f"WARNING: No data to plot for {li_property}") + plt.close(fig=fig) + return + + # Convert axes to array if needed for consistent indexing + if not isinstance(axes, np.ndarray): + axes = np.array([axes]) + + # Plot each item in df_dict + for i, (_key, plot_data) in enumerate(df_dict.items()): + # Determine which axis to use + if len(df_dict) > 1 and len(axes) > 1: + ax = axes[i] + else: + ax = axes[0] if isinstance(axes, np.ndarray) else axes + + # Ensure plot_data is a Series with proper index + if isinstance(plot_data, pd.Series): + # Sort by index if it's numeric-like + try: + plot_data = plot_data.sort_index(key=lambda x: pd.to_numeric(x, errors='ignore')) + except: + pass + else: + # Convert to Series if it's not + plot_data = pd.Series(plot_data) + + plot_data.plot(kind='bar', + ax=ax, + ylim=(0, y_lim), + legend=None, + color='darkturquoise', + title=f"{self.en_props[li_property].label} by age group in 2021, {_key.split('_')[0]}" + if len(df_dict) > 1 else + f"{self.en_props[li_property].label} by age group in 2021", + ylabel=f"{self.en_props[li_property].label} proportions", + xlabel="Age Group" + ) fig.legend([self.en_props[li_property].label], loc='lower left', bbox_to_anchor=(0.8, 0.7)) add_footnote(fig, f'{self.en_props[li_property].per_age_group_footnote}') diff --git a/src/tlo/methods/enhanced_lifestyle.py b/src/tlo/methods/enhanced_lifestyle.py index 7045b7694d..867bb41643 100644 --- a/src/tlo/methods/enhanced_lifestyle.py +++ b/src/tlo/methods/enhanced_lifestyle.py @@ -305,7 +305,6 @@ def __init__(self, name=None): ), } - # Properties of individuals that this module provides. 
# Again each has a name, type and description. In addition, properties may be marked # as optional if they can be undefined for a given individual. @@ -361,15 +360,15 @@ def __init__(self, name=None): 'li_date_acquire_non_wood_burn_stove': Property(Types.DATE, 'date acquire non-wood burning stove'), "li_is_sexworker": Property(Types.BOOL, "Is the person a sex worker"), "li_is_circ": Property(Types.BOOL, "Is the person circumcised if they are male (False for all females)" - ), - 'li_herbal_medication': Property( Types.BOOL, 'whether someone uses herbal medication or not'), + ), + 'li_herbal_medication': Property(Types.BOOL, 'whether someone uses herbal medication or not'), } def read_parameters(self, resourcefilepath: Optional[Path] = None): p = self.parameters dataframes = read_csv_files(resourcefilepath / 'ResourceFile_Lifestyle_Enhanced', - files=["parameter_values", "urban_rural_by_district"], - ) + files=["parameter_values", "urban_rural_by_district"], + ) self.load_parameters_from_dataframe(dataframes["parameter_values"]) p['init_p_urban'] = ( dataframes["urban_rural_by_district"].drop( @@ -450,7 +449,8 @@ def on_birth(self, mother_id, child_id): df.at[child_id, 'li_is_circ'] = ( self.rng.rand() < self.parameters['proportion_of_men_that_are_assumed_to_be_circumcised_at_birth'] ) - df.at[child_id, 'herbal_medication_use'] = False + df.at[child_id, 'li_herbal_medication'] = df.at[_id_inherit_from, 'li_herbal_medication'] + class EduPropertyInitialiser: """ a class that will initialise education property in the population dataframe. 
it is mimicing the @@ -862,14 +862,14 @@ def init_marital_status(self, df, rng=None, **externals) -> pd.Series: p = self.parameters li_mar_stat_dtype = df.li_mar_stat.dtype - mar_stat = pd.Series(data=1, index=df.index, dtype=li_mar_stat_dtype ) + mar_stat = pd.Series(data=1, index=df.index, dtype=li_mar_stat_dtype) # select individuals of different age category age_ranges = [(15, 20), (20, 30), (30, 40), (40, 50), (50, 60), (60, np.inf)] for lower_age, upper_age in age_ranges: subpopulation = df.index[ df.age_years.between(lower_age, upper_age, inclusive="left") & df.is_alive - ] + ] parameters_key = ( f"init_dist_mar_stat_age{lower_age}{upper_age}" if upper_age != np.inf else @@ -1935,7 +1935,6 @@ def handle_bmi_transitions(self, df, rng=None, **externals) -> pd.Series: bmi_lm = LinearModel.custom(handle_bmi_transitions, parameters=self.params) return bmi_lm - def update_herbal_medication_property_linear_model(self) -> LinearModel: """ Update herbal medication use over time. @@ -2026,7 +2025,7 @@ def apply(self, population): # NB: In addition to logging properties by sex and age groups, there are some properties that requires # individual's urban or rural status. 
define and log these properties separately cat_by_rural_urban_props = ['li_wealth', 'li_bmi', 'li_low_ex', 'li_ex_alc', 'li_wood_burn_stove', - 'li_unimproved_sanitation', 'li_no_clean_drinking_water'] + 'li_unimproved_sanitation', 'li_no_clean_drinking_water', 'li_herbal_medication'] # these properties are applicable to individuals 15+ years log_by_age_15up = ['li_low_ex', 'li_mar_stat', 'li_ex_alc', 'li_bmi', 'li_tob'] @@ -2074,17 +2073,7 @@ def apply(self, population): key=_property, data=flatten_multi_index_series_into_dict_for_logging(data) ) - #Herbal Medication Use - herbal_summary = ( - df.loc[df.is_alive] - .groupby(['li_urban', 'sex', 'li_herbal_medication']) - .size() - .rename('count') - ) - logger.info( - key='li_herbal_medication', - data=flatten_multi_index_series_into_dict_for_logging(herbal_summary) - ) + # ---------------------- log properties associated with WASH under_5 = df.is_alive & (df.age_years < 5) between_5_and_15 = df.is_alive & (df.age_years.between(5, 15)) From fae38c44383255f25bcfb992489c761253d53b1f Mon Sep 17 00:00:00 2001 From: thewati Date: Wed, 28 Jan 2026 10:54:36 +0200 Subject: [PATCH 08/10] fix errors --- .../enhanced_lifestyle_analyses.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/src/scripts/enhanced_lifestyle_analyses/enhanced_lifestyle_analyses.py b/src/scripts/enhanced_lifestyle_analyses/enhanced_lifestyle_analyses.py index 8e63ae73f4..dcbd529357 100644 --- a/src/scripts/enhanced_lifestyle_analyses/enhanced_lifestyle_analyses.py +++ b/src/scripts/enhanced_lifestyle_analyses/enhanced_lifestyle_analyses.py @@ -8,13 +8,13 @@ from typing import Dict, NamedTuple, Optional import matplotlib.pyplot as plt +import numpy as np import pandas as pd from matplotlib import ticker from tlo import Date, Simulation, logging from tlo.analysis.utils import parse_log_file, unflatten_flattened_multi_index_in_logging from tlo.methods import demography, enhanced_lifestyle, simplified_births -import 
numpy as np def add_footnote(fig: plt.Figure, footnote: str): @@ -302,15 +302,9 @@ def plot_non_categorical_properties_by_gender(self, li_property: str): first_level_values = df_property.columns.get_level_values(0).unique() # Determine the structure - has_gender_first = all(v in ['F', 'M'] for v in first_level_values) has_urban_rural_first = all( str(v).lower() in ['true', 'false'] or v in [True, False] for v in first_level_values) - print(f"DEBUG for {li_property}:") - print(f" First level values: {list(first_level_values)}") - print(f" has_gender_first: {has_gender_first}") - print(f" has_urban_rural_first: {has_urban_rural_first}") - # Special handling for li_in_ed which has different structure if li_property == 'li_in_ed': # li_in_ed has structure [gender][li_wealth][li_in_ed][age_years] @@ -333,7 +327,7 @@ def plot_non_categorical_properties_by_gender(self, li_property: str): data_series = gender_cols[col_name] total_population += data_series - if in_ed_status == 'True' or in_ed_status == True: + if in_ed_status in ('True', True): total_in_education += data_series proportion = total_in_education / total_population.replace(0, np.nan) @@ -792,7 +786,7 @@ def plot_non_categorical_properties_by_age_group(self, li_property): # Sort by index if it's numeric-like try: plot_data = plot_data.sort_index(key=lambda x: pd.to_numeric(x, errors='ignore')) - except: + except (ValueError, TypeError, AttributeError): pass else: # Convert to Series if it's not @@ -879,8 +873,8 @@ def run(): # Basic arguments required for the simulation start_date = Date(2010, 1, 1) - end_date = Date(2015, 1, 1) - pop_size = 5000 + end_date = Date(2050, 1, 1) + pop_size = 20000 # Path to the resource files used by the disease and intervention methods resourcefilepath = './resources' From d79df416ebbc6c0e2464154be6d4fbe0eff70864 Mon Sep 17 00:00:00 2001 From: Precious29-web Date: Mon, 2 Feb 2026 14:10:45 +0200 Subject: [PATCH 09/10] removing start and stop parameters --- .../parameter_values.csv 
| 4 +- src/tlo/methods/enhanced_lifestyle.py | 61 +------------------ 2 files changed, 3 insertions(+), 62 deletions(-) diff --git a/resources/ResourceFile_Lifestyle_Enhanced/parameter_values.csv b/resources/ResourceFile_Lifestyle_Enhanced/parameter_values.csv index 8deb89e75e..bcffd394c0 100644 --- a/resources/ResourceFile_Lifestyle_Enhanced/parameter_values.csv +++ b/resources/ResourceFile_Lifestyle_Enhanced/parameter_values.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b0a664c45d45c524c70835fb38a65b41cc95265d178e2ac90de5dd1a52172fe1 -size 4335 +oid sha256:88e7b3c64af46d5cf5554091a87cb0bc7905cfa2fc0c94fd09055ae13aec738c +size 4173 diff --git a/src/tlo/methods/enhanced_lifestyle.py b/src/tlo/methods/enhanced_lifestyle.py index 867bb41643..3c18877764 100644 --- a/src/tlo/methods/enhanced_lifestyle.py +++ b/src/tlo/methods/enhanced_lifestyle.py @@ -287,22 +287,6 @@ def __init__(self, name=None): "init_p_herbal_medication_use_in_urban": Parameter( Types.REAL, "proportion of people in urban areas that use herbal medication" ), - 'r_start_herbal_medication_rural': Parameter( - Types.REAL, - 'probability per 3 months of starting herbal medication in rural areas' - ), - 'r_start_herbal_medication_urban': Parameter( - Types.REAL, - 'probability per 3 months of starting herbal medication in urban areas' - ), - 'r_stop_herbal_medication_rural': Parameter( - Types.REAL, - 'probability per 3 months of stopping herbal medication in rural areas' - ), - 'r_stop_herbal_medication_urban': Parameter( - Types.REAL, - 'probability per 3 months of stopping herbal medication in urban areas' - ), } # Properties of individuals that this module provides. 
@@ -660,7 +644,7 @@ def __init__(self, module): }, 'li_herbal_medication': { 'init': self.herbal_medication_linear_model(), - 'update': self.update_herbal_medication_property_linear_model(), + 'update': None }, } @@ -1935,49 +1919,6 @@ def handle_bmi_transitions(self, df, rng=None, **externals) -> pd.Series: bmi_lm = LinearModel.custom(handle_bmi_transitions, parameters=self.params) return bmi_lm - def update_herbal_medication_property_linear_model(self) -> LinearModel: - """ - Update herbal medication use over time. - Transitions depend only on rural/urban residence. - """ - - def handle_herbal_medication_transitions(self, df, rng=None, **externals) -> pd.Series: - p = self.parameters - # Copy current state - herbal_trans = df.li_herbal_medication.copy() - # ------------------------- - # START herbal medication - # ------------------------- - not_using = df.index[df.is_alive & ~df.li_herbal_medication] - eff_p_start = pd.Series( - np.where( - df.loc[not_using, 'li_urban'], - p['r_start_herbal_medication_urban'], - p['r_start_herbal_medication_rural'], - ), - index=not_using, - dtype=float - ) - will_start = rng.random_sample(len(not_using)) < eff_p_start - herbal_trans.loc[not_using[will_start]] = True - # ------------------------- - # STOP herbal medication - # ------------------------- - using = df.index[df.is_alive & df.li_herbal_medication] - eff_p_stop = pd.Series( - np.where( - df.loc[using, 'li_urban'], - p['r_stop_herbal_medication_urban'], - p['r_stop_herbal_medication_rural'], - ), - index=using, - dtype=float - ) - will_stop = rng.random_sample(len(using)) < eff_p_stop - herbal_trans.loc[using[will_stop]] = False - return herbal_trans - - return LinearModel.custom(handle_herbal_medication_transitions, parameters=self.params) class LifestyleEvent(RegularEvent, PopulationScopeEventMixin): From 7c3569a95b37e8fdad056972c1f18b79c8b3b319 Mon Sep 17 00:00:00 2001 From: Precious29-web Date: Mon, 9 Feb 2026 16:20:12 +0200 Subject: [PATCH 10/10] herbal medication 
def plot_herbal_medication_by_urban_rural_over_time(self):
    """Plot herbal medication use prevalence for urban vs rural residents over time.

    Reads the logged ``li_herbal_medication`` counts from ``self.dfs``, computes for
    every logged date the share of urban and of rural individuals who use herbal
    medication, draws the two series as grouped bars and saves the figure to
    ``self.outputpath`` as ``herbal_medication_by_urban_rural_over_time<datestamp>.png``.

    If ``li_herbal_medication`` is missing from ``self.dfs`` an error line is printed
    and the method returns without plotting.

    Assumptions (NOTE(review): confirm against the logging code):
      * the columns are a MultiIndex whose level 0 is the urban flag and whose
        level 2 is the herbal-medication flag (the original implementation unpacked
        each column as ``(li_urban, sex, li_herbal_medication, age_range)``);
      * flags may be real booleans or the strings ``'True'`` / ``'False'``;
      * the index holds date-like values that provide ``strftime``.

    :raises ValueError: if the logged columns have fewer than three levels.
    """
    if 'li_herbal_medication' not in self.dfs:
        print("ERROR: li_herbal_medication data not found in logs")
        return

    df = self.dfs['li_herbal_medication']

    if df.columns.nlevels < 3:
        raise ValueError(
            "li_herbal_medication log must have at least three column levels "
            "(urban flag, sex, herbal medication flag)"
        )

    def _as_flag(value):
        # Logged level values can be booleans or their string form.
        if isinstance(value, str):
            return value.lower() == 'true'
        return bool(value)

    # Classify every column once instead of re-parsing it for every date.
    is_urban = np.array(
        [_as_flag(v) for v in df.columns.get_level_values(0)], dtype=bool
    )
    uses_herbal = np.array(
        [_as_flag(v) for v in df.columns.get_level_values(2)], dtype=bool
    )

    def _prevalence(in_residence):
        # Row sums give per-date counts; missing cells are ignored by ``sum``.
        users = df.loc[:, in_residence & uses_herbal].sum(axis=1)
        total = df.loc[:, in_residence].sum(axis=1)
        # A residence category with nobody in it is reported as prevalence 0.
        return (users / total.where(total > 0)).fillna(0.0).astype(float)

    urban_prevalence = _prevalence(is_urban)
    rural_prevalence = _prevalence(~is_urban)

    fig, ax = plt.subplots(figsize=(14, 7))

    # Grouped bars: urban to the left of each tick, rural to the right.
    dates = df.index
    x = np.arange(len(dates))
    width = 0.35
    bars1 = ax.bar(x - width / 2, urban_prevalence.values, width,
                   label='Urban', color='blue', alpha=0.7)
    bars2 = ax.bar(x + width / 2, rural_prevalence.values, width,
                   label='Rural', color='green', alpha=0.7)

    ax.set_xlabel('Time (Year-Month)', fontsize=12)
    ax.set_ylabel('Prevalence of Herbal Medication Use', fontsize=12)
    ax.set_title('Herbal Medication Use by Residence Over Time', fontsize=14, fontweight='bold')
    ax.set_xticks(x)
    ax.set_xticklabels([date.strftime('%Y-%m') for date in dates])
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

    # With many time points keep roughly a dozen tick labels visible.
    if len(dates) > 24:
        n = max(1, len(dates) // 12)
        for i, label in enumerate(ax.xaxis.get_ticklabels()):
            if i % n != 0:
                label.set_visible(False)

    ax.legend(fontsize=11)
    ax.grid(True, alpha=0.3, axis='y')

    # Leave 15% headroom above the tallest bar, capped at 1.0. When every value is
    # zero (or there is no data) fall back to the full 0-1 range rather than asking
    # matplotlib for the degenerate limits [0, 0].
    max_prevalence = max(urban_prevalence.max(), rural_prevalence.max())
    if np.isfinite(max_prevalence) and max_prevalence > 0:
        upper_limit = min(1.0, max_prevalence * 1.15)
    else:
        upper_limit = 1.0
    ax.set_ylim([0, upper_limit])

    def autolabel_selected(bars, every_n=4):
        """Annotate every Nth non-zero bar with its height to avoid clutter."""
        for i, bar in enumerate(bars):
            if i % every_n != 0:
                continue
            height = bar.get_height()
            if height > 0:
                ax.annotate(f'{height:.3f}',
                            xy=(bar.get_x() + bar.get_width() / 2, height),
                            xytext=(0, 3),
                            textcoords="offset points",
                            ha='center', va='bottom', fontsize=8,
                            rotation=45)

    # Aim for about eight value labels per series.
    label_freq = max(1, len(dates) // 8)
    autolabel_selected(bars1, every_n=label_freq)
    autolabel_selected(bars2, every_n=label_freq)

    # Reference line at y=0.
    ax.axhline(y=0, color='black', linewidth=0.5, alpha=0.3)

    footnote = (f"Data collected quarterly. Total time points: {len(dates)}. Denominator: Total individuals per "
                f"urban/rural category.")
    fig.text(0.5, 0.01, footnote, ha='center', fontsize=10,
             bbox={"facecolor": "gray", "alpha": 0.3, "pad": 5})

    plt.tight_layout()

    output_path = self.outputpath / (f'herbal_medication_by_urban_rural_over_time{self.datestamp}.png')
    plt.savefig(output_path, format='png', dpi=300, bbox_inches='tight')
    plt.close(fig=fig)  # close figure after saving it to avoid opening multiple figures