diff --git a/resources/ResourceFile_Lifestyle_Enhanced/parameter_values.csv b/resources/ResourceFile_Lifestyle_Enhanced/parameter_values.csv index ec6b23fa88..bcffd394c0 100644 --- a/resources/ResourceFile_Lifestyle_Enhanced/parameter_values.csv +++ b/resources/ResourceFile_Lifestyle_Enhanced/parameter_values.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1b551597e808189e2eb3729d74d050df020f79a30446fccf2a417aacc280567 -size 3973 +oid sha256:88e7b3c64af46d5cf5554091a87cb0bc7905cfa2fc0c94fd09055ae13aec738c +size 4173 diff --git a/src/scripts/enhanced_lifestyle_analyses/enhanced_lifestyle_analyses.py b/src/scripts/enhanced_lifestyle_analyses/enhanced_lifestyle_analyses.py index b8f2de5a55..cc77c12642 100644 --- a/src/scripts/enhanced_lifestyle_analyses/enhanced_lifestyle_analyses.py +++ b/src/scripts/enhanced_lifestyle_analyses/enhanced_lifestyle_analyses.py @@ -8,6 +8,7 @@ from typing import Dict, NamedTuple, Optional import matplotlib.pyplot as plt +import numpy as np import pandas as pd from matplotlib import ticker @@ -65,6 +66,8 @@ def __init__(self, logs=None, path: Optional[str] = None): "per urban or rural"), "li_is_circ": PlotDescriptor("Male circumcision", "Sum of all males"), "li_is_sexworker": PlotDescriptor("sex workers", "Sum of all females aged between 15-49"), + "li_herbal_medication": PlotDescriptor("herbal medication use", "Sum of all individuals per urban or rural" + ), } # A dictionary to map properties and their description. Useful when setting plot legend @@ -84,7 +87,7 @@ def __init__(self, logs=None, path: Optional[str] = None): # define all properties that are categorised by rural or urban in addition to age and sex self.cat_by_rural_urban_props = ['li_wealth', 'li_bmi', 'li_low_ex', 'li_ex_alc', 'li_wood_burn_stove', 'li_unimproved_sanitation', - 'li_no_clean_drinking_water'] + 'li_no_clean_drinking_water', 'li_herbal_medication'] # date-stamp to label log files and any other outputs self.datestamp: str = datetime.date.today().strftime("__%Y_%m_%d") @@ -216,6 +219,66 @@ def plot_categorical_properties_by_gender(self, li_property: str): plt.savefig(self.outputpath / (li_property + self.datestamp + '.png'), format='png') plt.close(fig=fig) # close figure after saving it to avoid opening multiple figures + # def plot_non_categorical_properties_by_gender(self, li_property: str): + # """ a function to plot non-categorical properties of lifestyle module grouped by gender + # + # :param li_property: any other non-categorical property defined in lifestyle module """ + # + # # set y-axis limit. + # y_lim: float = 0.8 + # if li_property in ['li_no_access_handwashing', 'li_high_salt', 'li_wood_burn_stove', 'li_in_ed']: + # y_lim = 1.0 + # + # if li_property in ['li_tob', 'li_ex_alc']: + # y_lim = 0.3 + # + # # plot for male circumcision and female sex workers + # if li_property in ['li_is_circ', 'li_is_sexworker']: + # self.male_circumcision_and_sex_workers_plot(li_property) + # + # else: + # col: int = 0 # counter for indexing purposes + # # create subplots + # fig, axes = plt.subplots(nrows=2 if li_property in self.cat_by_rural_urban_props + # or li_property == 'li_ed_lev' else 1, ncols=2, figsize=(10, 5)) + # for gender, desc in self.gender_des.items(): + # + # df_dict = dict() + # if li_property in self.cat_by_rural_urban_props: + # _row: int = 0 # row counter + # _rows_counter: int = 0 # a counter for plotting. setting rows + # for _key, _value in self._rural_urban_state.items(): + # df_dict[f'{gender}_{_value}_{_row}'] = self.dfs[li_property][_key][gender]["True"].sum( + # axis=1) / self.dfs[li_property][_key][gender].sum(axis=1) + # _row += 1 + # + # else: + # df = self.dfs[li_property].reorder_levels([0, 2, 1, 3], axis=1) if li_property == 'li_in_ed' \ + # else self.dfs[li_property] + # df_dict[gender] = df[gender]["True"].sum(axis=1) / df[gender].sum(axis=1) + # for _key in df_dict.keys(): + # # do plotting + # ax = df_dict[_key].plot(kind='bar', stacked=True, + # ax=axes[int(_key.split("_")[-1]), col] if + # li_property in self.cat_by_rural_urban_props else axes[col], + # ylim=(0, y_lim), + # legend=None, + # color='darkturquoise', + # title=f"{_key.split('_')[1]} {desc} {self.en_props[li_property].label}" + # if li_property in self.cat_by_rural_urban_props + # else f"{desc} {self.en_props[li_property].label}", + # ylabel=f"{self.en_props[li_property].label} proportions", xlabel="Year" + # ) + # self.custom_axis_formatter(df_dict[_key], ax) + # # increase counter + # col += 1 + # fig.legend([self.en_props[li_property].label], loc='lower left', bbox_to_anchor=(0.75, 0.8)) + # # save and display plots for property categories by gender + # add_footnote(fig, f'{self.en_props[li_property].per_gender_footnote}') + # fig.tight_layout() + # plt.savefig(self.outputpath / (li_property + self.datestamp + '.png'), format='png') + # plt.close(fig=fig) # close figure after saving it to avoid opening multiple figures + def plot_non_categorical_properties_by_gender(self, li_property: str): """ a function to plot non-categorical properties of lifestyle module grouped by gender @@ -232,49 +295,197 @@ def plot_non_categorical_properties_by_gender(self, li_property: str): # plot for male circumcision and female sex workers if li_property in ['li_is_circ', 'li_is_sexworker']: self.male_circumcision_and_sex_workers_plot(li_property) + return # Exit early for these properties - else: - col: int = 0 # counter for indexing purposes - # create subplots - fig, axes = plt.subplots(nrows=2 if li_property in self.cat_by_rural_urban_props - or li_property == 'li_ed_lev' else 1, ncols=2, figsize=(10, 5)) - for gender, desc in self.gender_des.items(): + # Check the actual structure of the DataFrame + df_property = self.dfs[li_property] + first_level_values = df_property.columns.get_level_values(0).unique() - df_dict = dict() - if li_property in self.cat_by_rural_urban_props: - _row: int = 0 # row counter - _rows_counter: int = 0 # a counter for plotting. setting rows - for _key, _value in self._rural_urban_state.items(): - df_dict[f'{gender}_{_value}_{_row}'] = self.dfs[li_property][_key][gender]["True"].sum( - axis=1) / self.dfs[li_property][_key][gender].sum(axis=1) - _row += 1 + # Determine the structure + has_urban_rural_first = all( + str(v).lower() in ['true', 'false'] or v in [True, False] for v in first_level_values) - else: - df = self.dfs[li_property].reorder_levels([0, 2, 1, 3], axis=1) if li_property == 'li_in_ed' \ - else self.dfs[li_property] - df_dict[gender] = df[gender]["True"].sum(axis=1) / df[gender].sum(axis=1) - for _key in df_dict.keys(): - # do plotting - ax = df_dict[_key].plot(kind='bar', stacked=True, - ax=axes[int(_key.split("_")[-1]), col] if - li_property in self.cat_by_rural_urban_props else axes[col], - ylim=(0, y_lim), - legend=None, - color='darkturquoise', - title=f"{_key.split('_')[1]} {desc} {self.en_props[li_property].label}" - if li_property in self.cat_by_rural_urban_props - else f"{desc} {self.en_props[li_property].label}", - ylabel=f"{self.en_props[li_property].label} proportions", xlabel="Year" - ) - self.custom_axis_formatter(df_dict[_key], ax) - # increase counter + # Special handling for li_in_ed which has different structure + if li_property == 'li_in_ed': + # li_in_ed has structure [gender][li_wealth][li_in_ed][age_years] + # We need to aggregate across wealth levels and ages + col: int = 0 + fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 5)) + axes = np.array([[axes[0], axes[1]]]) + + for gender, desc in self.gender_des.items(): + # Get all columns for this gender + gender_cols = df_property[gender] + # Sum across all wealth levels, ages, and True/False + total_in_education = pd.Series(0, index=df_property.index) + total_population = pd.Series(0, index=df_property.index) + + # Iterate through all columns for this gender + for col_name in gender_cols.columns: + # col_name is a tuple like ('1', 'True', '10') + wealth_level, in_ed_status, age = col_name + data_series = gender_cols[col_name] + + total_population += data_series + if in_ed_status in ('True', True): + total_in_education += data_series + + proportion = total_in_education / total_population.replace(0, np.nan) + proportion = proportion.fillna(0) + + ax = proportion.plot(kind='bar', stacked=True, + ax=axes[0, col], + ylim=(0, y_lim), + legend=None, + color='darkturquoise', + title=f"{desc} {self.en_props[li_property].label}", + ylabel=f"{self.en_props[li_property].label} proportions", + xlabel="Year" + ) + self.custom_axis_formatter(proportion, ax) col += 1 + fig.legend([self.en_props[li_property].label], loc='lower left', bbox_to_anchor=(0.75, 0.8)) - # save and display plots for property categories by gender add_footnote(fig, f'{self.en_props[li_property].per_gender_footnote}') fig.tight_layout() plt.savefig(self.outputpath / (li_property + self.datestamp + '.png'), format='png') - plt.close(fig=fig) # close figure after saving it to avoid opening multiple figures + plt.close(fig=fig) + return + + # For other properties, determine plotting structure + if has_urban_rural_first: + # Structure: [urban/rural][gender][property][age_range] + fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 5)) + col = 0 + + for gender, desc in self.gender_des.items(): + df_dict = {} + row = 0 + + for urban_key, urban_desc in self._rural_urban_state.items(): + # Convert key to match what's in the DataFrame + if urban_key == 'True' and True in first_level_values: + df_key = True + elif urban_key == 'False' and False in first_level_values: + df_key = False + elif urban_key in first_level_values: + df_key = urban_key + elif str(urban_key) in [str(v) for v in first_level_values]: + for v in first_level_values: + if str(v) == str(urban_key): + df_key = v + break + else: + print(f"WARNING: Urban key {urban_key} not found in {li_property}") + df_dict[f'{gender}_{urban_desc}_{row}'] = pd.Series( + [0] * len(df_property.index), index=df_property.index + ) + row += 1 + continue + + # Get data for this urban/rural, gender + try: + gender_data = df_property[df_key][gender] + + # Find the True column + true_col = None + if "True" in gender_data: + true_col = gender_data["True"] + elif True in gender_data: + true_col = gender_data[True] + elif 'True' in gender_data: + true_col = gender_data['True'] + + if true_col is not None: + total = gender_data.sum(axis=1) + proportion = true_col.sum(axis=1) / total.replace(0, np.nan) + proportion = proportion.fillna(0) + df_dict[f'{gender}_{urban_desc}_{row}'] = proportion + else: + print(f"WARNING: No True column found for {li_property}[{df_key}][{gender}]") + df_dict[f'{gender}_{urban_desc}_{row}'] = pd.Series( + [0] * len(df_property.index), index=df_property.index + ) + except Exception as e: + print(f"ERROR processing {li_property}[{df_key}][{gender}]: {e}") + df_dict[f'{gender}_{urban_desc}_{row}'] = pd.Series( + [0] * len(df_property.index), index=df_property.index + ) + + row += 1 + + # Plot for this gender + for plot_key, plot_data in df_dict.items(): + plot_row = int(plot_key.split('_')[-1]) + ax = plot_data.plot(kind='bar', stacked=True, + ax=axes[plot_row, col], + ylim=(0, y_lim), + legend=None, + color='darkturquoise', + title=f"{plot_key.split('_')[1]} {desc} {self.en_props[li_property].label}", + ylabel=f"{self.en_props[li_property].label} proportions", + xlabel="Year" + ) + self.custom_axis_formatter(plot_data, ax) + + col += 1 + + else: + # Structure: [gender][property][age_range] or other + fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 5)) + axes = np.array([[axes[0], axes[1]]]) + col = 0 + + for gender, desc in self.gender_des.items(): + try: + if gender in df_property: + gender_data = df_property[gender] + + # Find the True column + true_col = None + if "True" in gender_data: + true_col = gender_data["True"] + elif True in gender_data: + true_col = gender_data[True] + elif 'True' in gender_data: + true_col = gender_data['True'] + + if true_col is not None: + total = gender_data.sum(axis=1) + proportion = true_col.sum(axis=1) / total.replace(0, np.nan) + proportion = proportion.fillna(0) + else: + # If no True column, check if property values are directly accessible + print(f"DEBUG: No True column for {li_property}[{gender}]. Checking structure...") + print(f" Columns: {list(gender_data.columns)[:5]}...") + # For properties like li_urban where the property itself is the value + # We might need to handle this differently + proportion = pd.Series([0] * len(df_property.index), index=df_property.index) + else: + print(f"WARNING: Gender {gender} not found in {li_property}") + proportion = pd.Series([0] * len(df_property.index), index=df_property.index) + + except Exception as e: + print(f"ERROR processing {li_property} for {gender}: {e}") + proportion = pd.Series([0] * len(df_property.index), index=df_property.index) + + ax = proportion.plot(kind='bar', stacked=True, + ax=axes[0, col], + ylim=(0, y_lim), + legend=None, + color='darkturquoise', + title=f"{desc} {self.en_props[li_property].label}", + ylabel=f"{self.en_props[li_property].label} proportions", + xlabel="Year" + ) + self.custom_axis_formatter(proportion, ax) + col += 1 + + fig.legend([self.en_props[li_property].label], loc='lower left', bbox_to_anchor=(0.75, 0.8)) + add_footnote(fig, f'{self.en_props[li_property].per_gender_footnote}') + fig.tight_layout() + plt.savefig(self.outputpath / (li_property + self.datestamp + '.png'), format='png') + plt.close(fig=fig) def display_all_categorical_and_non_categorical_plots_by_gender(self): """ a function to display plots for both categorical and non-categorical properties grouped by gender """ @@ -381,6 +592,69 @@ def plot_categorical_properties_by_age_group(self, li_property: str): plt.savefig(self.outputpath / (li_property + self.datestamp + '.png'), format='png') plt.close(fig=fig) # close figure after saving it to avoid opening multiple figures + # def plot_non_categorical_properties_by_age_group(self, li_property): + # """ plot all non-categorical properties by age group """ + # # select logs from the latest year. In this case we are selecting year 2021 + # y_lim: float = 1.0 + # if li_property in ['li_is_sexworker']: + # y_lim = 0.040 + # + # all_logs_df = self.dfs[li_property] + # mask = (all_logs_df.index > pd.to_datetime('2021-01-01')) & (all_logs_df.index <= pd.to_datetime('2022-01-01')) + # self.dfs[li_property] = self.dfs[li_property].loc[mask] + # + # # create subplots + # fig, axes = plt.subplots(nrows=2 if li_property in self.cat_by_rural_urban_props or li_property == + # 'li_in_ed' else 1, + # figsize=(10, 5), sharex=True) + # + # df_dict = dict() + # if li_property == 'li_in_ed' or li_property in self.cat_by_rural_urban_props: + # _col: int = 0 # column counter + # key_value_desc = self.wealth_desc.items() if li_property == 'li_in_ed' else \ + # self._rural_urban_state.items() + # for _key, _value in key_value_desc: + # temp_df = pd.DataFrame() + # for _bool_value in ['True', 'False']: + # if li_property == 'li_in_ed': + # temp_df[_bool_value] = self.dfs[li_property]['M'][_key][_bool_value].sum(axis=0) + \ + # self.dfs[li_property]['F'][_key][_bool_value].sum(axis=0) + # + # else: + # temp_df[_bool_value] = self.dfs[li_property][_key]['M'][_bool_value].sum(axis=0) + \ + # self.dfs[li_property][_key]['F'][_bool_value].sum(axis=0) + # + # df_dict[f'{_value}_{_col}'] = temp_df['True'] / temp_df.sum(axis=1) + # _col += 1 + # + # else: + # plot_df = pd.DataFrame() + # for _bool_value in ['True', 'False']: + # plot_df[_bool_value] = self.dfs[li_property]['M'][_bool_value].sum(axis=0) + \ + # self.dfs[li_property]['F'][_bool_value].sum(axis=0) + # + # df_dict['non_urban_1'] = plot_df['True'] / plot_df.sum(axis=1) + # + # for _key in df_dict.keys(): + # # do plotting + # df_dict[_key].plot(kind='bar', stacked=True, + # ax=axes[int(_key.split("_")[-1])] if + # li_property in self.cat_by_rural_urban_props or li_property == 'li_in_ed' else axes, + # ylim=(0, y_lim), + # legend=None, + # color='darkturquoise', + # title=f"{self.en_props[li_property].label} by age group in 2021, {_key.split('_')[0]}" + # if li_property in self.cat_by_rural_urban_props or li_property == 'li_in_ed' else + # f"{self.en_props[li_property].label} by age group in 2021", + # ylabel=f"{self.en_props[li_property].label} proportions", xlabel="Year" + # ) + # + # fig.legend([self.en_props[li_property].label], loc='lower left', bbox_to_anchor=(0.8, 0.7)) + # add_footnote(fig, f'{self.en_props[li_property].per_age_group_footnote}') + # fig.tight_layout() + # plt.savefig(self.outputpath / (li_property + self.datestamp + '.png'), format='png') + # plt.close(fig=fig) # close figure after saving it to avoid opening multiple figures + def plot_non_categorical_properties_by_age_group(self, li_property): """ plot all non-categorical properties by age group """ # select logs from the latest year. In this case we are selecting year 2021 @@ -392,51 +666,143 @@ def plot_non_categorical_properties_by_age_group(self, li_property): mask = (all_logs_df.index > pd.to_datetime('2021-01-01')) & (all_logs_df.index <= pd.to_datetime('2022-01-01')) self.dfs[li_property] = self.dfs[li_property].loc[mask] - # create subplots - fig, axes = plt.subplots(nrows=2 if li_property in self.cat_by_rural_urban_props or li_property == - 'li_in_ed' else 1, - figsize=(10, 5), sharex=True) + # Check the actual structure + df_property = self.dfs[li_property] + first_level_values = df_property.columns.get_level_values(0).unique() - df_dict = dict() - if li_property == 'li_in_ed' or li_property in self.cat_by_rural_urban_props: - _col: int = 0 # column counter - key_value_desc = self.wealth_desc.items() if li_property == 'li_in_ed' else \ - self._rural_urban_state.items() - for _key, _value in key_value_desc: + # Determine if it has urban/rural structure + has_urban_rural_structure = False + for val in first_level_values: + if str(val).lower() in ['true', 'false'] or val in [True, False]: + has_urban_rural_structure = True + break + + # Initialize df_dict + df_dict = {} + + # Special handling for li_in_ed + if li_property == 'li_in_ed': + # li_in_ed has special structure [gender][li_wealth][li_in_ed][age_years] + fig, axes = plt.subplots(nrows=2, figsize=(10, 5), sharex=True) + + _col = 0 + for wealth_key, wealth_desc in self.wealth_desc.items(): temp_df = pd.DataFrame() for _bool_value in ['True', 'False']: - if li_property == 'li_in_ed': - temp_df[_bool_value] = self.dfs[li_property]['M'][_key][_bool_value].sum(axis=0) + \ - self.dfs[li_property]['F'][_key][_bool_value].sum(axis=0) + try: + m_data = df_property['M'][wealth_key][_bool_value].sum(axis=0) + f_data = df_property['F'][wealth_key][_bool_value].sum(axis=0) + temp_df[_bool_value] = m_data + f_data + except KeyError: + # Try with boolean True/False + bool_val = True if _bool_value == 'True' else False + m_data = df_property['M'][wealth_key][bool_val].sum(axis=0) + f_data = df_property['F'][wealth_key][bool_val].sum(axis=0) + temp_df[_bool_value] = m_data + f_data + + df_dict[f'{wealth_desc}_{_col}'] = temp_df['True'] / temp_df.sum(axis=1).replace(0, np.nan) + _col += 1 - else: - temp_df[_bool_value] = self.dfs[li_property][_key]['M'][_bool_value].sum(axis=0) + \ - self.dfs[li_property][_key]['F'][_bool_value].sum(axis=0) + elif has_urban_rural_structure and li_property in self.cat_by_rural_urban_props: + # Has urban/rural structure + fig, axes = plt.subplots(nrows=2, figsize=(10, 5), sharex=True) + + _col = 0 + for urban_key, urban_desc in self._rural_urban_state.items(): + # Find the matching key in the DataFrame + df_key = None + for val in first_level_values: + if str(val).lower() == str(urban_key).lower() or \ + (urban_key == 'True' and val is True) or \ + (urban_key == 'False' and val is False): + df_key = val + break + + if df_key is None: + print(f"WARNING: Could not find urban key {urban_key} for {li_property}") + # Create empty series with correct index (age groups) + age_groups = sorted(df_property.columns.get_level_values(-1).unique()) + df_dict[f'{urban_desc}_{_col}'] = pd.Series([0] * len(age_groups), index=age_groups) + _col += 1 + continue - df_dict[f'{_value}_{_col}'] = temp_df['True'] / temp_df.sum(axis=1) + temp_df = pd.DataFrame() + for _bool_value in ['True', 'False']: + try: + m_data = df_property[df_key]['M'][_bool_value].sum(axis=0) + f_data = df_property[df_key]['F'][_bool_value].sum(axis=0) + temp_df[_bool_value] = m_data + f_data + except KeyError: + # Try with boolean True/False + bool_val = True if _bool_value == 'True' else False + m_data = df_property[df_key]['M'][bool_val].sum(axis=0) + f_data = df_property[df_key]['F'][bool_val].sum(axis=0) + temp_df[_bool_value] = m_data + f_data + + proportion = temp_df['True'] / temp_df.sum(axis=1).replace(0, np.nan) + df_dict[f'{urban_desc}_{_col}'] = proportion.fillna(0) _col += 1 else: + # No urban/rural structure or not in cat_by_rural_urban_props + fig, axes = plt.subplots(nrows=1, figsize=(10, 5), sharex=True) + plot_df = pd.DataFrame() for _bool_value in ['True', 'False']: - plot_df[_bool_value] = self.dfs[li_property]['M'][_bool_value].sum(axis=0) + \ - self.dfs[li_property]['F'][_bool_value].sum(axis=0) - - df_dict['non_urban_1'] = plot_df['True'] / plot_df.sum(axis=1) - - for _key in df_dict.keys(): - # do plotting - df_dict[_key].plot(kind='bar', stacked=True, - ax=axes[int(_key.split("_")[-1])] if - li_property in self.cat_by_rural_urban_props or li_property == 'li_in_ed' else axes, - ylim=(0, y_lim), - legend=None, - color='darkturquoise', - title=f"{self.en_props[li_property].label} by age group in 2021, {_key.split('_')[0]}" - if li_property in self.cat_by_rural_urban_props or li_property == 'li_in_ed' else - f"{self.en_props[li_property].label} by age group in 2021", - ylabel=f"{self.en_props[li_property].label} proportions", xlabel="Year" - ) + try: + m_data = df_property['M'][_bool_value].sum(axis=0) + f_data = df_property['F'][_bool_value].sum(axis=0) + plot_df[_bool_value] = m_data + f_data + except KeyError: + # Try with boolean True/False + bool_val = True if _bool_value == 'True' else False + m_data = df_property['M'][bool_val].sum(axis=0) + f_data = df_property['F'][bool_val].sum(axis=0) + plot_df[_bool_value] = m_data + f_data + + proportion = plot_df['True'] / plot_df.sum(axis=1).replace(0, np.nan) + df_dict['all'] = proportion.fillna(0) + + # Plotting - Check if we have data to plot + if not df_dict: + print(f"WARNING: No data to plot for {li_property}") + plt.close(fig=fig) + return + + # Convert axes to array if needed for consistent indexing + if not isinstance(axes, np.ndarray): + axes = np.array([axes]) + + # Plot each item in df_dict + for i, (_key, plot_data) in enumerate(df_dict.items()): + # Determine which axis to use + if len(df_dict) > 1 and len(axes) > 1: + ax = axes[i] + else: + ax = axes[0] if isinstance(axes, np.ndarray) else axes + + # Ensure plot_data is a Series with proper index + if isinstance(plot_data, pd.Series): + # Sort by index if it's numeric-like + try: + plot_data = plot_data.sort_index(key=lambda x: pd.to_numeric(x, errors='ignore')) + except (ValueError, TypeError, AttributeError): + pass + else: + # Convert to Series if it's not + plot_data = pd.Series(plot_data) + + plot_data.plot(kind='bar', + ax=ax, + ylim=(0, y_lim), + legend=None, + color='darkturquoise', + title=f"{self.en_props[li_property].label} by age group in 2021, {_key.split('_')[0]}" + if len(df_dict) > 1 else + f"{self.en_props[li_property].label} by age group in 2021", + ylabel=f"{self.en_props[li_property].label} proportions", + xlabel="Age Group" + ) fig.legend([self.en_props[li_property].label], loc='lower left', bbox_to_anchor=(0.8, 0.7)) add_footnote(fig, f'{self.en_props[li_property].per_age_group_footnote}') @@ -444,6 +810,151 @@ def plot_non_categorical_properties_by_age_group(self, li_property): plt.savefig(self.outputpath / (li_property + self.datestamp + '.png'), format='png') plt.close(fig=fig) # close figure after saving it to avoid opening multiple figures + def plot_herbal_medication_by_urban_rural_over_time(self): + """Plot herbal medication use prevalence by urban/rural over time using bar charts with month labels""" + + if 'li_herbal_medication' not in self.dfs: + print("ERROR: li_herbal_medication data not found in logs") + return + + df = self.dfs['li_herbal_medication'] + + # Initialize series for storing results + urban_prevalence = pd.Series(index=df.index, dtype=float) + rural_prevalence = pd.Series(index=df.index, dtype=float) + + # Process each time point + for date_idx in df.index: + date_data = df.loc[date_idx] + + # Initialize counters for this date + urban_true = urban_false = rural_true = rural_false = 0 + + # Process each column + for col, value in date_data.items(): + li_urban_val, sex, herbal_val, age_range = col + + # Determine if urban/rural + try: + if isinstance(li_urban_val, bool): + is_urban = li_urban_val + elif isinstance(li_urban_val, str): + is_urban = (li_urban_val.lower() == 'true') + else: + is_urban = bool(li_urban_val) + except (ValueError, TypeError): + # Skip this column if we can't determine urban status + continue + + # Determine if uses herbal medication + try: + if isinstance(herbal_val, bool): + uses_herbal = herbal_val + elif isinstance(herbal_val, str): + uses_herbal = (herbal_val.lower() == 'true') + else: + uses_herbal = bool(herbal_val) + except (ValueError, TypeError): + # Skip this column if we can't determine herbal status + continue + + # Add to appropriate counter + if is_urban: + if uses_herbal: + urban_true += value + else: + urban_false += value + else: + if uses_herbal: + rural_true += value + else: + rural_false += value + + # Calculate prevalences for this date + urban_total = urban_true + urban_false + rural_total = rural_true + rural_false + + urban_prevalence[date_idx] = urban_true / urban_total if urban_total > 0 else 0 + rural_prevalence[date_idx] = rural_true / rural_total if rural_total > 0 else 0 + + fig, ax = plt.subplots(figsize=(14, 7)) + + # Set up bar positions + dates = df.index + x = np.arange(len(dates)) + width = 0.35 + + # Create bars + bars1 = ax.bar(x - width / 2, urban_prevalence.values, width, label='Urban', color='blue', alpha=0.7) + bars2 = ax.bar(x + width / 2, rural_prevalence.values, width, label='Rural', color='green', alpha=0.7) + + # Format the plot + ax.set_xlabel('Time (Year-Month)', fontsize=12) + ax.set_ylabel('Prevalence of Herbal Medication Use', fontsize=12) + ax.set_title('Herbal Medication Use by Residence Over Time', fontsize=14, fontweight='bold') + ax.set_xticks(x) + + # Create date labels with year and month + date_labels = [] + for date in dates: + date_labels.append(date.strftime('%Y-%m')) + + # Set x-axis labels + ax.set_xticklabels(date_labels) + + # Rotate labels + plt.setp(ax.get_xticklabels(), rotation=45, ha='right') + + # If there are too many labels, show only some of them + if len(dates) > 24: # If more than 2 years of quarterly data + # Show every Nth label + n = max(1, len(dates) // 12) # Show about 12 labels total + for i, label in enumerate(ax.xaxis.get_ticklabels()): + if i % n != 0: + label.set_visible(False) + + ax.legend(fontsize=11) + ax.grid(True, alpha=0.3, axis='y') + + # Set y-axis limit to give some headroom + max_prevalence = max(urban_prevalence.max(), rural_prevalence.max()) + ax.set_ylim([0, min(1.0, max_prevalence * 1.15)]) + + # Add value labels on top of bars (only for selected bars to avoid clutter) + def autolabel_selected(bars, every_n=4): + """Add labels to every Nth bar to avoid clutter""" + for i, bar in enumerate(bars): + if i % every_n == 0: # Label every Nth bar + height = bar.get_height() + if height > 0: # Only label non-zero values + ax.annotate(f'{height:.3f}', + xy=(bar.get_x() + bar.get_width() / 2, height), + xytext=(0, 3), + textcoords="offset points", + ha='center', va='bottom', fontsize=8, + rotation=45) + + # Determine labeling frequency based on number of bars + label_freq = max(1, len(dates) // 8) # Show about 8 labels + autolabel_selected(bars1, every_n=label_freq) + autolabel_selected(bars2, every_n=label_freq) + + # Add a horizontal line at y=0 for reference + ax.axhline(y=0, color='black', linewidth=0.5, alpha=0.3) + + # Footnote + footnote = (f"Data collected quarterly. Total time points: {len(dates)}. Denominator: Total individuals per " + f"urban/rural category.") + ax.figure.text(0.5, 0.01, footnote, ha='center', fontsize=10, + bbox={"facecolor": "gray", "alpha": 0.3, "pad": 5}) + + plt.tight_layout() + + # Save the plot + output_path = self.outputpath / (f'herbal_medication_by_urban_rural_over_time{self.datestamp}.png') + plt.savefig(output_path, format='png', dpi=300, bbox_inches='tight') + plt.close(fig=fig) # close figure after saving it to avoid opening multiple figures + def display_all_categorical_and_non_categorical_plots_by_age_group(self): """ a function that will display plots of all enhanced lifestyle properties grouped by age group """ for _property in self.en_props.keys(): @@ -549,3 +1060,6 @@ def run(): # plot by age groups g_plots.display_all_categorical_and_non_categorical_plots_by_age_group() + +# plt herbal medication by rural/urban over time +g_plots.plot_herbal_medication_by_urban_rural_over_time() diff --git a/src/tlo/methods/cardio_metabolic_disorders.py b/src/tlo/methods/cardio_metabolic_disorders.py index f2ad9c75f1..29bcbb0861 100644 --- a/src/tlo/methods/cardio_metabolic_disorders.py +++ b/src/tlo/methods/cardio_metabolic_disorders.py @@ -1739,6 +1739,7 @@ def apply(self, person_id, squeeze_factor): class HSI_CardioMetabolicDisorders_Refill_Medication(HSI_Event, IndividualScopeEventMixin): + #This is an HSI for medication refill """ This is a Health System Interaction Event in which a person seeks a refill prescription of medication. The next refill of medication is also scheduled. diff --git a/src/tlo/methods/enhanced_lifestyle.py b/src/tlo/methods/enhanced_lifestyle.py index d9164f04fd..3c18877764 100644 --- a/src/tlo/methods/enhanced_lifestyle.py +++ b/src/tlo/methods/enhanced_lifestyle.py @@ -280,7 +280,13 @@ def __init__(self, name=None): ), "fsw_transition": Parameter( Types.REAL, "proportion of sex workers that stop being a sex worker each year" - ) + ), + "init_p_herbal_medication_use_in_rural": Parameter( + Types.REAL, "proportion of people in rural areas that use herbal medication" + ), + "init_p_herbal_medication_use_in_urban": Parameter( + Types.REAL, "proportion of people in urban areas that use herbal medication" + ), } # Properties of individuals that this module provides. @@ -337,14 +343,16 @@ def __init__(self, name=None): 'li_date_acquire_clean_drinking_water': Property(Types.DATE, 'date acquire clean drinking water'), 'li_date_acquire_non_wood_burn_stove': Property(Types.DATE, 'date acquire non-wood burning stove'), "li_is_sexworker": Property(Types.BOOL, "Is the person a sex worker"), - "li_is_circ": Property(Types.BOOL, "Is the person circumcised if they are male (False for all females)"), + "li_is_circ": Property(Types.BOOL, "Is the person circumcised if they are male (False for all females)" + ), + 'li_herbal_medication': Property(Types.BOOL, 'whether someone uses herbal medication or not'), } def read_parameters(self, resourcefilepath: Optional[Path] = None): p = self.parameters dataframes = read_csv_files(resourcefilepath / 'ResourceFile_Lifestyle_Enhanced', - files=["parameter_values", "urban_rural_by_district"], - ) + files=["parameter_values", "urban_rural_by_district"], + ) self.load_parameters_from_dataframe(dataframes["parameter_values"]) p['init_p_urban'] = ( dataframes["urban_rural_by_district"].drop( @@ -425,6 +433,7 @@ def on_birth(self, mother_id, child_id): df.at[child_id, 'li_is_circ'] = ( self.rng.rand() < self.parameters['proportion_of_men_that_are_assumed_to_be_circumcised_at_birth'] ) + df.at[child_id, 'li_herbal_medication'] = df.at[_id_inherit_from, 'li_herbal_medication'] class EduPropertyInitialiser: @@ -632,7 +641,12 @@ def __init__(self, module): 'li_is_sexworker': { 'init': self.female_sex_workers(), 'update': self.female_sex_workers() - } + }, + 'li_herbal_medication': { + 'init': self.herbal_medication_linear_model(), + 'update': None + }, + } def is_edu_dictionary_empty(self): @@ -832,14 +846,14 @@ def init_marital_status(self, df, rng=None, **externals) -> pd.Series: p = self.parameters li_mar_stat_dtype = df.li_mar_stat.dtype - mar_stat = pd.Series(data=1, index=df.index, dtype=li_mar_stat_dtype ) + mar_stat = pd.Series(data=1, index=df.index, dtype=li_mar_stat_dtype) # select individuals of different age category age_ranges = [(15, 20), (20, 30), (30, 40), (40, 50), (50, 60), (60, np.inf)] for lower_age, upper_age in age_ranges: subpopulation = df.index[ df.age_years.between(lower_age, upper_age, inclusive="left") & df.is_alive - ] + ] parameters_key = ( f"init_dist_mar_stat_age{lower_age}{upper_age}" if upper_age != np.inf else @@ -1174,6 +1188,26 @@ def handle_male_circumcision_prop(self, df, rng=None, **externals) -> pd.Series: male_circ_lm = LinearModel.custom(handle_male_circumcision_prop, parameters=self.params) return male_circ_lm + def herbal_medication_linear_model(self) -> LinearModel: + """Assign herbal medication use based on rural and urban""" + + def predict_herbal_use(self, df, rng=None, **externals) -> pd.Series: + p = self.parameters + # Probability depends ONLY on li_urban + prob = pd.Series( + np.where( + df.li_urban, + p['init_p_herbal_medication_use_in_urban'], + p['init_p_herbal_medication_use_in_rural'], + ), + index=df.index, + dtype=float + ) + rnd = rng.random_sample(len(df)) + return rnd < prob + + return LinearModel.custom(predict_herbal_use, parameters=self.params) + # --------------------- LINEAR MODELS FOR UPDATING POPULATION PROPERTIES ------------------------------ # # todo: make exposed to campaign `_property` reflect index of individuals who have transitioned @@ -1886,6 +1920,7 @@ def handle_bmi_transitions(self, df, rng=None, **externals) -> pd.Series: return bmi_lm + class LifestyleEvent(RegularEvent, PopulationScopeEventMixin): """ Regular event that updates all lifestyle properties for population @@ -1931,7 +1966,7 @@ def apply(self, population): # NB: In addition to logging properties by sex and age groups, there are some properties that requires # individual's urban or rural status. define and log these properties separately cat_by_rural_urban_props = ['li_wealth', 'li_bmi', 'li_low_ex', 'li_ex_alc', 'li_wood_burn_stove', - 'li_unimproved_sanitation', 'li_no_clean_drinking_water'] + 'li_unimproved_sanitation', 'li_no_clean_drinking_water', 'li_herbal_medication'] # these properties are applicable to individuals 15+ years log_by_age_15up = ['li_low_ex', 'li_mar_stat', 'li_ex_alc', 'li_bmi', 'li_tob']