diff --git a/resources/healthsystem/consumables/ResourceFile_Consumables_availability_small.csv b/resources/healthsystem/consumables/ResourceFile_Consumables_availability_small.csv index 2cf60fec13..4301af678d 100644 --- a/resources/healthsystem/consumables/ResourceFile_Consumables_availability_small.csv +++ b/resources/healthsystem/consumables/ResourceFile_Consumables_availability_small.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ab824924a4c0e798eeec6c2514c50577ba83185120ec257d5065c583fe1dfa6 -size 57341233 +oid sha256:b0922f6c12c1303bdd1ce5f1a3c1e212b1635069664113a18a80ce77742ac031 +size 71215257 diff --git a/resources/healthsystem/consumables/ResourceFile_Consumables_availability_small_original.csv b/resources/healthsystem/consumables/ResourceFile_Consumables_availability_small_original.csv index 6afeff00bb..9a944ff91f 100644 --- a/resources/healthsystem/consumables/ResourceFile_Consumables_availability_small_original.csv +++ b/resources/healthsystem/consumables/ResourceFile_Consumables_availability_small_original.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3bc044868ad9f1c618fc9b3fd6fc0ed23f32b153c8438f52fd4b72e40d8f503f -size 55831644 +oid sha256:e8661e85489bf7422dd466b25af898220777e8c16e22c6ba3f8d7bfc2531f833 +size 70405856 diff --git a/src/scripts/consumables_analyses/descriptive_analysis.py b/src/scripts/consumables_analyses/descriptive_analysis.py new file mode 100644 index 0000000000..1f2a30a43d --- /dev/null +++ b/src/scripts/consumables_analyses/descriptive_analysis.py @@ -0,0 +1,455 @@ +''' +Produce plots and estimates for the manuscript "Estimating the health gains and value for money of reducing drug stock-outs in Malawi: an individual-based modelling study" +''' + +import datetime +import os +import textwrap +from pathlib import Path + +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns +from typing import Literal, Sequence, Optional, Union, List + +from tlo import Date +from tlo.analysis.utils import ( + extract_params, + get_scenario_info, + get_scenario_outputs, + load_pickled_dataframes, +) + +# Define a timestamp for script outputs +timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") + +# Print the start time of the script +print('Script Start', datetime.datetime.now().strftime('%H:%M')) + +# Create folders to store results +resourcefilepath = Path("./resources") +consumable_resourcefilepath = resourcefilepath / "healthsystem/consumables" +simulationfilepath = Path('./outputs/sakshi.mohan@york.ac.uk') +outputfilepath = Path('./outputs/consumables_impact_analysis') +if not os.path.exists(outputfilepath): + os.makedirs(outputfilepath) + +# Utility functions +# Assign item names to item codes +def assign_consumable_names_to_item_codes(df): + # Create dictionary mapping item_codes to consumables names + consumables_df = pd.read_csv(consumable_resourcefilepath / "ResourceFile_Consumables_Items_and_Packages.csv")[['Item_Code', 'Items']] + consumables_df = consumables_df[consumables_df['Item_Code'].notna()] + consumables_dict = dict(zip(consumables_df['Item_Code'], consumables_df['Items'])) + + # Add consumable_name to df + df = df.copy() + df['item_name'] = df['item_code'].map(consumables_dict) + + return df + +# Prepare availability data +def prepare_availability_dataset_for_plots( + _df: pd.DataFrame, + scenario_list: Optional[list[int]] = None, + scenario_names_dict: Optional[dict[str, str]] = None, + consumable_resourcefilepath: Path = None, + resourcefilepath: Path = None 
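+    # Note (added for clarity): both path arguments above are required in practice
+    # (resource files are read from them); the None defaults are placeholders only.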
+) -> pd.DataFrame: + """ + Prepares a consumable availability dataset by merging facility and item category data, + renaming columns for scenarios, and cleaning category names for plotting. + """ + if scenario_list is None: + scenario_list = [] + if scenario_names_dict is None: + scenario_names_dict = {} + + # Load item category mapping + program_item_mapping = pd.read_csv( + consumable_resourcefilepath / 'ResourceFile_Consumables_Item_Designations.csv', + usecols=['Item_Code', 'item_category'] + ) + program_item_mapping = program_item_mapping.rename(columns={'Item_Code': 'item_code'}) + program_item_mapping = program_item_mapping[program_item_mapping['item_category'].notna()] + + # Load facility list + mfl = pd.read_csv(resourcefilepath / "healthsystem" / "organisation" / "ResourceFile_Master_Facilities_List.csv") + + # Merge facility and program info + _df = _df.merge( + mfl[['District', 'Facility_Level', 'Facility_ID']], + on='Facility_ID', how='left' + ) + _df = _df.merge(program_item_mapping, on='item_code', how='left') + + # Rename scenario columns + _df = _df.rename(columns=scenario_names_dict) + + # Clean item category names + clean_category_names = { + 'cancer': 'Cancer', + 'cardiometabolicdisorders': 'Cardiometabolic Disorders', + 'contraception': 'Contraception', + 'general': 'General', + 'hiv': 'HIV', + 'malaria': 'Malaria', + 'ncds': 'Non-communicable Diseases', + 'neonatal_health': 'Neonatal Health', + 'other_childhood_illnesses': 'Other Childhood Illnesses', + 'reproductive_health': 'Reproductive Health', + 'road_traffic_injuries': 'Road Traffic Injuries', + 'tb': 'Tuberculosis', + 'undernutrition': 'Undernutrition', + 'epi': 'Expanded programme on immunization' + } + _df['item_category'] = _df['item_category'].map(clean_category_names) + + return _df + +# Wrap Labels +def wrap_labels(labels, width=15): + """Wrap each label to the given character width.""" + return [textwrap.fill(str(lab), width) if lab is not None else "" for lab in labels] + +# Generate heatmaps of average availability +def generate_heatmap( + df: pd.DataFrame, + include_levels: Optional[List[str]] = None, + value_col: str = "Actual", + row: str = "item_category", + col: str = "Facility_Level", + row_order: Optional[Sequence[str]] = None, + col_order: Optional[Sequence[str]] = None, + figurespath: Optional[Path] = None, + filename: str = "heatmap_consumable_availability.png", + figsize: tuple[int, int] = (10, 8), + cmap: str = "RdYlGn", + annot: bool = True, + fmt: Optional[str] = None, # None -> auto choose + font_scale: float = 0.75, + cbar_label: str = "Proportion of days on which consumable is available", + xlabel: Optional[str] = None, + ylabel: Optional[str] = None, + as_pct: bool = True, # format annotations as percentages + round_decimals: int = 2, + # option to plot scenarios on the x_axis + scenario_axis: bool = False, # if True, columns become scenarios + scenario_cols: Optional[Sequence[str]] = None, +): + """ + Build a heatmap either by a single column (e.g., Facility_Level) or across multiple + scenario columns placed on the x-axis. 
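+
+    Example (illustrative; mirrors the Figure 1 call later in this script):
+
+        fig, ax, hm = generate_heatmap(
+            df=tlo_availability_df,
+            value_col="Actual",
+            row="item_category",
+            col="Facility_Level",
+        )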
+ """ + if include_levels is not None: + df = df[df.Facility_Level.isin(include_levels)] + if scenario_axis: + aggregated = ( + df.groupby([row], dropna=True)[scenario_cols] + .mean() + .reset_index() + ) + # Add perfect scenario + aggregated['Perfect'] = 1 # Add a column representing the perfect scenario + heatmap_df = aggregated.set_index('item_category') + else: + # Standard mode: columns = `col`, values = mean(value_col) + aggregated = ( + df.groupby([row, col], dropna=True)[value_col] + .mean() + .reset_index() + ) + heatmap_df = aggregated.pivot(index=row, columns=col, values=value_col) + + # Optional ordering + if row_order is not None: + heatmap_df = heatmap_df.reindex(row_order) + if col_order is not None: + heatmap_df = heatmap_df.reindex(columns=col_order) + + # 2) Totals (means across the raw data, not the pivot means) + if scenario_axis: + # Means by row across all programs for each scenario + row_means = heatmap_df.mean(axis=0) # average per scenario across programs + avg_row = row_means.copy() + heatmap_df.loc["Average"] = avg_row + else: + # Compute from raw df to avoid double-averaging + col_means = df.groupby(row, dropna=False)[value_col].mean() + row_means = df.groupby(col, dropna=False)[value_col].mean() + overall_mean = df[value_col].mean() + + heatmap_df["Average"] = col_means.reindex(heatmap_df.index) + avg_row = row_means.reindex(heatmap_df.columns).copy() + avg_row.loc["Average"] = overall_mean + heatmap_df.loc["Average"] = avg_row + + # 3) Annotation formatting + if as_pct: + # If values are 0–1 proportions, annotate as percentages + display_df = (heatmap_df * 100).round(round_decimals) + if fmt is None: + fmt = ".0f" if round_decimals == 0 else f".{round_decimals}f" + annot_kws = {"fmt": fmt} + # Build string labels with % sign + annot_data = display_df.astype(float) + else: + display_df = heatmap_df.round(round_decimals) + if fmt is None: + fmt = f".{round_decimals}f" + annot_kws = {"fmt": fmt} + annot_data = display_df.astype(float) + + # 4) Plot + sns.set(font_scale=font_scale) + fig, ax = plt.subplots(figsize=figsize) + hm = sns.heatmap( + annot_data, + annot=annot, + cmap=cmap, + cbar_kws={"label": cbar_label}, + ax=ax, + fmt=".2f" + ) + + # If percentage labels requested, overwrite text with % suffix + if annot and as_pct: + for t in ax.texts: + t.set_text(f"{t.get_text()}%") + + # 5) Labels & ticks + xlab = (xlabel or ("Scenario" if scenario_axis else col.replace("_", " ").title())) + ylab = (ylabel or row.replace("_", " ").title()) + ax.set_xlabel(xlab) + ax.set_ylabel(ylab) + ax.set_xticklabels(ax.get_xticklabels(), rotation=90) + ax.set_yticklabels(ax.get_yticklabels(), rotation=0) + + # 6) Save (optional) + if figurespath is not None: + figurespath = Path(figurespath) + figurespath.mkdir(parents=True, exist_ok=True) + outpath = figurespath / filename + plt.savefig(outpath, dpi=300, bbox_inches="tight") + + plt.show() + plt.close(fig) + return fig, ax, heatmap_df + +# Generate LaTex-compatible detailed table of availability +def generate_detail_availability_table_by_scenario( + df: pd.DataFrame, + groupby_var: str, + scenario_cols: Sequence[str], + include_levels: Union[str, Sequence[str]], + longtable: bool = False, + outputpath: Path = None, + decimals: int = 2, + caption: Optional[str] = None, + label_prefix: str = "tab:availability_by_", + col_width_groupbyvar: str = "4cm", + col_width_scenario: str = "1.8cm", +) -> str: + """ + Create a LaTeX table of average availability (as percentages) for each consumable across scenarios, + filtered to selected 
facility levels. + + Returns the LaTeX string and writes it to figurespath / f"availability_by_{groupby_var}.tex" + """ + + # --- Setup --- + if outputpath is None: + outputpath = outputfilepath / "appendix" # falls back to your existing default + outputpath.mkdir(parents=True, exist_ok=True) + + # Accept str OR list for include_levels + if isinstance(include_levels, str): + include_levels = [include_levels] + + table_df = df.copy() + + # Filter by facility level if the column is present + if "Facility_Level" in table_df.columns: + table_df = table_df[table_df["Facility_Level"].isin(include_levels)] + + # Aggregate means per item + grouped = ( + table_df + .groupby([groupby_var], dropna=False)[list(scenario_cols)] + .mean() + .reset_index() + ) + + # Rename first column to "Consumable" + grouped.rename(columns={groupby_var: "Consumable"}, inplace=True) + + # Escape LaTeX in Consumable names + def _latex_escape(s): + if pd.isna(s): + return "" + s = str(s) + # Order matters for backslashes; escape backslash first + s = s.replace("\\", r"\\") + s = s.replace("&", r"\&").replace("%", r"\%").replace("_", r"\_") + s = s.replace("#", r"\#").replace("{", r"\{").replace("}", r"\}") + s = s.replace("$", r"\$").replace("^", r"\^{}").replace("~", r"\~{}") + return s + + grouped["Consumable"] = grouped["Consumable"].map(_latex_escape) + + # Convert proportions -> percentage strings with escaped % + def pct_format(x: float) -> str: + if pd.isna(x): + return "" + return f"{x * 100:.{decimals}f}\\%" + + for c in scenario_cols: + grouped[c] = grouped[c].map(pct_format) + + # Build column format dynamically + # First col wider for names, then one col per scenario + column_format = ( + f"|R{{{col_width_groupbyvar}}}|" + + "|".join([f"R{{{col_width_scenario}}}"] * len(scenario_cols)) + + "|" + ) + + # Caption/label + if caption is None: + caption = "Summarized availability by consumable" + label = f"{label_prefix}{groupby_var}_{include_levels}" + + # Export to LaTeX (escape=False since we already escaped) + latex_table = grouped.to_latex( + longtable=longtable, + column_format=column_format, + caption=caption, + label=label, + position="h", + index=False, + escape=False, + header=True, + ) + + # Add \hline after each row + latex_table = latex_table.replace("\\\\\n", "\\\\ \\hline\n") + + # Save + outpath = outputpath / f"availability_by_{groupby_var}_{include_levels}.tex" + with open(outpath, "w", encoding="utf-8") as f: + f.write(latex_table) + + return latex_table + + +# Import and clean data files +#********************************** +# Import TLO model availability data +tlo_availability_df = pd.read_csv(consumable_resourcefilepath / "ResourceFile_Consumables_availability_small_original.csv") +scenario_names_dict={ + 'available_prop': 'Actual', + 'available_prop_scenario1': 'Non-therapeutic consumables', + 'available_prop_scenario2': 'Vital medicines', + 'available_prop_scenario3': 'Pharmacist- managed', + 'available_prop_scenario4': 'Level 1b', + 'available_prop_scenario5': 'CHAM', + 'available_prop_scenario6': '75th percentile facility', + 'available_prop_scenario7': '90th percentile facility', + 'available_prop_scenario8': 'Best facility', + 'available_prop_scenario9': 'Best facility (including DHO)', + 'available_prop_scenario10': 'HIV supply chain', + 'available_prop_scenario11': 'EPI supply chain', + 'available_prop_scenario12': 'HIV moved to Govt supply chain (Avg by Level)', + 'available_prop_scenario13': 'HIV moved to Govt supply chain (Avg by Facility_ID)', + 'available_prop_scenario14': 'HIV 
moved to Govt supply chain (Avg by Facility_ID times 1.25)', + 'available_prop_scenario15': 'HIV moved to Govt supply chain (Avg by Facility_ID times 0.75)', + 'available_prop_scenario16': 'Redistribution (District pooling)', + 'available_prop_scenario17': 'Redistribution (Neighbourhood pooling)', + 'available_prop_scenario18': 'Redistribution (Pairwise exchanges - large radius)', + 'available_prop_scenario19': 'Redistribution (Pairwise exchanges - small radius)' + } + +tlo_availability_df = prepare_availability_dataset_for_plots( + _df=tlo_availability_df, + scenario_list=[1, 2, 3, 6, 7, 8, 16, 17, 18, 19], + scenario_names_dict=scenario_names_dict, + consumable_resourcefilepath=consumable_resourcefilepath, + resourcefilepath=resourcefilepath +) + +# Generate figures for manuscript +#********************************** +# TODO Consider redoing these plots with the legacy Resourcefile +# Figure 1: Average probability of consumable availability in public and CHAM health facilities in Malawi +_ = generate_heatmap( + df=tlo_availability_df, + value_col="Actual", + row="item_category", + col="Facility_Level", + figurespath = outputfilepath / 'manuscript', + filename="heatmap_program_and_level_actual.png", + figsize=(10, 8), + cmap="RdYlGn", + round_decimals=4, + cbar_label="Proportion of days on which consumable is available", + xlabel="Facility Level", + ylabel="Program", +) + +# Figure 3: Comparison of consumable availability across modelled scenarios +scenario_cols = ['Actual', 'Non-therapeutic consumables', 'Vital medicines', 'Pharmacist- managed', + '75th percentile facility', '90th percentile facility', 'Best facility', + 'Redistribution (District pooling)', 'Redistribution (Neighbourhood pooling)', + 'Redistribution (Pairwise exchanges - large radius)', + 'Redistribution (Pairwise exchanges - small radius)',] +for level in ['1a', '1b']: + _ = generate_heatmap( + df=tlo_availability_df, + include_levels = [level], + value_col="Actual", + row="item_category", + col="Facility_Level", + figurespath=outputfilepath / 'manuscript', + filename=f"scenarios_heatmap_{level}.png", + figsize=(10, 8), + cmap="RdYlGn", + round_decimals=4, + cbar_label="Proportion of days on which consumable is available", + xlabel="Facility Level", + ylabel="Program", + scenario_axis = True, # if True, columns become scenarios + scenario_cols = scenario_cols, + ) + + +# Figure A.1: Trend in average consumable availability by facility level + +# Figure A.2: Comparison of consumable availability as per Open Logistics Management Information System (OpenLMIS), 2018 and Harmonised +# Health Facility Assessment, 2018-19 + +# Table B.1: Average probability of availability for each consumable under all scenarios (Level 1a) +tlo_availability_df = assign_consumable_names_to_item_codes(tlo_availability_df) + +# Table B.1: Average probability of availability for each consumable under all scenarios (Level 1a) +availablity_by_item_1a = generate_detail_availability_table_by_scenario( + df=tlo_availability_df, + groupby_var="item_name", + scenario_cols=scenario_cols, + include_levels="1a", + longtable=True, + outputpath=outputfilepath / "appendix", + decimals=2, + caption="Average probability of availability for each consumable under all scenarios (Level 1a)", +) + +# Table B.2: Average probability of availability for each consumable under all scenarios (Level 1b) +availablity_by_item_1b = generate_detail_availability_table_by_scenario( + df=tlo_availability_df, + groupby_var="item_name", + scenario_cols=scenario_cols, + 
include_levels="1b", + longtable=True, + outputpath=outputfilepath / "appendix", + decimals=2, + caption="Average probability of availability for each consumable under all scenarios (Level 1b)", +) diff --git a/src/scripts/consumables_analyses/manuscript/analysis_improved_consumable_availability.py b/src/scripts/consumables_analyses/manuscript/analysis_improved_consumable_availability.py new file mode 100644 index 0000000000..e47aef3b7c --- /dev/null +++ b/src/scripts/consumables_analyses/manuscript/analysis_improved_consumable_availability.py @@ -0,0 +1,592 @@ +"""Produce outputs for Impact of Improved Consumables Availability Paper +""" +import datetime +import os +import textwrap +from pathlib import Path + +import matplotlib.pyplot as plt +import seaborn as sns +import pandas as pd +import numpy as np + +from scripts.costing.cost_estimation import ( + clean_consumable_name, + create_summary_treemap_by_cost_subgroup, + do_line_plot_of_cost, + do_stacked_bar_plot_of_cost_by_category, + estimate_input_cost_of_scenarios, + summarize_cost_data +) +from tlo import Date +from tlo.analysis.utils import ( + compute_summary_statistics, + extract_params, + extract_results, + get_scenario_info, + get_scenario_outputs, + load_pickled_dataframes, + create_pickles_locally, + summarize, +) + +# Define a timestamp for script outputs +timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") + +# Print the start time of the script +print('Script Start', datetime.datetime.now().strftime('%H:%M')) + +# Create folders to store results +resourcefilepath = Path("./resources") +outputfilepath = Path('./outputs/') +figurespath = Path('./outputs/consumables_impact_analysis/manuscript') +if not os.path.exists(figurespath): + os.makedirs(figurespath) +path_for_consumable_resourcefiles = resourcefilepath / "healthsystem/consumables" + +# Load result files +# ------------------------------------------------------------------------------------------------------------------ +results_folder = get_scenario_outputs('consumables_costing-2026-01-29T121903Z.py', outputfilepath)[0] # Dec 2025 runs +#create_pickles_locally(scenario_output_dir = "./outputs/consumables_costing-2026-01-29T121903Z") # from .log files + +# Check can read results from draw=0, run=0 +log = load_pickled_dataframes(results_folder, 0, 0) # look at one log (so can decide what to extract) +params = extract_params(results_folder) +info = get_scenario_info(results_folder) + +# Declare default parameters for cost analysis +# ------------------------------------------------------------------------------------------------------------------ +# Period relevant for costing +TARGET_PERIOD = (Date(2010, 1, 1), Date(2012, 12, 31)) # This is the period that is costed +relevant_period_for_costing = [i.year for i in TARGET_PERIOD] +list_of_relevant_years_for_costing = list(range(relevant_period_for_costing[0], relevant_period_for_costing[1] + 1)) +list_of_years_for_plot = list(range(2010, 2012)) +number_of_years_costed = relevant_period_for_costing[1] - 2010 + 1 + +discount_rate_health = 0 +chosen_metric = 'mean' + +# Scenarios +cons_scenarios = { + 0: "Baseline availability – Default health system", + 1: "Baseline availability – Perfect health system", + + 2: "Non-therapeutic consumables – Default health system", + 3: "Non-therapeutic consumables – Perfect health system", + + 4: "Vital medicines – Default health system", + 5: "Vital medicines – Perfect health system", + + 6: "Pharmacist-managed stocks – Default health system", + 7: "Pharmacist-managed stocks – 
Perfect health system", + + 8: "75th percentile facility – Default health system", + 9: "75th percentile facility – Perfect health system", + + 10: "90th percentile facility – Default health system", + 11: "90th percentile facility – Perfect health system", + + 12: "Best facility – Default health system", + 13: "Best facility – Perfect health system", + + 14: "District pooling – Default health system", + 15: "District pooling – Perfect health system", + + 16: "Neighbourhood pooling – Default health system", + 17: "Neighbourhood pooling – Perfect health system", + + 18: "Pairwise exchange (Large radius) – Default health system", + 19: "Pairwise exchange (Large radius) – Perfect health system", + + 20: "Pairwise exchange (Small radius) – Default health system", + 21: "Pairwise exchange (Small radius) – Perfect health system", + + 22: "Perfect availability – Default health system", + 23: "Perfect availability – Perfect health system", +} + +main_analysis_subset = [ + k for k, v in cons_scenarios.items() + if "Default health system" in v +] + +cons_scenarios_main = { + k: v.replace(" – Default health system", "") + for k, v in cons_scenarios.items() + if k in main_analysis_subset +} + +# Function to get incremental values +def find_difference_relative_to_comparison(_ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1), relative to where draw = `comparison`. + The comparison is `X - COMPARISON`.""" + return _ser \ + .unstack(level=0) \ + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ + .drop(columns=([comparison] if drop_comparison else [])) \ + .stack() + +# Define a function to create bar plots +def do_standard_bar_plot_with_ci(_df: pd.DataFrame, set_colors=None, annotations=None, + xticklabels_horizontal_and_wrapped=False, + put_labels_in_legend=True, + offset=1e6): + """Make a vertical bar plot for each row of _df, using the columns to identify the height of the bar and the + extent of the error bar.""" + + substitute_labels = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + + yerr = np.array([ + (_df[chosen_metric] - _df['lower']).values, + (_df['upper'] - _df[chosen_metric]).values, + ]) + + xticks = {(i + 0.5): k for i, k in enumerate(_df.index)} + + if set_colors is not None: + # dict mapping -> use index keys; list/tuple/Series -> use as-is + if isinstance(set_colors, dict): + colors = [set_colors.get(k, 'grey') for k in _df.index] + # Optional debug: + # missing = [k for k in _df.index if k not in set_colors] + # if missing: print("No color for:", missing) + else: + colors = list(set_colors) + else: + cmap = sns.color_palette('Spectral', as_cmap=True) + rescale = lambda y: (y - np.min(y)) / (np.max(y) - np.min(y)) # noqa: E731 + colors = list(map(cmap, rescale(np.array(list(xticks.keys()))))) if put_labels_in_legend else None + + fig, ax = plt.subplots(figsize=(10, 5)) + ax.bar( + xticks.keys(), + _df[chosen_metric].values, + yerr=yerr, + ecolor='black', + color=colors, + capsize=10, + label=xticks.values() + ) + + if annotations: + for xpos, (ypos, text) in zip(xticks.keys(), zip(_df['upper'].values.flatten(), annotations)): + annotation_y = ypos + offset + + ax.text( + xpos, + annotation_y, + '\n'.join(text.split(' ', 1)), + horizontalalignment='center', + verticalalignment='bottom', # Aligns text at the bottom of the annotation position + fontsize='x-small', + rotation='horizontal' + ) + + 
ax.set_xticks(list(xticks.keys())) + + if put_labels_in_legend: + # Update xticks label with substitute labels + # Insert legend with updated labels that shows correspondence between substitute label and original label + # Use all_manuscript_scenarios for the legend + xtick_legend = [f'{letter}: {cons_scenarios.get(label, label)}' for letter, label in + zip(substitute_labels, xticks.values())] + xtick_values = [letter for letter, label in zip(substitute_labels, xticks.values())] + + h, legs = ax.get_legend_handles_labels() + ax.legend(h, xtick_legend, loc='center left', fontsize='small', bbox_to_anchor=(1, 0.5)) + ax.set_xticklabels(xtick_values) + else: + if not xticklabels_horizontal_and_wrapped: + # xticklabels will be vertical and not wrapped + ax.set_xticklabels(list(xticks.values()), rotation=90) + else: + wrapped_labs = ["\n".join(textwrap.wrap(_lab, 20)) for _lab in xticks.values()] + ax.set_xticklabels(wrapped_labs) + + # Extend ylim to accommodate data labels + ymin, ymax = ax.get_ylim() + extension = 0.1 * (ymax - ymin) # 10% of range + ax.set_ylim(ymin - extension, ymax + extension) # Set new y-axis limits with the extended range + + ax.grid(axis="y") + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + # fig.tight_layout() + fig.tight_layout(pad=2.0) + plt.subplots_adjust(left=0.15, right=0.5, top=0.88) + + return fig, ax + +def plot_stacked_mean_with_total_ci( + summary_df, + colors: dict, + scenario_labels: dict | None = None, + ylabel: str = "", + xlabel: str = "Scenario", + title: str | None = None, + figsize=(12, 6), + legend_outside: bool = True, +): + """ + Plot stacked bars using mean values by disease group, with a confidence + interval for the total (lower/upper summed across groups). + + Parameters + ---------- + summary_df : pd.DataFrame + Index: disease_group + Columns: MultiIndex (draw, stat) where stat ∈ {'lower','mean','upper'} + + colors : dict + Mapping {disease_group: color} + + scenario_labels : dict, optional + Mapping {draw: label} for x-axis + + ylabel : str + Y-axis label + + xlabel : str + X-axis label + + title : str, optional + Figure title + + figsize : tuple + Figure size + + legend_outside : bool + Whether to place legend outside the plot + """ + + # ---- Extract mean values (for stacking) ---- + mean_df = summary_df.xs("mean", level="stat", axis=1) + + # ---- Extract totals for CI ---- + total_mean = mean_df.sum(axis=0) + total_lower = ( + summary_df.xs("lower", level="stat", axis=1) + .sum(axis=0) + ) + total_upper = ( + summary_df.xs("upper", level="stat", axis=1) + .sum(axis=0) + ) + + # ---- X axis ---- + draws = mean_df.columns + x = np.arange(len(draws)) + + # ---- Plot ---- + fig, ax = plt.subplots(figsize=figsize) + + bottom = np.zeros(len(draws)) + + for group in mean_df.index: + vals = mean_df.loc[group].values + ax.bar( + x, + vals, + bottom=bottom, + color=colors.get(group, "grey"), + label=group + ) + bottom += vals + + # ---- CI for total only ---- + yerr = np.vstack([ + total_mean - total_lower, + total_upper - total_mean + ]) + + ax.errorbar( + x, + total_mean, + yerr=yerr, + fmt="none", + ecolor="black", + elinewidth=1.5, + capsize=4, + zorder=5 + ) + + # ---- Formatting ---- + ax.axhline(0, color="black", linewidth=0.8) + + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + + if title: + ax.set_title(title, pad=10) + + if scenario_labels: + ax.set_xticks(x) + ax.set_xticklabels( + [scenario_labels.get(d, d) for d in draws], + rotation=45, + ha="right" + ) + else: + ax.set_xticks(x) + 
ax.set_xticklabels(draws, rotation=45)  # no labels supplied: show raw draw numbers
+
+    if legend_outside:
+        ax.legend(
+            title="Disease group",
+            bbox_to_anchor=(1.02, 1),
+            loc="upper left",
+            frameon=False
+        )
+    else:
+        ax.legend(frameon=False)
+
+    fig.tight_layout()
+    return fig, ax
+
+
+def get_num_dalys(_df):
+    """Return the total number of DALYs (stacked) by label (total within the TARGET_PERIOD).
+    Throws an error if there is not a record for every year in the TARGET_PERIOD (to guard against
+    inadvertently using results from runs that crashed mid-way through the simulation).
+    """
+    years_needed = relevant_period_for_costing
+    assert set(_df.year.unique()).issuperset(years_needed), "Some years are not recorded."
+    _df = _df.loc[_df.year.between(*years_needed)].drop(columns=['date', 'sex', 'age_range']).groupby(
+        'year').sum().sum(axis=1)
+
+    # Initial year and discount rate
+    initial_year = min(_df.index.unique())
+
+    # Calculate the discounted values
+    discounted_values = _df / (1 + discount_rate_health) ** (_df.index - initial_year)
+
+    return pd.Series(discounted_values.sum())
+
+num_dalys = extract_results(
+    results_folder,
+    module='tlo.methods.healthburden',
+    key='dalys_stacked',
+    custom_generate_series=get_num_dalys,
+    do_scaling=True
+)
+
+# Get absolute DALYs averted
+num_dalys_averted = (-1.0 *
+                     pd.DataFrame(
+                         find_difference_relative_to_comparison(
+                             num_dalys.loc[0],
+                             comparison=0)  # draw 0 (baseline availability) is the comparator
+                     ).T.iloc[0].unstack(level='run'))
+
+# Plot DALYs
+num_dalys_averted_summarized = summarize_cost_data(num_dalys_averted, _metric=chosen_metric)
+num_dalys_averted_summarized = num_dalys_averted_summarized[num_dalys_averted_summarized.index.isin(main_analysis_subset)]
+num_dalys_averted_subset_for_figure = num_dalys_averted_summarized[
+    num_dalys_averted_summarized.index.get_level_values('draw').isin(list(cons_scenarios.keys()))]
+name_of_plot = f'Incremental DALYs averted compared to baseline {relevant_period_for_costing[0]}-{relevant_period_for_costing[1]}'
+fig, ax = do_standard_bar_plot_with_ci(
+    (num_dalys_averted_subset_for_figure / 1e6),
+    annotations=[
+        f"{row[chosen_metric] / 1e6:.2f} ({row['lower'] / 1e6:.2f} - {row['upper'] / 1e6:.2f})"
+        for _, row in num_dalys_averted_subset_for_figure.iterrows()
+    ],
+    xticklabels_horizontal_and_wrapped=False,
+    put_labels_in_legend=True,
+    offset=0.05,
+)
+#ax.set_title(name_of_plot)
+ax.set_ylabel('DALYs \n(Millions)')
+ax.set_ylim(bottom=0)
+fig.savefig(figurespath / name_of_plot.replace(' ', '_').replace(',', ''))
+plt.close(fig)
+
+def get_num_dalys_by_disease(_df):
+    """
+    Return discounted total DALYs by disease over the TARGET_PERIOD.
+    Output: Series indexed by disease name.
+    """
+    years_needed = relevant_period_for_costing
+    assert set(_df.year.unique()).issuperset(years_needed), \
+        "Some years are not recorded."
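+
+    # NOTE (added for clarity): the discounting applied below follows
+    # DALYs_t / (1 + r) ** (t - t0); with the default discount_rate_health = 0
+    # set above, the discount factor is 1 and the totals equal the raw annual sums.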
+ + # Keep only years of interest + _df = _df.loc[_df.year.between(*years_needed)] + + # Drop non-disease columns + disease_cols = _df.columns.difference( + ['date', 'sex', 'age_range', 'year'] + ) + + # Sum by year × disease + by_year_disease = ( + _df[['year'] + list(disease_cols)] + .groupby('year') + .sum() + ) + + # Discounting + initial_year = by_year_disease.index.min() + discount_factors = (1 + discount_rate_health) ** ( + by_year_disease.index - initial_year + ) + + discounted = by_year_disease.div(discount_factors, axis=0) + + # Sum over time → total DALYs by disease + return discounted.sum() + +num_dalys_by_disease = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_by_disease, + do_scaling=True +) + +disease_groups = { + # --- HIV / TB / Malaria --- + "HIV/AIDS": [ + "AIDS", + ], + "Malaria": [ + "Malaria", + ], + + # --- MNCH --- + "RMNCH": [ + "Maternal Disorders", + "Neonatal Disorders", + "Congenital birth defects", + "Childhood Diarrhoea", + "Childhood Undernutrition", + "Lower respiratory infections", + "Measles", + ], + + # --- NCDs --- + "Cardiometabolic": [ + "Heart Disease", + "Stroke", + "Diabetes", + "Kidney Disease", + ], + "Cancer": [ + "Cancer (Bladder)", + "Cancer (Breast)", + "Cancer (Cervix)", + "Cancer (Oesophagus)", + "Cancer (Prostate)", + "Cancer (Other)", + ], + "Mental & Neurological": [ + "Depression / Self-harm", + "Epilepsy", + "Lower Back Pain", + ], + "Other": [ + "COPD", + "Schistosomiasis", + "Other", + ], + + # --- Injuries --- + "Injuries": [ + "Transport Injuries", + ], +} +def aggregate_by_disease_group(df: pd.DataFrame, + disease_groups: dict) -> pd.DataFrame: + """ + Aggregate disease-level rows into disease-group rows. 
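+
+    Example (illustrative): with disease_groups = {"HIV/AIDS": ["AIDS"]}, the
+    output has a single row "HIV/AIDS" equal to df.loc["AIDS"]; any disease
+    missing from df.index is skipped silently.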
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        Index: disease names
+        Columns: MultiIndex (draw, run)
+        Values: DALYs
+
+    disease_groups : dict
+        Mapping {group_name: [list of disease names]}
+
+    Returns
+    -------
+    pd.DataFrame
+        Index: disease groups
+        Columns: same as df (draw, run)
+        Values: summed DALYs per group
+    """
+    grouped_rows = {}
+
+    for group_name, diseases in disease_groups.items():
+        # Select diseases that actually exist in the index
+        present = [d for d in diseases if d in df.index]
+
+        if not present:
+            continue
+
+        grouped_rows[group_name] = df.loc[present].sum(axis=0)
+
+    return pd.DataFrame.from_dict(grouped_rows, orient="index")
+
+num_dalys_by_group = aggregate_by_disease_group(
+    num_dalys_by_disease,
+    disease_groups
+)
+
+# Get absolute DALYs averted by disease group
+num_dalys_averted_by_group = {}
+
+for disease in num_dalys_by_group.index:
+    # Extract ONE disease group -> Series with index (draw, run)
+    ser = num_dalys_by_group.loc[disease]
+
+    # Name index levels correctly (optional but good hygiene)
+    ser.index.names = ["draw", "run"]
+
+    # Apply the existing function
+    num_dalys_averted_by_group[disease] = find_difference_relative_to_comparison(
+        ser,
+        comparison=0,
+        scaled=False,
+        drop_comparison=True
+    )
+
+num_dalys_averted_by_group = pd.concat(
+    num_dalys_averted_by_group,
+    names=["disease_group"]
+)
+
+# Plot DALYs averted by disease group
+num_dalys_averted_by_group_summarized = summarize(num_dalys_averted_by_group.unstack().unstack())
+num_dalys_averted_by_group_summarized = (
+    num_dalys_averted_by_group_summarized
+    .loc[:, num_dalys_averted_by_group_summarized.columns
+         .get_level_values("draw")
+         .isin(main_analysis_subset)]
+)
+disease_colors = {
+    "HIV/AIDS": "#e41a1c",
+    "Malaria": "#377eb8",
+    "RMNCH": "#4daf4a",
+    "Cardiometabolic": "#984ea3",
+    "Cancer": "#ff7f00",
+    "Mental & Neurological": "#a65628",
+    "Injuries": "#f781bf",
+    "Other": "#999999",
+}
+
+fig, ax = plot_stacked_mean_with_total_ci(
+    summary_df=num_dalys_averted_by_group_summarized,
+    colors=disease_colors,
+    scenario_labels=cons_scenarios_main,
+    ylabel="DALYs averted",
+    title=""
+)
+fig.savefig(figurespath / "dalys_averted_by_disease_group.png",
+            dpi=300, bbox_inches="tight")
diff --git a/src/scripts/consumables_analyses/manuscript/scenario_improved_consumable_availability.py b/src/scripts/consumables_analyses/manuscript/scenario_improved_consumable_availability.py
new file mode 100644
index 0000000000..d875bd807b
--- /dev/null
+++ b/src/scripts/consumables_analyses/manuscript/scenario_improved_consumable_availability.py
@@ -0,0 +1,119 @@
+
+
+"""This scenario file runs the model under different assumptions about consumable availability, in order to
+estimate the cost under each scenario over the HSSP-III period.
+
+Run on the batch system using:
+```
+tlo batch-submit src/scripts/consumables_analyses/manuscript/scenario_improved_consumable_availability.py
+```
+
+or locally using:
+```
+tlo scenario-run src/scripts/consumables_analyses/manuscript/scenario_improved_consumable_availability.py
+```
+
+# TODO Pending actions
+# Private market substitution - derive percentage from TLM data
+# Don't run sensitivity analyses yet (can be added later) - only run the HR one --> 24 scenarios
+"""
+from tlo import Date, logging
+from tlo.methods.fullmodel import fullmodel
+from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher
+from tlo.scenario import BaseScenario
+
+class ConsumablesImpact(BaseScenario):
+    # -----------------------------
+    # 1) DEFINE SCENARIOS EXPLICITLY
+    # -----------------------------
+    CONSUMABLE_SCENARIOS = [
+        'default',
+        'scenario1', 'scenario2', 'scenario3',  # Predictive factors
+        'scenario6', 'scenario7', 'scenario8',  # Benchmark facilities
+        'scenario16', 'scenario17', 'scenario18', 'scenario19',  # Redistribution
+        'all'  # Perfect
+    ]
+
+    SYSTEM_MODES = [
+        {
+            "mode_appt_constraints": 2,
+            "max_healthsystem_function": [False, False],
+            "max_healthcare_seeking": [False, False],
+        },
+        {
+            "mode_appt_constraints": 1,
+            "max_healthsystem_function": [False, True],
+            "max_healthcare_seeking": [False, True],
+        },
+    ]
+
+    def __init__(self):
+        super().__init__()
+        self.seed = 0
+        self.start_date = Date(2010, 1, 1)
+        self.end_date = Date(2028, 1, 1)  # TODO change to 2041
+        # Run until 2040 even though the analysis may be focused on years until 2030
+        self.pop_size = 5_000  # TODO change to 100_000
+
+        # Build the cartesian product of consumable scenarios and health-system modes
+        self.SCENARIOS = [
+            (cons, sys)
+            for cons in self.CONSUMABLE_SCENARIOS
+            for sys in self.SYSTEM_MODES
+        ]
+
+        self.number_of_draws = len(self.SCENARIOS)
+        self.scenarios = list(range(self.number_of_draws))
+
+        self.runs_per_draw = 1  # TODO change to 5
+
+    def log_configuration(self):
+        return {
+            'filename': 'consumables_costing',
+            'directory': './outputs',
+            'custom_levels': {
+                '*': logging.WARNING,
+                "tlo.methods.demography": logging.INFO,
+                "tlo.methods.healthburden": logging.INFO,
+                "tlo.methods.healthsystem.summary": logging.INFO,
+            }
+        }
+
+    def modules(self):
+        return (
+            fullmodel()
+            + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher()]
+        )
+
+    def draw_parameters(self, draw_number, rng):
+        cons_scenario, sys = self.SCENARIOS[draw_number]
+
+        return {
+            'HealthSystem': {
+                'cons_availability': 'default',
+                'data_source_for_cons_availability_estimates': 'updated',
+                'year_cons_availability_switch': 2026,
+                'cons_availability_postSwitch': cons_scenario,
+                'mode_appt_constraints': 1,
+                'mode_appt_constraints_postSwitch': sys["mode_appt_constraints"],  # once without HR constraints and once with HR constraints
+                'year_mode_switch': 2026,
+                'policy_name': 'EHP_III',
+                'use_funded_or_actual_staffing': 'actual',
+                'scale_to_effective_capabilities': True,  # <-- transition into Mode 2 with the effective capabilities in HRH 'revealed' in Mode 1
+                'yearly_HR_scaling_mode': 'historical_scaling',  # allow historical HRH scaling to occur 2018-2024
+                'equip_availability': 'all',
+                'beds_availability': 'all',
+            },
+            "ImprovedHealthSystemAndCareSeekingScenarioSwitcher": {
+                "max_healthsystem_function": sys["max_healthsystem_function"],
+                "max_healthcare_seeking": sys["max_healthcare_seeking"],
+                "year_of_switch": 2026,
+            }
+        }
+
+if __name__ == '__main__':
+    from tlo.cli import scenario_run
+
+    scenario_run([__file__])
diff --git a/src/scripts/data_file_processing/healthsystem/consumables/consumable_resource_analyses_with_lmis/consumables_availability_estimation.py b/src/scripts/data_file_processing/healthsystem/consumables/consumable_resource_analyses_with_lmis/consumables_availability_estimation.py
index a5862a02a9..9fb8b135ee 100644
--- a/src/scripts/data_file_processing/healthsystem/consumables/consumable_resource_analyses_with_lmis/consumables_availability_estimation.py
+++ b/src/scripts/data_file_processing/healthsystem/consumables/consumable_resource_analyses_with_lmis/consumables_availability_estimation.py
@@ -39,16 +39,19 @@
 import os
 from collections import defaultdict
 from pathlib import Path
-from typing import List, Optional
 
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+from 
typing import Optional, List +import re -from scripts.data_file_processing.healthsystem.consumables.generating_consumable_scenarios import ( +from scripts.data_file_processing.healthsystem.consumables.generating_consumable_scenarios.generate_consumable_availability_scenarios_for_impact_analysis import ( generate_alternative_availability_scenarios, - generate_descriptive_consumable_availability_plots, + generate_descriptive_consumable_availability_plots ) +from scripts.data_file_processing.healthsystem.consumables.generating_consumable_scenarios.create_consumable_redistribution_scenarios import generate_redistribution_scenarios + from tlo.methods.consumables import check_format_of_consumables_file # Set local shared folder source @@ -70,8 +73,8 @@ resourcefilepath = Path("./resources") path_for_new_resourcefiles = resourcefilepath / "healthsystem/consumables" + # Define necessary functions -# Functions to clean LMIS data def change_colnames(df, NameChangeList): # Change column names ColNames = df.columns ColNames2 = ColNames @@ -328,6 +331,15 @@ def update_level1b_availability( return availability_df +# Function to count scenarios at any stage of generating the cons availability RF +def get_max_scenario_number(df: pd.DataFrame) -> int: + scenario_nums = [ + int(m.group(1)) + for c in df.columns + if (m := re.match(r"available_prop_scenario(\d+)$", c)) + ] + return max(scenario_nums) if scenario_nums else 0 + # Function to compute average availability by facility level def compute_avg_availability_by_var(df: pd.DataFrame = None, # TLO availability dataframe with each row representing one Facility_ID, item, month, mfl: Optional[pd.DataFrame] = None, # Master Facility list mapping Facility_Level to Faciility_ID @@ -567,7 +579,9 @@ def comparison_plot_by_level(fac_type): months_withdata = ['January', 'February', 'April', 'October', 'November'] months_interpolated = ['March', 'May', 'June', 'July', 'August', 'September', 'December'] -# 1B. RESHAPE AND REORDER +# 2. RESHAPE AND REORDER ## +######################################################################################### + # Reshape dataframe so that each row represent a unique consumable and facility lmis_df_wide = lmis_df.pivot_table(index=['district', 'fac_type_tlo', 'fac_name', 'program', 'item'], columns='month', values=['closing_bal', 'dispensed', 'received', 'stkout_days', 'amc'], @@ -582,7 +596,8 @@ def comparison_plot_by_level(fac_type): num = lmis_df_wide._get_numeric_data() lmis_df_wide[num < 0] = np.nan -# 1C. INTERPOLATE MISSING VALUES ## +# 3. 
INTERPOLATE MISSING VALUES ## +######################################################################################### # When stkout_days is empty but closing balance, dispensed and received entries are available lmis_df_wide_flat = lmis_df_wide.reset_index() count_stkout_entries = lmis_df_wide_flat['stkout_days'].count(axis=1).sum() @@ -658,6 +673,45 @@ def comparison_plot_by_level(fac_type): # TODO check whether there is any issue with the above items_introduced_in_september which only show up from September # onwards +def rename_items_to_address_inconsistentencies(_df, item_dict): + """Return a dataframe with rows for the same item with inconsistent names collapsed into one""" + # Recode item names appearing from Jan to Aug to the new names adopted from September onwards + old_unique_item_count = _df.item.nunique() + for item in item_dict: + print(len(_df[_df.item == item_dict[item]]), ''' instances of "''', item_dict[item], '''"''' + ''' changed to "''', item, + '''"''') + # row_newname = _df.item == item + row_oldname = _df.item == item_dict[item] + _df.loc[row_oldname, 'item'] = item + + # Make a list of column names to be collapsed using different methods + columns_to_sum = [col for col in _df.columns if + col[0].startswith(('amc', 'closing_bal', 'dispensed', 'received', 'stkout_days'))] + columns_to_preserve = [col for col in _df.columns if + col[0].startswith(('data_source'))] + + # Define aggregation function to be applied to collapse data by item + def custom_agg(x): + if x.name in columns_to_sum: + return x.sum(skipna=True) if np.any( + x.notnull() & (x >= 0)) else np.nan # this ensures that the NaNs are retained + # , i.e. not changed to 0, when the corresponding data for both item name variations are NaN, and when there + # is a 0 or positive value for one or both item name variation, the sum is taken. + elif x.name in columns_to_preserve: + return x.str.cat( + sep='') # for the data_source column, this function concatenates the string values + + # Collapse dataframe + _collapsed_df = _df.groupby(['program', 'item', 'district', 'fac_type_tlo', 'fac_name']).agg( + {col: custom_agg for col in columns_to_preserve + columns_to_sum} + ).reset_index() + + # Test that all items in the dictionary have been found in the dataframe + new_unique_item_count = _collapsed_df.item.nunique() + assert len(item_dict) == old_unique_item_count - new_unique_item_count + return _collapsed_df + # Hold out the dataframe with no naming inconsistencies list_of_items_with_inconsistent_names_zipped = set(zip(inconsistent_item_names_mapping.keys(), inconsistent_item_names_mapping.values())) list_of_items_with_inconsistent_names = [item for sublist in list_of_items_with_inconsistent_names_zipped for item in sublist] @@ -670,7 +724,7 @@ def comparison_plot_by_level(fac_type): lmis_df_wide_flat = pd.concat([df_without_consistent_item_names_corrected, df_with_consistent_item_names], ignore_index=True) -# 1. 
--- RULE: 1.If i) stockout is missing, ii) closing_bal, amc and received are not missing , and iii) amc !=0 and, +# --- 3.1 RULE: 1.If i) stockout is missing, ii) closing_bal, amc and received are not missing , and iii) amc !=0 and, # then stkout_days[m] = (amc[m] - closing_bal[m-1] - received)/amc * number of days in the month --- # (Note that the number of entries for closing balance, dispensed and received is always the same) for m in range(2, 13): @@ -706,7 +760,7 @@ def comparison_plot_by_level(fac_type): count_stkout_entries = lmis_df_wide_flat['stkout_days'].count(axis=1).sum() print(count_stkout_entries, "stockout entries after first interpolation") -# 2. --- If any stockout_days < 0 after the above interpolation, update to stockout_days = 0 --- +# 3.2 --- If any stockout_days < 0 after the above interpolation, update to stockout_days = 0 --- # RULE: If closing balance[previous month] - dispensed[this month] + received[this month] > 0, stockout == 0 for m in range(1, 13): cond1 = lmis_df_wide_flat['stkout_days', months_dict[m]] < 0 @@ -724,7 +778,7 @@ def comparison_plot_by_level(fac_type): # Flatten multilevel columns lmis_df_wide_flat.columns = [' '.join(col).strip() for col in lmis_df_wide_flat.columns.values] -# 3. --- If the consumable was previously reported and during a given month, if any consumable was reported, assume +# 3.3 --- If the consumable was previously reported and during a given month, if any consumable was reported, assume # 100% days of stckout --- # RULE: If the balance on a consumable is ever reported and if any consumables are reported during the month, stkout_ # days = number of days of the month @@ -752,7 +806,9 @@ def comparison_plot_by_level(fac_type): count_stkout_entries = count_stkout_entries + lmis_df_wide_flat['stkout_days ' + months_dict[m]].count().sum() print(count_stkout_entries, "stockout entries after third interpolation") -# 1D. CALCULATE STOCK OUT RATES BY MONTH and FACILITY +# 4. CALCULATE STOCK OUT RATES BY MONTH and FACILITY ## +######################################################################################### + lmis = lmis_df_wide_flat # choose dataframe # Generate variables denoting the stockout proportion in each month @@ -771,9 +827,10 @@ def comparison_plot_by_level(fac_type): sep=' ', suffix=r'\w+') lmis = lmis.reset_index() -# 2. LOAD CLEANED MATCHED CONSUMABLE LIST FROM TLO MODEL AND MERGE WITH LMIS DATA -######################################################################################################################## -# 1. --- Load and clean data --- +# 5. LOAD CLEANED MATCHED CONSUMABLE LIST FROM TLO MODEL AND MERGE WITH LMIS DATA ## +######################################################################################### + +# 5.1 --- Load and clean data --- # Import matched list of consumanbles consumables_df = pd.read_csv(path_for_new_resourcefiles / 'ResourceFile_consumables_matched.csv', low_memory=False, encoding="ISO-8859-1") @@ -792,12 +849,49 @@ def comparison_plot_by_level(fac_type): # Update matched consumable name where the name in the OpenLMIS data was updated in September +def replace_old_item_names_in_lmis_data(_df, item_dict): + """Return a dataframe with old LMIS consumable names replaced with the new name""" + for item in item_dict: + cond_oldname = _df.item == item_dict[item] + _df.loc[cond_oldname, 'item'] = item + return _df + + matched_consumables = replace_old_item_names_in_lmis_data(matched_consumables, inconsistent_item_names_mapping) -# 2. 
--- Merge data with LMIS data --- +# 5.2 --- Merge data with LMIS data --- lmis_matched_df = pd.merge(lmis, matched_consumables, how='inner', on=['item']) lmis_matched_df = lmis_matched_df.sort_values('data_source') + +def collapse_stockout_data(_df, groupby_list, var): + """Return a dataframe with rows for the same TLO model item code collapsed into 1""" + # Define column lists based on the aggregation function to be applied + columns_to_multiply = [var] + columns_to_sum = ['closing_bal', 'amc', 'dispensed', 'received'] + columns_to_preserve = ['data_source', 'consumable_reporting_freq', 'consumables_reported_in_mth'] + + # Define aggregation function to be applied to collapse data by item + def custom_agg_stkout(x): + if x.name in columns_to_multiply: + return x.prod(skipna=True) if np.any( + x.notnull() & (x >= 0)) else np.nan # this ensures that the NaNs are retained + elif x.name in columns_to_sum: + return x.sum(skipna=True) if np.any( + x.notnull() & (x >= 0)) else np.nan # this ensures that the NaNs are retained + # , i.e. not changed to 1, when the corresponding data for both item name variations are NaN, and when there + # is a 0 or positive value for one or both item name variation, the sum is taken. + elif x.name in columns_to_preserve: + return x.iloc[0] # this function extracts the first value + + # Collapse dataframe + _collapsed_df = _df.groupby(groupby_list).agg( + {col: custom_agg_stkout for col in columns_to_multiply + columns_to_sum + columns_to_preserve} + ).reset_index() + + return _collapsed_df + + # 2.i. For substitable drugs (within drug category), collapse by taking the product of stkout_prop (OR condition) # This represents Pr(all substitutes with the item code are stocked out) groupby_list1 = ['module_name', 'district', 'fac_type_tlo', 'fac_name', 'month', 'item_code', 'consumable_name_tlo', @@ -845,9 +939,10 @@ def comparison_plot_by_level(fac_type): 'available_prop', 'closing_bal', 'amc', 'dispensed', 'received', 'data_source', 'consumable_reporting_freq', 'consumables_reported_in_mth']] -# 3. ADD STOCKOUT DATA FROM OTHER SOURCES TO COMPLETE STOCKOUT DATAFRAME -######################################################################################################################## -# --- 1. Generate a dataframe of stock availability for consumables which were not found in the OpenLMIS data but +# 6. ADD STOCKOUT DATA FROM OTHER SOURCES TO COMPLETE STOCKOUT DATAFRAME ## +######################################################################################### + +# --- 6.1. Generate a dataframe of stock availability for consumables which were not found in the OpenLMIS data but # available in the HHFA 2018/19 --- # # Save the list of items for which a match was not found in the OpenLMIS data unmatched_consumables = consumables_df.drop_duplicates(['item_code']) @@ -923,13 +1018,13 @@ def comparison_plot_by_level(fac_type): ('available_prop_hhfa', 'available_prop')] change_colnames(unmatched_consumables_df, NameChangeList) -# --- 2. Append OpenLMIS stockout dataframe with HHFA stockout dataframe and Extract in .csv format --- # +# --- 6.2 Append OpenLMIS stockout dataframe with HHFA stockout dataframe and Extract in .csv format --- # # Append common consumables stockout dataframe with the main dataframe cond = unmatched_consumables_df['available_prop'].notna() unmatched_consumables_df.loc[~cond, 'data_source'] = 'Not available' stkout_df = pd.concat([stkout_df, unmatched_consumables_df], axis=0, ignore_index=True) -# --- 3. 
Append stockout rate for facility level 0 from HHFA --- # +# --- 6.3 Append stockout rate for facility level 0 from HHFA --- # cond = hhfa_df['item_code'].notna() hhfa_fac0 = hhfa_df[cond][ ['item_code', 'consumable_name_tlo', 'fac_count_Facility_level_0', 'available_prop_hhfa_Facility_level_0']] @@ -946,7 +1041,47 @@ def comparison_plot_by_level(fac_type): stkout_df = stkout_df[~cond] stkout_df = pd.concat([stkout_df, hhfa_fac0], axis=0, ignore_index=True) -# --- 4. Generate new category variable for analysis --- # +# --- 6.4 Generate new category variable for analysis --- # +def recategorize_modules_into_consumable_categories(_df): + _df['item_category'] = _df['module_name'].str.lower() + cond_RH = (_df['item_category'].str.contains('care_of_women_during_pregnancy')) | \ + (_df['item_category'].str.contains('labour')) + cond_newborn = (_df['item_category'].str.contains('newborn')) + cond_newborn[cond_newborn.isna()] = False + cond_childhood = (_df['item_category'] == 'acute lower respiratory infections') | \ + (_df['item_category'] == 'measles') | \ + (_df['item_category'] == 'diarrhoea') + cond_rti = _df['item_category'] == 'road traffic injuries' + cond_cancer = _df['item_category'].str.contains('cancer') + cond_cancer[cond_cancer.isna()] = False + cond_ncds = (_df['item_category'] == 'epilepsy') | \ + (_df['item_category'] == 'depression') + _df.loc[cond_RH, 'item_category'] = 'reproductive_health' + _df.loc[cond_cancer, 'item_category'] = 'cancer' + _df.loc[cond_newborn, 'item_category'] = 'neonatal_health' + _df.loc[cond_childhood, 'item_category'] = 'other_childhood_illnesses' + _df.loc[cond_rti, 'item_category'] = 'road_traffic_injuries' + _df.loc[cond_ncds, 'item_category'] = 'ncds' + cond_condom = _df['item_code'] == 2 + _df.loc[cond_condom, 'item_category'] = 'contraception' + + # Create a general consumables category + general_cons_list = [300, 33, 57, 58, 141, 5, 6, 10, 21, 23, 127, 24, 80, 93, 144, 149, 154, 40, 67, 73, 76, + 82, 101, 103, 88, 126, 135, 71, 98, 171, 133, 134, 244, 247, 49, 112, 1933, 1960] + cond_general = _df['item_code'].isin(general_cons_list) + _df.loc[cond_general, 'item_category'] = 'general' + + # Fill gaps in categories + dict_for_missing_categories = {292: 'acute lower respiratory infections', 293: 'acute lower respiratory infections', + 307: 'reproductive_health', 2019: 'reproductive_health', + 2678: 'tb', 1171: 'other_childhood_illnesses', 1237: 'cancer', 1239: 'cancer'} + # Use map to create a new series from item_code to fill missing values in category + mapped_categories = _df['item_code'].map(dict_for_missing_categories) + # Use fillna on the 'item_category' column to fill missing values using the mapped_categories + _df['item_category'] = _df['item_category'].fillna(mapped_categories) + + return _df + stkout_df = recategorize_modules_into_consumable_categories(stkout_df) item_code_category_mapping = stkout_df[['item_category', 'item_code']].drop_duplicates() @@ -956,12 +1091,12 @@ def comparison_plot_by_level(fac_type): item_designations = item_designations.merge(item_code_category_mapping, left_on = 'Item_Code', right_on = 'item_code', how = 'left', validate = '1:1') item_designations.drop(columns = 'item_code').to_csv(path_for_new_resourcefiles / 'ResourceFile_Consumables_Item_Designations.csv', index = False) -# --- 5. 
Replace district/fac_name/month entries where missing --- # +# --- 6.5 Replace district/fac_name/month entries where missing --- # for var in ['district', 'fac_name', 'month']: cond = stkout_df[var].isna() stkout_df.loc[cond, var] = 'Aggregate' -# --- 6. Export final stockout dataframe --- # +# --- 6.6 Export final stockout dataframe --- # # stkout_df.to_csv(path_for_new_resourcefiles / "ResourceFile_Consumables_availability_and_usage.csv") # <-- this line commented out as the file is very large. @@ -971,6 +1106,26 @@ def comparison_plot_by_level(fac_type): lmis_consumable_usage = stkout_df.copy() # TODO Generate a smaller version of this file # Collapse individual facilities +def get_inflow_to_outflow_ratio_by_item_and_facilitylevel(_df): + df_by_item_level_month = \ + _df.groupby(['item_category', 'item_code', 'district', 'fac_type_tlo', 'month'])[ + ['closing_bal', 'dispensed', 'received']].sum() + df_by_item_level_month = df_by_item_level_month.loc[df_by_item_level_month.index.get_level_values('month') != "Aggregate"] + # Opening balance in January is the closing balance for the month minus what was received during the month plus what was dispensed + opening_bal_january = df_by_item_level_month.loc[df_by_item_level_month.index.get_level_values('month') == 'January', 'closing_bal'] + \ + df_by_item_level_month.loc[df_by_item_level_month.index.get_level_values('month') == 'January', 'dispensed'] - \ + df_by_item_level_month.loc[df_by_item_level_month.index.get_level_values('month') == 'January', 'received'] + closing_bal_december = df_by_item_level_month.loc[df_by_item_level_month.index.get_level_values('month') == 'December', 'closing_bal'] + # the consumable inflow during the year is the opening balance in January + what was received throughout the year - what was transferred to the next year (i.e. closing bal of December) + total_consumables_inflow_during_the_year = df_by_item_level_month['received'].groupby(level=['item_category', 'item_code', 'district', 'fac_type_tlo']).sum() +\ + opening_bal_january.reset_index(level='month', drop=True) -\ + closing_bal_december.reset_index(level='month', drop=True) + total_consumables_outflow_during_the_year = df_by_item_level_month['dispensed'].groupby(level=['item_category', 'item_code', 'district', 'fac_type_tlo']).sum() + inflow_to_outflow_ratio = total_consumables_inflow_during_the_year.div(total_consumables_outflow_during_the_year, fill_value=1) + inflow_to_outflow_ratio.loc[inflow_to_outflow_ratio < 1] = 1 # Ratio can't be less than 1 + + return inflow_to_outflow_ratio + inflow_to_outflow_ratio = get_inflow_to_outflow_ratio_by_item_and_facilitylevel(lmis_consumable_usage) # Clean values for analysis inflow_to_outflow_ratio.loc[inflow_to_outflow_ratio < 1] = 1 # Ratio can't be less than 1 @@ -992,6 +1147,7 @@ def comparison_plot_by_level(fac_type): # the Master Facilities List. # unify the set within each facility_id + mfl = pd.read_csv(resourcefilepath / "healthsystem" / "organisation" / "ResourceFile_Master_Facilities_List.csv") districts = set(pd.read_csv(resourcefilepath / 'demography' / 'ResourceFile_Population_2010.csv')['District']) fac_levels = {'0', '1a', '1b', '2', '3', '4'} @@ -1017,8 +1173,7 @@ def comparison_plot_by_level(fac_type): # Take averages (now that 'Mzimba' is mapped-to by both 'Mzimba South' and 'Mzimba North'.) sf = sf.groupby(by=['district_std', 'fac_type_tlo', 'month', 'item_code'])['available_prop'].mean().reset_index() -# 4. 
INTERPOLATE MISSING DATA TO ENSURE DATA IS AVAILABLE FOR ALL ITEMS, MONTHS, LEVELS, DISTRICTS -######################################################################################################################## +# Fill in missing data: # 1) Cities to get same results as their respective regions copy_source_to_destination = { 'Mzimba': 'Mzuzu City', @@ -1118,10 +1273,26 @@ def comparison_plot_by_level(fac_type): full_set = full_set.combine_first(sf_final.set_index(['Facility_ID', 'month', 'item_code'])['available_prop']) # Fill in the blanks with rules for interpolation. + facilities_by_level = defaultdict(set) for ix, row in mfl.iterrows(): facilities_by_level[row['Facility_Level']].add(row['Facility_ID']) + +def get_other_facilities_of_same_level(_fac_id): + """Return a set of facility_id for other facilities that are of the same level as that provided.""" + for v in facilities_by_level.values(): + if _fac_id in v: + return v - {_fac_id} + + +def interpolate_missing_with_mean(_ser): + """Return a series in which any values that are null are replaced with the mean of the non-missing.""" + if pd.isnull(_ser).all(): + raise ValueError + return _ser.fillna(_ser.mean()) + + # Create new dataset that include the interpolations (The operation is not done "in place", because the logic is based # on what results are missing before the interpolations in other facilities). full_set_interpolated = full_set * np.nan @@ -1168,11 +1339,22 @@ def comparison_plot_by_level(fac_type): full_set_interpolated = full_set_interpolated.reset_index() #full_set_interpolated = full_set_interpolated.reset_index().merge(item_code_category_mapping, on = 'item_code', how = 'left', validate = 'm:1') +full_set_interpolated.to_csv( + path_for_new_resourcefiles / "ResourceFile_Consumables_availability_small.csv", + index=False +) # Save as .csv - this file is then read to apply the following functions - generate_alternative_availability_scenarios +# & generate_redistribution_scenarios # 5. 
ADD ALTERNATIVE AVAILABILITY SCENARIOS
########################################################################################################################
# Add alternative availability scenarios to represent improved or reduced consumable availability
full_set_interpolated_with_scenarios = generate_alternative_availability_scenarios(full_set_interpolated)
+max_scenario = get_max_scenario_number(full_set_interpolated_with_scenarios) # Get current scenario count
+full_set_interpolated_with_scenarios = generate_redistribution_scenarios(full_set_interpolated_with_scenarios,
+                                                                         scenario_count = max_scenario,
+                                                                         outputfilepath = Path("./outputs/consumables_impact_analysis"))
+available_cols = [c for c in full_set_interpolated_with_scenarios.columns if c.startswith("available_prop")]
+full_set_interpolated_with_scenarios = full_set_interpolated_with_scenarios[['Facility_ID', 'month', 'item_code'] + available_cols]

full_set_interpolated_with_scenarios_level1b_fixed = update_level1b_availability(
    availability_df=full_set_interpolated_with_scenarios,
@@ -1182,8 +1364,13 @@ def comparison_plot_by_level(fac_type):
    weighting = 'district_1b_to_2_ratio',
)

+# Verify that the shape of this dataframe is identical to that of the original availability dataframe
+assert sorted(set(full_set_interpolated_with_scenarios_level1b_fixed.Facility_ID)) == sorted(set(pd.unique(full_set_interpolated.Facility_ID)))
+assert sorted(set(full_set_interpolated_with_scenarios_level1b_fixed.month)) == sorted(set(pd.unique(full_set_interpolated.month)))
+assert sorted(set(full_set_interpolated_with_scenarios_level1b_fixed.item_code)) == sorted(set(pd.unique(full_set_interpolated.item_code)))
+assert len(full_set_interpolated_with_scenarios_level1b_fixed) == len(full_set_interpolated)
+
# Compare availability averages by Facility_Level before and after the 1b fix
-available_cols = [c for c in full_set_interpolated_with_scenarios.columns if c.startswith("available_prop")]
level1b_fix_plots_path = outputfilepath / 'comparison_plots'
figurespath_scenarios = outputfilepath / 'consumable_scenarios'
if not os.path.exists(level1b_fix_plots_path):
@@ -1206,9 +1393,16 @@ def comparison_plot_by_level(fac_type):
    index=False
)

+# Save the legacy availability resourcefile before the level 1b-2 fix
+check_format_of_consumables_file(df=full_set_interpolated_with_scenarios, fac_ids=fac_ids)
+full_set_interpolated_with_scenarios.to_csv(
+    path_for_new_resourcefiles / "ResourceFile_Consumables_availability_small_original.csv",
+    index=False
+)
+
# %%
-# 7. COMPARISON WITH HHFA DATA, 2018/19
-########################################################################################################################
+# 7. COMPARISON WITH HHFA DATA, 2018/19 ##
+#########################################################################################
# --- 7.1 Prepare comparison dataframe --- ##
# Note that this only plots consumables for which data is available in the HHFA
# i. Prepare data from HHFA
@@ -1223,7 +1417,7 @@ def comparison_plot_by_level(fac_type):
hhfa_comparison_df = hhfa_comparison_df.rename({'fac_type_tlo': 'Facility_Level'}, axis=1)

# ii.
Collapse final model availability data by facility level -final_availability_df = full_set_interpolated_with_scenarios_level1b_fixed +final_availability_df = full_set_interpolated mfl = pd.read_csv(resourcefilepath / "healthsystem" / "organisation" / "ResourceFile_Master_Facilities_List.csv") final_availability_df = pd.merge(final_availability_df, mfl[['District', 'Facility_Level', 'Facility_ID']], how="left", on=['Facility_ID'], @@ -1245,7 +1439,39 @@ def comparison_plot_by_level(fac_type): size = 10 comparison_df['consumable_labels'] = comparison_df['consumable_name_tlo'].str[:10] +# Define function to draw calibration plots at different levels of disaggregation +def comparison_plot(level_of_disaggregation, group_by_var, colour): + comparison_df_agg = comparison_df.groupby([group_by_var], + as_index=False).agg({'available_prop': 'mean', + 'available_prop_hhfa': 'mean', + 'Facility_Level': 'first', + 'consumable_labels': 'first'}) + comparison_df_agg['labels'] = comparison_df_agg[level_of_disaggregation] + + ax = comparison_df_agg.plot.scatter('available_prop', 'available_prop_hhfa', c=colour) + ax.axline([0, 0], [1, 1]) + for i, label in enumerate(comparison_df_agg['labels']): + plt.annotate(label, + (comparison_df_agg['available_prop'][i] + 0.005, + comparison_df_agg['available_prop_hhfa'][i] + 0.005), + fontsize=6, rotation=38) + if level_of_disaggregation != 'aggregate': + plt.title('Disaggregated by ' + level_of_disaggregation, fontsize=size, weight="bold") + else: + plt.title('Aggregate', fontsize=size, weight="bold") + plt.xlabel('Pr(drug available) as per TLO model') + plt.ylabel('Pr(drug available) as per HHFA') + save_name = 'comparison_plots/calibration_to_hhfa_' + level_of_disaggregation + '.png' + plt.savefig(outputfilepath / save_name) + + # 7.2.1 Aggregate plot +# First create folder in which to store the plots + +if not os.path.exists(outputfilepath / 'comparison_plots'): + os.makedirs(outputfilepath / 'comparison_plots') + print("folder to store Model-HHFA comparison plots created") + comparison_df['aggregate'] = 'aggregate' level_of_disaggregation = 'aggregate' colour = 'red' @@ -1264,7 +1490,23 @@ def comparison_plot_by_level(fac_type): colour = 'yellow' comparison_plot(level_of_disaggregation, group_by_var, colour) + # 7.2.4 Plot by item and facility level +def comparison_plot_by_level(fac_type): + cond_fac_type = comparison_df['Facility_Level'] == fac_type + comparison_df_by_level = comparison_df[cond_fac_type].reset_index() + plt.scatter(comparison_df_by_level['available_prop'], + comparison_df_by_level['available_prop_hhfa']) + plt.axline([0, 0], [1, 1]) + for i, label in enumerate(comparison_df_by_level['consumable_labels']): + plt.annotate(label, (comparison_df_by_level['available_prop'][i] + 0.005, + comparison_df_by_level['available_prop_hhfa'][i] + 0.005), + fontsize=6, rotation=27) + plt.title(fac_type, fontsize=size, weight="bold") + plt.xlabel('Pr(drug available) as per TLO model') + plt.ylabel('Pr(drug available) as per HHFA') + + fig = plt.figure(figsize=(22, 22)) plt.subplot(421) comparison_plot_by_level(comparison_df['Facility_Level'].unique()[1]) @@ -1291,7 +1533,10 @@ def comparison_plot_by_level(fac_type): 'available_prop_scenario9': 'Best facility \n (including DHO)','available_prop_scenario10': 'HIV supply \n chain', 'available_prop_scenario11': 'EPI supply \n chain', 'available_prop_scenario12': 'HIV moved to \n Govt supply chain \n (Avg by Level)', 'available_prop_scenario13': 'HIV moved to \n Govt supply chain \n (Avg by Facility_ID)', 
'available_prop_scenario14': 'HIV moved to \n Govt supply chain \n (Avg by Facility_ID times 1.25)', - 'available_prop_scenario15': 'HIV moved to \n Govt supply chain \n (Avg by Facility_ID times 0.75)'} + 'available_prop_scenario15': 'HIV moved to \n Govt supply chain \n (Avg by Facility_ID times 0.75)', + 'available_prop_scenario16': 'Redistribution: District pooling', 'available_prop_scenario17': 'Redistribution: Cluster pooling', + 'available_prop_scenario18': 'Redistribution: Pairwise (large radius)', 'available_prop_scenario19': 'Redistribution: Pairwise (small radius)', +} # Generate descriptive plots of consumable availability program_item_mapping = pd.read_csv(path_for_new_resourcefiles / 'ResourceFile_Consumables_Item_Designations.csv')[['Item_Code', 'item_category']] @@ -1300,7 +1545,7 @@ def comparison_plot_by_level(fac_type): figurespath = figurespath_scenarios, mfl = mfl, program_item_mapping = program_item_mapping, - chosen_availability_columns = None, + chosen_availability_columns = chosen_availability_columns, scenario_names_dict = scenario_names_dict,) diff --git a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py new file mode 100644 index 0000000000..c544a3d0e1 --- /dev/null +++ b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/create_consumable_redistribution_scenarios.py @@ -0,0 +1,2184 @@ +import datetime +from pathlib import Path +import pickle +import calendar + +import matplotlib.pyplot as plt +import seaborn as sns +import numpy as np +import pandas as pd +import time +import matplotlib.patches as mpatches +import matplotlib.lines as mlines + +from typing import Literal, Optional, Dict, Tuple, Iterable +import textwrap +from functools import reduce +import requests +from collections import defaultdict + +from pulp import LpProblem, LpMaximize, LpVariable, LpBinary, LpStatus, value, lpSum, LpContinuous, PULP_CBC_CMD +from math import ceil + +from scripts.costing.cost_estimation import clean_consumable_name + +# define a timestamp for script outputs +timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") + +# print the start time of the script +print('Script Start', datetime.datetime.now().strftime('%H:%M')) + +# define folder pathways +outputfilepath = Path("./outputs/consumables_impact_analysis") +resourcefilepath = Path("./resources") +path_for_new_resourcefiles = resourcefilepath / "healthsystem/consumables" +# Set local shared drive source +path_to_share = Path( # <-- point to the shared folder + '/Users/sm2511/CloudStorage/OneDrive-SharedLibraries-ImperialCollegeLondon/TLOModel - WP - Documents/' +) + +def generate_redistribution_scenarios(tlo_availability_df: pd.DataFrame, + scenario_count: int, + outputfilepath: Path = Path("./outputs/consumables_impact_analysis")) -> pd.DataFrame: + # 1. 
Import and clean data files + #********************************** + # Import Cleaned OpenLMIS data from 2018 + lmis = (pd.read_csv(outputfilepath / "ResourceFile_Consumables_availability_and_usage.csv") + [['district', 'fac_type_tlo', 'fac_name', 'month', 'item_code', 'available_prop', + 'closing_bal', 'amc', 'dispensed', 'received']]) + + # Drop duplicated facility, item, month combinations + print(lmis.shape, "rows before collapsing duplicates") + key_cols = ["district", "item_code", "fac_name", "month"] # keys that define a unique record + + # helper to keep one facility level per group (mode → most common; fallback to first non-null) + def _mode_or_first(s: pd.Series): + s = s.dropna() + if s.empty: + return np.nan + m = s.mode() + return m.iloc[0] if not m.empty else s.iloc[0] + + lmis = ( + lmis + .groupby(key_cols, as_index=False) + .agg( + closing_bal=("closing_bal", "sum"), + dispensed=("dispensed", "sum"), + received=("received", "sum"), + amc=("amc", "sum"), + available_prop=("available_prop", "mean"), + fac_type_tlo=("fac_type_tlo", _mode_or_first), # optional; remove if not needed + ) + ) + + print(lmis.shape, "rows after collapsing duplicates") + + # Import data on facility location + location = (pd.read_excel(path_to_share / "07 - Data/Facility_GPS_Coordinates/gis_data_for_openlmis/LMISFacilityLocations_raw.xlsx") + [['LMIS Facility List', 'LATITUDE', 'LONGITUDE']]) + # Find duplicates in facilty names in the location dataset + duplicates = location[location['LMIS Facility List'].duplicated(keep=False)] + location = location.drop(duplicates[duplicates['LATITUDE'].isna()].index).reset_index(drop=True) # Drop those duplicates where location is missing + # Import ownership data + ownership = (pd.read_csv(path_to_share / "07 - Data/Consumables data/OpenLMIS/lmis_facility_ownership.csv"))[['fac_name', 'fac_owner']] + ownership = ownership.drop_duplicates(subset=['fac_name']) + + # Merge OpenLMIS and location and ownership data + lmis = lmis.merge(location, left_on='fac_name', right_on = 'LMIS Facility List', how = 'left', validate='m:1') + lmis = lmis.merge(ownership, on='fac_name', how = 'left', validate='m:1') + lmis.rename(columns = {'LATITUDE':'lat', 'LONGITUDE':'long', 'fac_type_tlo': 'Facility_Level'}, inplace = True) + + # Cleaning to match date to the same format as consumable availability RF in TLO model + month_map = { + "January": 1, "February": 2, "March": 3, "April": 4, + "May": 5, "June": 6, "July": 7, "August": 8, + "September": 9, "October": 10, "November": 11, "December": 12 + } + lmis["month"] = lmis["month"].map(month_map) + lmis["Facility_Level"] = lmis["Facility_Level"].str.replace("Facility_level_", "", regex=False) + + # Clean data types before analysis + # 1) Normalize fac_name + lmis["fac_name"] = ( + lmis["fac_name"] + .astype("string") # Pandas string dtype (not just object) + .str.normalize("NFKC") # unify unicode forms + .str.strip() # trim leading/trailing spaces + .str.replace(r"\s+", "_", regex=True) # collapse internal whitespace + ) + + # 2) Normalize other key columns used in grouping/joins + lmis["item_code"] = lmis["item_code"].astype("string").str.strip() + lmis["district"] = lmis["district"].astype("string").str.strip().str.replace(r"\s+", "_", regex=True) + lmis["Facility_Level"] = lmis["Facility_Level"].astype("string").str.strip() + + # 3) Ensure numeric types (quietly coerce bad strings to NaN) + lmis["amc"] = pd.to_numeric(lmis["amc"], errors="coerce") + lmis["closing_bal"] = pd.to_numeric(lmis["closing_bal"], errors="coerce") + + # 
Keep only those facilities whose location is available + old_facility_count = lmis.fac_name.nunique() + lmis = lmis[lmis.lat.notna()] + new_facility_count = lmis.fac_name.nunique() + print(f"{old_facility_count - new_facility_count} facilities out of {old_facility_count} in the lmis data dropped due to " + f"missing location information") + + # Explore missingness + def compute_opening_balance(df: pd.DataFrame) -> pd.Series: + """ + Compute opening balance from same-month records. + + Formula: + OB = closing_bal - received + dispensed + Any negative OB values are replaced with 0. + Equivalent to: OB_(m) = CB_(m-1) + """ + ob = df["closing_bal"] - df["received"] + df["dispensed"] + return ob.clip(lower=0) + + # 1. Compute opening balance + lmis["opening_bal"] = compute_opening_balance(lmis).replace([np.inf, -np.inf], np.nan).fillna(0.0) + # Mechanistic probability (p_mech = OB / AMC) + amc_safe = np.maximum(1e-6, lmis["amc"].astype(float)) + lmis["p_mech"] = np.clip(lmis["opening_bal"] / amc_safe, 0.0, 1.0) + # Identify inconsistent rows (where reported p > mechanistic p) + mask_inconsistent = lmis["p_mech"] < lmis["available_prop"] + # Adjust opening balance upward to match reported availability + lmis.loc[mask_inconsistent, "opening_bal"] = ( + lmis.loc[mask_inconsistent, "available_prop"] * lmis.loc[mask_inconsistent, "amc"] + ) + print(f"Adjusted {mask_inconsistent.sum():,} rows " + f"({mask_inconsistent.mean()*100:.2f}%) where recorded availability " + f"exceeded mechanistic availability.") + + lmis.reset_index(inplace=True, drop = True) + + # ---------------------------------------------------------------------------------------------------------------------- + # 1) Data exploration + # ---------------------------------------------------------------------------------------------------------------------- + def generate_stock_adequacy_heatmap( + df: pd.DataFrame, + figures_path: Path = Path("figures"), + filename: str = "heatmap_adequacy_opening_vs_3xamc.png", + y_var: str = "district", # the variable on the y-axis of the heatmap + value_var: str = "item_code", # the count variable on the basis of which the values are calculated + value_label: str = "", # label describing the values in the heatmap + include_missing_as_fail: bool = False, # if True, items with NaN opening/amc count as NOT adequate + amc_threshold: float = 3.0, + compare: str = "ge" , # "ge" for >= threshold*AMC, "le" for <= threshold*AMC + decimals: int = 0, + cmap: str = "RdYlGn", + figsize= None, + xtick_rotation: int = 45, + ytick_rotation: int = 0, + annotation: bool = True, + footnote: str = None, + ): + """ + Heatmap values: for each (month, district), the % of item_code groups where + sum(opening_balance over Facility_ID) >= 3 * sum(amc over Facility_ID). + """ + df = df.copy() + + # --- 1. Ensure month is int and build label --- + df["month"] = pd.to_numeric(df["month"], errors="coerce").astype("Int64") + df = df.dropna(subset=["month"]) + df["month"] = df["month"].astype(int) + + df["_month_label"] = df["month"].map(lambda m: calendar.month_abbr[m]) + + # ---- 2) Aggregate to (month, district, item_code) over facilities ---- + agg = ( + df.groupby(["month", "_month_label", y_var, value_var], dropna=False) + .agg(opening_bal=("opening_bal", "sum"), + amc=("amc", "sum")) + .reset_index() + ) + + # Keep: + # - 1. all rows where amc != 0 + # - 2. rows where the (fac_name, item_code) pair never had any non-zero amc + # (because this would indicate that their AMC may in fact be zero) + # - 3. 
rows where both Opening balance and AMC are not zero + agg = agg[(agg["amc"] != 0)] + agg = agg[~((agg["amc"] == 0) & (agg["opening_bal"] == 0))] + + # ---- 3) Adequacy indicator per (month, district, item_code) ---- + if include_missing_as_fail: + # NaNs treated as fail -> fill with NaN-safe compare: set False when either missing + ok = agg[["opening_bal", "amc"]].notna().all(axis=1) + left = agg["opening_bal"] + right = amc_threshold * agg["amc"] + if compare == "le": + cond = (left <= right) & ok + else: # default to ">=" + cond = (left >= right) & ok + else: + valid = agg.dropna(subset=["opening_bal", "amc"]) + cond = pd.Series(False, index=agg.index) + left = valid["opening_bal"] + right = amc_threshold * valid["amc"] + if compare == "le": + cond.loc[valid.index] = left <= right + else: + cond.loc[valid.index] = left >= right + + agg["condition_met"] = cond.astype(int) + + # --- % meeting condition per (month, district) across item_code --- + if include_missing_as_fail: + denom = agg.groupby(["month", "_month_label", y_var])[value_var].nunique() + numer = agg.groupby(["month", "_month_label", y_var])["condition_met"].sum() + else: + valid_mask = agg[["opening_bal", "amc"]].notna().all(axis=1) + denom = agg[valid_mask].groupby(["month", "_month_label", y_var])[value_var].nunique() + numer = agg[valid_mask].groupby(["month", "_month_label", y_var])["condition_met"].sum() + + pct = (numer / denom * 100).replace([np.inf, -np.inf], np.nan).reset_index(name="pct_meeting") + + # ---- 5) Pivot: districts (rows) x months (columns) ---- + # Sort months by _month_sort and use _month_label as the displayed column name + month_order = ( + pct[["month", "_month_label"]] + .drop_duplicates() + .sort_values("month") + ["_month_label"] + .tolist() + ) + heatmap_df = pct.pivot(index=y_var, columns="_month_label", values="pct_meeting") + heatmap_df = heatmap_df.reindex(columns=month_order) + + # --- Add average row and column --- + # Column average (mean of each month) + heatmap_df.loc["Average"] = heatmap_df.mean(axis=0) + # Row average (mean of each y_var) + heatmap_df["Average"] = heatmap_df.mean(axis=1) + + # Fix overall average (bottom-right) + overall_avg = heatmap_df.loc["Average", "Average"] + heatmap_df.loc["Average", "Average"] = overall_avg + + # Optional rounding for nicer colorbar ticks (doesn't affect color) + if decimals is not None: + heatmap_df = heatmap_df.round(decimals) + + # --- Dynamic figure size --- + if figsize is None: + n_rows = len(heatmap_df) + n_cols = len(heatmap_df.columns) + height = max(6, n_rows * 0.2) # taller if many rows + width = max(8, n_cols * 0.6) + figsize = (width, height) + + # ---- 6) Plot heatmap ---- + sns.set(font_scale=1.0) + fig, ax = plt.subplots(figsize=figsize) + + cbar_kws = value_label + hm = sns.heatmap( + heatmap_df, + cmap=cmap, + cbar_kws={"label": value_label}, + ax=ax, + annot=annotation, annot_kws={"size": 10}, + vmin = 0, vmax = 100) + + ax.set_xlabel("Month") + ax.set_ylabel(f"{y_var}") + ax.set_xticklabels(ax.get_xticklabels(), rotation=xtick_rotation) + ax.set_yticklabels(ax.get_yticklabels(), rotation=ytick_rotation) + + # Keep colorbar ticks plain (no scientific notation) + try: + cbar_ax = ax.figure.axes[-1] + cbar_ax.ticklabel_format(style="plain") + except Exception: + pass + + # ---- Footnote ---- + if footnote is not None: + fig.subplots_adjust(bottom=0.08) + fig.text( + 0.5, 0.035, + footnote, + ha="center", + va="top", + fontsize=10 + ) + + # ---- 7) Save & return ---- + figures_path.mkdir(parents=True, exist_ok=True) + outpath 
= figures_path / filename + plt.savefig(outpath, dpi=300, bbox_inches="tight") + plt.close(fig) + + return fig, ax, heatmap_df + + # ---------------------------------------------------------------------------------------------------------------------- + # 2) Estimate travel time matrix + # ---------------------------------------------------------------------------------------------------------------------- + def _chunk_indices(n: int, chunk: int): + """Yield (start, end) index pairs for chunking 0..n-1.""" + for s in range(0, n, chunk): + e = min(n, s + chunk) + yield s, e + + def build_travel_time_matrix( + fac_df: pd.DataFrame, + *, + id_col: str = "fac_name", + lat_col: str = "lat", + lon_col: str = "long", + mode: Literal["car", "bicycle"] = "car", + backend: Literal["ors", "osrm"] = "ors", + # ORS options + ors_api_key: Optional[str] = None, + ors_base_url: str = "https://api.openrouteservice.org/v2/matrix", + # OSRM options (self-hosted or public; note: public often has only 'car') + osrm_base_url: str = "https://router.project-osrm.org", + osrm_profile_map: dict = None, + # matrix request chunking (keeps requests within API limits) + max_chunk: int = 40, + timeout: int = 60, + ) -> pd.DataFrame: + """ + Build an NxN *road* travel-time matrix (minutes) for facilities, by CAR or BICYCLE + + backends: + - 'ors' -> uses OpenRouteService Matrix API (profiles: driving-car, cycling-regular). + Requires ors_api_key. Has rate/size limits; we auto-chunk. + - 'osrm' -> uses OSRM 'table' service (profiles typically 'car' or 'bike'). + For bicycle, you'll likely need a self-hosted OSRM with the bicycle profile. + + Parameters + ---------- + mode : 'car' | 'bicycle' + Travel mode on roads. + max_chunk : int + Max #origins (and #destinations) per sub-matrix request to keep within API limits. + + Returns + ------- + pd.DataFrame + Square DataFrame (minutes), index/columns = facility names. 
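+
+        Examples
+        --------
+        A minimal illustrative call (the facility names and coordinates below are
+        hypothetical, and a reachable OSRM server is assumed):
+
+        >>> facs = pd.DataFrame({"fac_name": ["fac_a", "fac_b"],
+        ...                      "lat": [-13.98, -14.02],
+        ...                      "long": [33.77, 33.80]})
+        >>> T = build_travel_time_matrix(facs, mode="car", backend="osrm")
+        >>> T.loc["fac_a", "fac_b"]   # minutes by road; np.inf if unroutable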
+ """ + facs = fac_df[[id_col, lat_col, lon_col]].dropna().drop_duplicates().reset_index(drop=True) + ids = facs[id_col].tolist() + lats = facs[lat_col].to_numpy() + lons = facs[lon_col].to_numpy() + n = len(ids) + + T = pd.DataFrame(np.full((n, n), np.nan, dtype=float), index=ids, columns=ids) + np.fill_diagonal(T.values, 0.0) + + if n == 0: + return T + + if backend == "ors": + if ors_api_key is None: + raise ValueError("OpenRouteService requires ors_api_key.") + profile = "driving-car" if mode == "car" else "cycling-regular" + + # ORS expects [lon, lat] + coords = [[float(lons[i]), float(lats[i])] for i in range(n)] + + headers = {"Authorization": ors_api_key, "Content-Type": "application/json"} + + # Chunk both sources and destinations to stay under limits + for si, sj in _chunk_indices(n, max_chunk): + for di, dj in _chunk_indices(n, max_chunk): + sources = list(range(si, sj)) + destinations = list(range(di, dj)) + + body = { + "locations": coords, + "sources": sources, + "destinations": destinations, + "metrics": ["duration"], + } + url = f"{ors_base_url}/{profile}" + r = requests.post(url, json=body, headers=headers, timeout=timeout) + r.raise_for_status() + data = r.json() + + # durations in seconds; fill submatrix + durs = data.get("durations") + if durs is None: + raise RuntimeError(f"ORS returned no durations for block {si}:{sj} x {di}:{dj}") + sub = (np.array(durs, dtype=float) / 60.0) # minutes + T.iloc[si:sj, di:dj] = sub + + elif backend == "osrm": + # Map desired mode to OSRM profile names + if osrm_profile_map is None: + osrm_profile_map = {"car": "car", "bicycle": "bike"} + profile = osrm_profile_map.get(mode) + if profile is None: + raise ValueError(f"No OSRM profile mapped for mode='{mode}'.") + + # NOTE: The public OSRM demo often supports *car only*. + # For bicycle, run your own OSRM with the bicycle profile. + # We use the OSRM 'table' service; POST with 'sources' and 'destinations' indices. + + coords = ";".join([f"{lons[i]},{lats[i]}" for i in range(n)]) # lon,lat + + for si, sj in _chunk_indices(n, max_chunk): + for di, dj in _chunk_indices(n, max_chunk): + sources = ";".join(map(str, range(si, sj))) + destinations = ";".join(map(str, range(di, dj))) + + url = ( + f"{osrm_base_url}/table/v1/{profile}/{coords}" + f"?sources={sources}&destinations={destinations}&annotations=duration" + ) + r = requests.get(url, timeout=timeout) + r.raise_for_status() + data = r.json() + + durs = data.get("durations") + if durs is None: + raise RuntimeError(f"OSRM returned no durations for block {si}:{sj} x {di}:{dj}") + sub = (np.array(durs, dtype=float) / 60.0) # minutes + T.iloc[si:sj, di:dj] = sub + + else: + raise ValueError("backend must be 'ors' or 'osrm'.") + + # Safety: replace any residual NaNs (unroutable pairs) with inf or a large number + T = T.fillna(np.inf) + return T + + # Because ORS and ORSM can only handle a limited number of elements at a time, it's better to run this by district + # each of which already has under 50 facilities + def build_time_matrices_by_district( + df: pd.DataFrame, + *, + district_col: str = "district", + id_col: str = "fac_name", + lat_col: str = "lat", + lon_col: str = "long", + mode: str = "car", + backend: str = "osrm", + osrm_base_url: str = "https://router.project-osrm.org", + ors_api_key: str | None = None, + max_chunk: int = 50, # safe for both OSRM/ORS + ) -> dict[str, pd.DataFrame]: + """ + Returns a dict: {district -> square minutes matrix DataFrame}, computed within each district only. 
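+
+        Example (illustrative only; assumes the public OSRM demo server is
+        reachable and that `lmis` has been prepared as above):
+
+        >>> coords = lmis[["district", "fac_name", "lat", "long"]].drop_duplicates()
+        >>> T_by_d = build_time_matrices_by_district(coords, mode="car", backend="osrm")
+        >>> sorted(T_by_d)[:3]   # district keys, each mapping to a square minutes matrix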
+ """ + matrices = {} + # unique facilities per district (drop duplicates in case of repeated rows) + fac_cols = [district_col, id_col, lat_col, lon_col] + fac_coords = df[fac_cols].dropna().drop_duplicates() + + for d, facs_d in fac_coords.groupby(district_col): + # Skip tiny groups + if len(facs_d) < 2: + continue + + T = build_travel_time_matrix( + fac_df=facs_d[[id_col, lat_col, lon_col]], + id_col=id_col, lat_col=lat_col, lon_col=lon_col, + mode=mode, backend=backend, + osrm_base_url=osrm_base_url, + ors_api_key=ors_api_key, + max_chunk=max_chunk, + ) + matrices[d] = T + + return matrices + + # ----------------------------------------------- + # 3) Data prep for redistribution linear program + # ----------------------------------------------- + def presumed_availability(ob, amc, eps=1e-9) -> float: + """ + Presumed likelihood of cons availability = p = min(1, OB/AMC) at month start (no additional receipts considered + at this point in time). + """ + return float(min(1.0, max(0.0, (ob / max(eps, amc))))) + + + def build_edges_within_radius( + time_matrix: pd.DataFrame, + max_minutes: float + ) -> Dict[str, set]: + """ + For each facility g, return set of receivers f such that T[g,f] <= max_minutes, f != g. + """ + edges = {} + for g in time_matrix.index: + feasible = set(time_matrix.columns[(time_matrix.loc[g] <= max_minutes) & (time_matrix.columns != g)]) + edges[g] = feasible + return edges + + def build_edges_within_radius_flat(T_by_dist: dict, max_minutes: float) -> dict[str, set[str]]: + """ + Takes the district-wise dictionary of time travel matrices and converts it into a flat dictionary of facilities + and their edge neighbours depending on the maximum allowable travel distance. + T_by_dist: {district -> square DataFrame of minutes (index/cols = facility IDs)} + Returns: {facility_id -> set(of facility_ids)} for all districts combined. + """ + edges: dict[str, set[str]] = {} + for _, T in T_by_dist.items(): + for g in T.index: + row = T.loc[g].to_numpy() + feasible_mask = (row <= max_minutes) & np.isfinite(row) + # Exclude self + feasible = [f for f in T.columns[feasible_mask] if f != g] + if g not in edges: + edges[g] = set() + edges[g].update(feasible) + return edges + + # a = build_edges_within_radius(T_car, max_minutes = 18) + + # Defining clusters of health facilities within district + # This function helps find the facilities which would be appropriate cluster centers + def _farthest_first_seeds(T: pd.DataFrame, k: int, big: float = 1e9) -> list: + """ + Pick k seed medoids via farthest-first traversal on a travel-time matrix. + Treat inf/NaN distances as 'big' so disconnected components get separate seeds. 
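+
+        A toy example (hypothetical 3x3 matrix of minutes) showing that seeds spread
+        out: 'c' is remotest on average, and 'a' is farthest from its nearest seed 'c':
+
+        >>> T = pd.DataFrame([[0, 10, 60], [10, 0, 55], [60, 55, 0]],
+        ...                  index=list("abc"), columns=list("abc"))
+        >>> _farthest_first_seeds(T, k=2)
+        ['c', 'a']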
+ """ + n = T.shape[0] + facs = T.index.tolist() + D = T.to_numpy().astype(float) + D[~np.isfinite(D)] = big + + # Start at the row with largest average distance (covers sparse areas first) + start = int(np.nanargmax(np.nanmean(D, axis=1))) # the remotest facility + seeds_idx = [start] + + # Iteratively add the point with max distance to its nearest seed + for _ in range(1, k): + # min distance to any existing seed for every point + min_to_seed = np.min(D[:, seeds_idx], axis=1) # this has a dimension of [facs, number of seeds] + next_idx = int(np.argmax(min_to_seed)) # for each facility find the distance to its nearest seed + # and the facility farthest from the nearest seed becomes the next seed + if next_idx in seeds_idx: + # Fallback: pick any non-seed with highest min_to_seed + candidates = [i for i in range(n) if i not in seeds_idx] + if not candidates: + break + next_idx = int(candidates[np.argmax(min_to_seed[candidates])]) + seeds_idx.append(next_idx) + + return [facs[i] for i in seeds_idx] # list of length k representing the clustering points + + # Assign each facility to its nearest seed subject to a hard cluster capacity (≤ X members) + def _assign_to_cluster_with_fixed_capacity(T: pd.DataFrame, seeds: list, capacity: int, big: float = 1e9) -> Dict[str, int]: + """ + Greedy assignment of facilities to nearest seed that still has capacity (based on maximum cluster size). + Returns: mapping facility -> seed_index (position in seeds list). + """ + facs = T.index.tolist() + D = T.loc[facs, seeds].to_numpy().astype(float) # Distance of all facilities from the k seeds + D[~np.isfinite(D)] = big + + # each facility: nearest distance to any seed (for stable ordering) + nearest = D.min(axis=1) # find the shortest distance to a cluster for each facility + order = np.argsort(nearest) # sort all facilities in ascending order of their distance from the nearest facility + + cap_left = {j: capacity for j in range(len(seeds))} # the capacity left for each seed + assign = {} + + for idx in order: + f = facs[idx] + # try seeds in ascending distance + seq = np.argsort(D[idx, :]) # the sequence of seeds most suitable for idx + placed = False + for j in seq: + if cap_left[j] > 0: + assign[f] = j + cap_left[j] -= 1 + placed = True + break + if not placed: + # total capacity >= n, so this should be rare; if it happens, put in least-loaded seed + j = min(cap_left, key=lambda jj: cap_left[jj]) + assign[f] = j + cap_left[j] -= 1 + + return assign + + def capacity_clusters_for_district( + T_d: pd.DataFrame, cluster_size: int = 5, big: float = 1e9, refine_swaps: int = 0 + ) -> Dict[str, str]: + """ + Build ~equal-size clusters (size<=cluster_size) from a district's travel-time matrix via + capacity-constrained k-medoids (farthest-first seeds + greedy capacity assignment). + + Returns: {facility_id -> cluster_id} (cluster ids like 'C00','C01',...) + """ + facs = T_d.index.tolist() + n = len(facs) + if n == 0: + return {} + if n <= cluster_size: + return {f: "C00" for f in facs} + + k = ceil(n / cluster_size) + seeds = _farthest_first_seeds(T_d, k=k, big=big) + assign_seed_idx = _assign_to_cluster_with_fixed_capacity(T_d, seeds=seeds, capacity=cluster_size, big=big) + + # Optional tiny refinement: (off by default) + # You could add 1–2 passes of medoid swap within clusters to reduce intra-cluster travel. 
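+        # A minimal sketch of such a pass (illustrative, not part of the original
+        # algorithm; it runs only if refine_swaps > 0, which is off by default):
+        # re-centre each cluster on its medoid (the member minimising total travel
+        # time to the other members), then re-assign facilities to the new seeds.
+        for _ in range(refine_swaps):
+            new_seeds = []
+            for j in range(len(seeds)):
+                members = [f for f, jj in assign_seed_idx.items() if jj == j]
+                if members:
+                    sub = T_d.loc[members, members].copy()
+                    sub[~np.isfinite(sub)] = big
+                    new_seeds.append(sub.sum(axis=1).idxmin())  # medoid of cluster j
+                else:
+                    new_seeds.append(seeds[j])
+            seeds = new_seeds
+            assign_seed_idx = _assign_to_cluster_with_fixed_capacity(
+                T_d, seeds=seeds, capacity=cluster_size, big=big)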
+ + # Build cluster ids in seed order + seed_to_cid = {j: f"C{j:02d}" for j in range(len(seeds))} + return {f: seed_to_cid[assign_seed_idx[f]] for f in facs} + + def build_capacity_clusters_all( + T_by_dist: Dict[str, pd.DataFrame], cluster_size: int = 5 + ) -> pd.Series: + """ + Apply capacity clustering to all districts. + Args: + T_by_dist: {'DistrictName': time_matrix (minutes, square DF with facility ids)} + cluster_size: desired max cluster size (e.g., 5) + + Returns: + pd.Series mapping facility_id -> cluster_id names scoped by district, e.g. 'Nkhotakota#C03' + """ + mappings = [] + for d, T_d in T_by_dist.items(): + if T_d is None or T_d.empty: + continue + local_map = capacity_clusters_for_district(T_d, cluster_size=cluster_size) + if not local_map: + continue + s = pd.Series(local_map, name="cluster_id") + s = s.map(lambda cid: f"{d}#{cid}") # scope cluster name by district + mappings.append(s) + if not mappings: + return pd.Series(dtype=object) + return pd.concat(mappings) + + # ----------------------------------------------- + # 3) LP/MILP Redistribution + # ----------------------------------------------- + def redistribute_radius_lp( + df: pd.DataFrame, + time_matrix: Dict[str, pd.DataFrame] | pd.DataFrame, + radius_minutes: float, + # policy knobs + tau_keep: float = 1.0, # donors must keep ≥ tau_keep * AMC + tau_tar: float = 1.0, # receivers target OB = AMC + K_in: int = 1, # per-item: max donors per receiver + K_out: int = 10, # per-item: max receivers per donor + Qmin_proportion: float = 0.25, # min lot as a fraction of receiver AMC (e.g., 0.25 ≈ 7–8 days) + eligible_levels: Iterable[str] = ("1a","1b"), + # schema + id_cols=("district","month","item_code"), + facility_col="fac_name", + level_col="Facility_Level", + amc_col="amc", + # outputs/behaviour + return_edge_log: bool = True, + floor_to_baseline: bool = True, # if True, never let reported availability drop below baseline + # numerics + amc_eps: float = 1e-6, + eps: float = 1e-9, + ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: + """ + Pairwise (radius) redistribution with per-item degree caps. + + MILP per (district, month, item): + variables: t[g,f] ≥ 0 (transfer), y[g,f] ∈ {0,1} (edge activation) + objective: maximize Σ_p + key constraints: + - donors keep ≥ tau_keep * AMC + - receivers (only eligible levels) limited to deficit: tau_tar*AMC - OB + - travel-time ≤ radius + - min lot: t[g,f] ≥ Qmin * y[g,f], with Qmin = Qmin_proportion * AMC_receiver + - edge capacity: t[g,f] ≤ min(surplus_g, deficit_f) * y[g,f] + - degree caps per item: inbound ≤ K_in, outbound ≤ K_out + Availability is recomputed mechanistically and written back **only** where a transfer occurred. 
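+
+        Example (illustrative call; `lmis` and the district-wise matrices `T_car`
+        are built later in this script):
+
+        >>> out, edges = redistribute_radius_lp(lmis, T_car, radius_minutes=60)
+        >>> edges[["donor_fac", "receiver_fac", "units_moved"]].head()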
+ """ + out = df.copy() + + # Opening balance + out["OB"] = out["opening_bal"] + # Preserve only necessary columns + selected_cols = list(id_cols) + [level_col, facility_col, 'OB', amc_col, 'available_prop'] + out = out[selected_cols] + + # clean AMC & level + out[amc_col] = pd.to_numeric(out[amc_col], errors="coerce").fillna(0.0) + out[level_col] = out[level_col].astype(str) + + # Container for updated OB and edge log + out["OB_prime"] = out["OB"] + edge_rows = [] if return_edge_log else None + + # Group at (district, month, item_code) + group_cols = list(id_cols) + + skipped_nodes = [] + for (d, m, i), g in out.groupby(group_cols, sort=False): + g = g.copy() + + # --- Pick the travel-time matrix slice --- + if isinstance(time_matrix, dict): + T_d = time_matrix.get(d) + if T_d is None or T_d.empty: + continue + else: + T_d = time_matrix # single matrix for all, if you pass one + + facs_slice = g[facility_col].dropna().unique().tolist() + facs = [f for f in facs_slice if f in T_d.index and f in T_d.columns] + if len(facs) < 2: + continue + + T_sub = T_d.loc[facs, facs].replace(np.nan, np.inf) + + # --- Pull per-fac data for this item --- + AMC = (g.set_index(facility_col)[amc_col].astype(float) + .replace([np.inf, -np.inf], np.nan).fillna(0.0)) + OB0 = (g.set_index(facility_col)["OB"].astype(float) + .replace([np.inf, -np.inf], np.nan).fillna(0.0)) + LVL = (g.set_index(facility_col)[level_col].astype(str)) + + + # Align to facs and guard AMC + AMC = AMC.reindex(facs).fillna(0.0) + AMC_guard = AMC.copy() + AMC_guard[AMC_guard <= 0.0] = amc_eps + OB0 = OB0.reindex(facs).fillna(0.0) + LVL = LVL.reindex(facs) + + # --- Surplus / deficit --- + surplus = np.maximum(0.0, OB0.values - tau_keep * AMC_guard.values) # donors + deficit = np.maximum(0.0, tau_tar * AMC_guard.values - OB0.values) # receivers + + # Leave AMC == 0 untouched + #recv_pos_mask = AMC.values > amc_eps # forbid AMC≈0 from receiving + #deficit = np.where(recv_pos_mask, deficit0, 0.0) + + # Only eligible levels can receive + elig_recv = LVL.isin(eligible_levels).values + deficit = np.where(elig_recv, deficit, 0.0) + + donors = [f for f, s in zip(facs, surplus) if s > eps] + recvs = [f for f, h in zip(facs, deficit) if h > eps] + if not donors or not recvs: + continue + + s_map = dict(zip(facs, surplus)) + h_map = dict(zip(facs, deficit)) + qmin_map = dict(zip(facs, Qmin_proportion * AMC_guard.values)) + + # --- Feasible edges (within radius), HARD PRUNE if capacity < qmin --- + M_edge = {} # capacity per edge + Qmin = {} # min lot per edge + for g_fac in donors: + row = T_sub.loc[g_fac].to_numpy() + feas_idx = np.where((row <= radius_minutes) & np.isfinite(row))[0] + for idx in feas_idx: + f_fac = T_sub.columns[idx] + if f_fac == g_fac or f_fac not in recvs: + continue + M = min(s_map[g_fac], h_map[f_fac]) + if not np.isfinite(M) or M <= eps: + continue + qmin = float(qmin_map[f_fac]) + if not np.isfinite(qmin) or qmin <= eps or M < qmin: + # cannot move at least qmin -> drop the edge + continue + M_edge[(g_fac, f_fac)] = float(M) + Qmin[(g_fac, f_fac)] = float(qmin) + + if not M_edge: + continue + + # --- MILP (per item) --- + prob = LpProblem(f"Radius_{d}_{m}_{i}", LpMaximize) + + # decision vars + t = {e: LpVariable(f"t_{e[0]}->{e[1]}", lowBound=0, upBound=M_edge[e], cat=LpContinuous) + for e in M_edge.keys()} + y = {e: LpVariable(f"y_{e[0]}->{e[1]}", lowBound=0, upBound=1, cat=LpBinary) + for e in M_edge.keys()} + p = {f: LpVariable(f"p_{f}", lowBound=0, upBound=1) for f in + facs} # or only for eligible receivers + + # 
objective: maximize total shipped + prob += lpSum(p[f] for f in recvs) # or for level-eligible facilities + + AMC_guard = AMC.reindex(facs).fillna(0.0) + AMC_guard[AMC_guard <= 0.0] = amc_eps + + # donor outflow caps (per item) + for g_fac in donors: + vars_out = [t[(g_fac, f_fac)] for (gg, f_fac) in t.keys() if gg == g_fac] + if vars_out: + s_cap = float(max(0.0, OB0[g_fac] - tau_keep * AMC_guard[g_fac])) + prob += lpSum(vars_out) <= s_cap + + # receiver inflow caps (per item; eligibility already enforced) + for f_fac in recvs: + vars_in = [t[(g_fac, f_fac)] for (g_fac, ff) in t.keys() if ff == f_fac] + if vars_in: + h_cap = float(max(0.0, tau_tar * AMC_guard[f_fac] - OB0[f_fac])) + prob += lpSum(vars_in) <= h_cap + + # link t & y + min-lot + for e, M in M_edge.items(): + prob += t[e] <= M * y[e] + qmin = min(Qmin[e], M_edge[e]) # TODO should this be qmin = Qmin[e]? + if qmin > eps: + prob += t[e] >= qmin * y[e] + + # per-item degree caps + for f_fac in recvs: + inbound_y = [y[(g_fac, f_fac)] for (g_fac, ff) in y.keys() if ff == f_fac] + if inbound_y: + prob += lpSum(inbound_y) <= K_in + for g_fac in donors: + outbound_y = [y[(g_fac, f_fac)] for (gg, f_fac) in y.keys() if gg == g_fac] + if outbound_y: + prob += lpSum(outbound_y) <= K_out + + # 3) Availability linearization per facility + # Need inflow and outflow expressions from your t[(g,f)] + for f_fac in facs: + inflow = lpSum(t[(g_fac, f_fac)] for (g_fac, ff) in t.keys() if ff == f_fac) + outflow = lpSum(t[(f_fac, h_fac)] for (gg, h_fac) in t.keys() if gg == f_fac) + prob += float(AMC_guard[f_fac]) * p[f_fac] <= float(OB0.get(f_fac, 0.0)) + inflow - outflow + + # solve + status = prob.solve(PULP_CBC_CMD(msg=False, cuts=0, presolve=True, threads=1)) + if LpStatus[prob.status] != "Optimal": + skipped_nodes.append((d, m, i)) + continue + + # --- Apply transfers & log --- + delta = {f: 0.0 for f in facs} # net change in OB by facility (this item) + any_transfer = False + for (g_fac, f_fac), var in t.items(): + moved = float(value(var) or 0.0) + if moved > eps: + any_transfer = True + delta[g_fac] -= moved + delta[f_fac] += moved + if return_edge_log: + tm = float(T_sub.loc[g_fac, f_fac]) if np.isfinite(T_sub.loc[g_fac, f_fac]) else np.nan + edge_rows.append({ + "district": d, "month": m, "item_code": i, + "donor_fac": g_fac, "receiver_fac": f_fac, + "units_moved": moved, "travel_minutes": tm + }) + + if not any_transfer: + continue + + sel = (out["district"].eq(d) & out["month"].eq(m) & out["item_code"].eq(i)) + out.loc[sel, "OB_prime"] = out.loc[sel].apply( + lambda r: r["OB"] + delta.get(r[facility_col], 0.0), + axis=1 + ) + print("Skipped ", len(skipped_nodes), "district-month-item combinations - no optimal solution") + + # ---------- Availability: update ONLY where positive transfers happened ---------- + changed_mask = (out["OB_prime"] - out["OB"]) > 1e-6 + denom = np.maximum(amc_eps, out[amc_col].astype(float).values) + p_mech = np.minimum(1.0, np.maximum(0.0, out["OB_prime"].values / denom)) + + # start from baseline + new_p = out["available_prop"].astype(float).values if floor_to_baseline else out["available_prop"].astype(float).values.copy() + # update only changed rows; optionally floor to baseline + if floor_to_baseline: + new_p[changed_mask] = np.maximum(p_mech[changed_mask], out["available_prop"].astype(float).values[changed_mask]) + else: + new_p[changed_mask] = p_mech[changed_mask] + + # force non-eligible levels to keep baseline (mirrors pooling) + non_elig = ~out[level_col].isin(eligible_levels) + new_p[non_elig] = 
out.loc[non_elig, "available_prop"].astype(float).values + + out["available_prop_redis"] = new_p + + edge_df = pd.DataFrame(edge_rows) if return_edge_log else None + return out, edge_df + + def redistribute_pooling_lp( + df: pd.DataFrame, + tau_min: float = 0.25, # lower floor in "months of demand" (≈ 7–8 days) - minimum transfer required + tau_max: float = 3.0, # upper ceiling (storage/policy max) + tau_donor_keep: float = 3.0, # minimum the donor keeps before donating + id_cols=("district","month","item_code"), + facility_col="fac_name", + level_col="Facility_Level", + amc_col="amc", + close_cols=("closing_bal","received","dispensed"), + amc_eps: float = 1e-6, # threshold to treat AMC as "zero" + return_move_log: bool = True, # return a detailed df showing net movement of consumables after redistribution + pooling_level: str = "district", # "district" or "cluster" + cluster_map: pd.Series | None = None, # required if pooling_level=="cluster"; this specifes which cluster each facility belongs to + floor_to_baseline: bool = True # if True, report availability floored to baseline (no decrease in outputs) + ) -> pd.DataFrame: + """ + Scenario 3: district-level pooling/push . + Maximizes total availability with: + - NaN/inf guards on AMC/OB + - duplicate facility IDs collapsed within group + - floors scaled if total stock < sum floors + - optional 'excess' sink if total stock > sum ceilings + - availability computed safely; AMC≈0 rows keep baseline (optional) + + Pooling redistribution that can operate at the district level (default) + or within fixed-size clusters inside districts. + + If pooling_level == "cluster", you must pass cluster_map: Series mapping facility_id -> cluster_id + (cluster ids should already be district-scoped, e.g., "Dedza#C01"). 
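+
+        In sketch form, the LP solved per (node, month, item) is:
+            maximise    sum_f p_f
+            subject to  sum_f x_f (+ excess) = total stock
+                        LB_f <= x_f <= UB_f
+                        AMC_f * p_f <= x_f,   0 <= p_f <= 1
+        where LB_f combines donor protection with the (possibly scaled) tau_min
+        floor, and UB_f is the tau_max ceiling (levels other than 1a/1b cannot
+        end above their opening balance).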
+ + Returns: + - out: original df plus columns: + OB, OB_prime, available_prop_redis, received_from_pool + where received_from_pool = OB_prime - OB (pos=received, neg=donated) + - (optional) move_log: per (district, month, item, facility) net movement summary + """ + if pooling_level not in ("district", "cluster"): + raise ValueError("pooling_level must be 'district' or 'cluster'.") + + closing_bal, received, dispensed = close_cols + out = df.copy() + + # Opening balance + # Ensure OB is consistent with observed availability + amc_safe = np.maximum(1e-6, lmis["amc"].astype(float)) + lmis["p_mech"] = np.clip(lmis["opening_bal"] / amc_safe, 0.0, 1.0) + + mask_inconsistent = lmis["p_mech"] < lmis["available_prop"] + lmis.loc[mask_inconsistent, "opening_bal"] = ( + lmis.loc[mask_inconsistent, "available_prop"] * lmis.loc[mask_inconsistent, "amc"] + ) + out["OB"] = out["opening_bal"] + + # Default (will overwrite per group) + out["OB_prime"] = out["OB"] + + # Attach cluster if needed + if pooling_level == "cluster": + if cluster_map is None: + raise ValueError("cluster_map is required when pooling_level='cluster'.") + # cluster_map: index = facility_id (facility_col), value = cluster_id (already district-scoped) + out = out.merge( + cluster_map.rename("cluster_id"), + how="left", + left_on=facility_col, + right_index=True, + ) + if out["cluster_id"].isna().any(): + # facilities missing a cluster—assign singleton clusters to keep them + out["cluster_id"] = out["cluster_id"].fillna( + out["district"].astype(str) + "#CXX_" + out[facility_col].astype(str)) + + group_cols = list(id_cols) + node_label = "district" + if pooling_level == "cluster": + group_cols = ["cluster_id", "month", "item_code"] + node_label = "cluster_id" + move_rows = [] # optional movement log + # TODO could remove the movement log + + skipped_nodes = [] # collect nodes that did NOT solve optimally + for keys, g in out.groupby(group_cols, sort=False): + g = g.copy() + # Resolve node ID for logging and selection masks + if pooling_level == "district": + node_val, m, i = g["district"].iloc[0], keys[1], keys[2] + else: + node_val, m, i = keys + + # Build per-facility Series (unique index) for AMC, OB, Level, Baseline p + AMC = (g.set_index(facility_col)[amc_col] + .astype(float).replace([np.inf, -np.inf], np.nan).fillna(0.0)) + OB0 = (g.set_index(facility_col)["OB"] + .astype(float).replace([np.inf, -np.inf], np.nan).fillna(0.0)) + LVL = (g.set_index(facility_col)[level_col].astype(str) + .replace({np.nan: ""})) + + # collapse duplicates if any + if AMC.index.duplicated().any(): + AMC = AMC[~AMC.index.duplicated(keep="first")] + if LVL.index.duplicated().any(): + LVL = LVL[~LVL.index.duplicated(keep="first")] + if OB0.index.duplicated().any(): + OB0 = OB0.groupby(level=0).sum() + + total_stock = float(OB0.sum()) + if total_stock <= 1e-9: + continue + + # Participants (positive demand) + mask_pos = AMC >= amc_eps + facs_pos = AMC.index[mask_pos].tolist() + if not facs_pos: + # nothing to reallocate to; they will be donors only (handled by OB' defaults) + continue + + AMC_pos = AMC.loc[facs_pos] + OB0_pos = OB0.loc[facs_pos] + LVL_pos = LVL.reindex(facs_pos) + + # policy floors/ceilings + tau_min_floor = (tau_min * AMC_pos).astype(float) + donor_protect = np.minimum(OB0_pos, tau_donor_keep * AMC_pos) # retain min(OB, tau_donor_keep*AMC) + LB0 = np.maximum(tau_min_floor, donor_protect) + + UB = (tau_max * AMC_pos).astype(float) + UB.loc[~LVL_pos.isin(["1a", "1b"])] = np.minimum( + OB0_pos.loc[~LVL_pos.isin(["1a", "1b"])], + 
UB.loc[~LVL_pos.isin(["1a", "1b"])]
+            )
+
+            # Feasibility: scale only the tau_min component if sum LB > total_stock
+            sum_LB0 = float(LB0.sum())
+            if total_stock + 1e-9 < sum_LB0:
+                # Scale down the tau_min part (not the donor protection)
+                base_guard = donor_protect
+                extra = np.maximum(0.0, tau_min_floor - np.minimum(tau_min_floor, base_guard))
+                need = float(extra.sum())
+                budget = total_stock - float(base_guard.sum())
+                scale = 0.0 if need <= 1e-12 else max(0.0, min(1.0, budget / max(1e-9, need)))
+                tau_min_scaled = np.minimum(base_guard, tau_min_floor) + extra * scale
+                LB = np.maximum(base_guard, tau_min_scaled)
+            else:
+                LB = LB0
+
+            # ---- Excess sink if ceilings bind
+            sum_UB = float(UB.sum())
+            allow_excess_sink = total_stock > sum_UB + 1e-9
+
+            # 1) Per-facility feasibility guard
+            bad = LB > UB + 1e-12
+            if bad.any():
+                # clip LB to UB; if that still leaves negative room, the facility is degenerate
+                LB = np.minimum(LB, UB - 1e-9)
+
+            # ---------- LP ----------
+            prob = LpProblem(f"Pooling_{node_val}_{m}_{i}", LpMaximize)
+            x = {f: LpVariable(f"x_{f}", lowBound=0) for f in facs_pos}
+            p = {f: LpVariable(f"p_{f}", lowBound=0, upBound=1) for f in facs_pos}
+            excess = LpVariable("excess", lowBound=0) if allow_excess_sink else None
+            # note that even though facilities with AMC == 0 are not considered for optimisation,
+            # their positive OB is included in the total stock
+
+            # Objective: maximize total availability
+            prob += lpSum(p.values())
+
+            # Conservation
+            if excess is None:
+                prob += lpSum(x.values()) == total_stock
+            else:
+                prob += lpSum(x.values()) + excess == total_stock
+
+            # Bounds + linearization
+            for f in facs_pos:
+                prob += x[f] >= float(LB.loc[f])  # donor protection + tau_min (scaled) + (optional) no-harm
+                prob += x[f] <= float(UB.loc[f])  # eligibility-aware ceiling
+                prob += float(max(AMC_pos.loc[f], amc_eps)) * p[f] <= x[f]  # TODO CHECK max(AMC_pos.loc[f], amc_eps) or just AMC_pos
+
+            # Solve
+            prob.solve(PULP_CBC_CMD(msg=False, cuts=0, presolve=True, threads=1))
+            if LpStatus[prob.status] != "Optimal":
+                skipped_nodes.append((node_val, m, i))
+                #print("NO Optimal solution found", node_val, m, i)
+                continue
+            #else:
+                #print("Optimal solution found", node_val, m, i)
+
+            # Apply solution to OB'
+            x_sol = {f: float(value(var) or 0.0) for f, var in x.items()}
+
+            # Selection mask for writing back
+            if pooling_level == "district":
+                sel = (out["district"].eq(node_val) & out["month"].eq(m) & out["item_code"].eq(i))
+            else:
+                sel = (out["cluster_id"].eq(node_val) & out["month"].eq(m) & out["item_code"].eq(i))
+
+            # Facilities with AMC >= amc_eps get x_f
+            mask_rows_pos = sel & out[facility_col].isin(facs_pos)
+            out.loc[mask_rows_pos, "OB_prime"] = out.loc[mask_rows_pos, facility_col].map(x_sol).values
+
+            # Facilities with AMC below amc_eps get OB' = 0
+            # (this matches the "donate to pool" assumption)
+            mask_rows_zero = sel & ~out[facility_col].isin(facs_pos)
+            out.loc[mask_rows_zero, "OB_prime"] = 0.0
+
+            if return_move_log:
+                for f in AMC.index:  # include amc≈0 facilities (x_f=0)
+                    x_f = x_sol.get(f, 0.0) if f in facs_pos else 0.0
+                    net = x_f - float(OB0.get(f, 0.0))
+                    move_rows.append({
+                        node_label: node_val,
+                        "month": m,
+                        "item_code": i,
+                        "facility": f,
+                        "received_from_pool": net,
+                        "x_allocated": x_f,
+                        "OB0_agg": float(OB0.get(f, 0.0)),
+                        "eligible_receiver": bool(LVL.get(f, "") in {"1a", "1b"}),
+                    })
+
+        # --- Availability after redistribution: update ONLY where OB' changed ---
+        amc_safe_all = out[amc_col].astype(float).replace([np.inf, -np.inf], np.nan).fillna(0.0)
+        denom = np.maximum(amc_eps, amc_safe_all.values)
+
+        # Start from baseline everywhere
+        out["available_prop_redis"] = out["available_prop"].astype(float).values
+
+        # Change availability only for those rows where OB has increased
+        changed = (out["OB_prime"] - out["OB"]) > 1e-6
+        p_new = np.minimum(1.0, np.maximum(0.0, out.loc[changed, "OB_prime"].values / denom[changed]))
+        if floor_to_baseline:
+            p_new = np.maximum(p_new, out.loc[changed, "available_prop"].astype(float).values)
+
+        out.loc[changed, "available_prop_redis"] = p_new
+        out["received_from_pool"] = out["OB_prime"] - out["OB"]
+
+        move_log = pd.DataFrame(move_rows) if return_move_log else None
+
+        # Force non-eligible levels back to baseline (mirror analysis scope)
+        non_elig = ~out[level_col].isin(["1a", "1b"])
+        out.loc[non_elig, "available_prop_redis"] = out.loc[non_elig, "available_prop"].values  # this should ideally happen automatically;
+        # however, there are facilities at levels 2-4 where a stock-out was experienced even though OB > AMC.
+        # We want to retain the original probability in these cases because our overall analysis is restricted to levels 1a and 1b
+
+        # Per-row movement
+        out["received_from_pool"] = out["OB_prime"] - out["OB"]
+
+        # Check that the rules are correctly applied
+        # (this section, up to `if return_move_log:`, is not required for the solution)
+        #---------------------------------------------------------------------------------
+        # --- Build masks for skipping ---
+        # 1. Nodes that failed to solve optimally
+        # Exclude any node/month/item combinations that didn't solve optimally or which were skipped due to AMC == 0
+        if skipped_nodes:
+            skipped_df = pd.DataFrame(skipped_nodes, columns=[node_label, "month", "item_code"])
+
+            # Merge to flag rows belonging to skipped groups
+            out = out.merge(
+                skipped_df.assign(skip_flag=True),
+                on=[node_label, "month", "item_code"],
+                how="left",
+            )
+            mask_skip_solution = out["skip_flag"].fillna(False)
+        else:
+            mask_skip_solution = pd.Series(False, index=out.index)
+
+        #out[mask_skip_solution].to_csv(outputfilepath / 'skipped_nodes_no_optimal_soln.csv', index = False)
+
+        # 2.
Facilities with AMC effectively zero + mask_skip_amc = out["amc"].astype(float) <= 1e-9 + + # Combined skip mask + mask_skip = mask_skip_solution | mask_skip_amc + mask_solved = ~mask_skip + print(f"Skipping {mask_skip.sum()} rows due to non-optimal LPs or AMC≈0") + + # No facility should end below min(OB, tau_donor_keep*AMC) (# Lower bound check) + tol = 1e-6 #tolerance + viol_lb = ( + (out.loc[mask_solved, "OB_prime"] < + (np.minimum(out.loc[mask_solved, "OB"], tau_donor_keep * out.loc[mask_solved, "amc"]) - tol)) + ) + + # No facility ends up above upper bounds (# Upper bound check) + elig = out.loc[mask_solved, "Facility_Level"].isin(["1a", "1b"]).values + ub = np.where( + elig, + tau_max * out.loc[mask_solved, "amc"], + np.minimum(out.loc[mask_solved, "OB"], tau_max * out.loc[mask_solved, "amc"]) + ) + viol_ub = out.loc[mask_solved, "OB_prime"].values > (ub + tol) + + temp = out[mask_solved] + if viol_lb.any(): + print("For the following rows (facility, item and month combinations), unclear why OB_prime < tau_donor_keep * AMC " + "which violates a constraint in the LPP") + print(temp[viol_lb][['Facility_Level', 'amc', 'OB', 'OB_prime']]) + temp[viol_lb][['Facility_Level', 'fac_name', 'amc', 'OB', 'OB_prime']].to_csv('violates_lb.csv') + if viol_ub.any(): + print("For the following rows (facility, item and month combinations), unclear why OB_prime > tau_max * AMC " + "which violates a constraint in the LPP") + print(temp[viol_ub][['Facility_Level', 'amc', 'OB', 'OB_prime']]) + temp[viol_ub][['Facility_Level', 'fac_name', 'amc', 'OB', 'OB_prime']].to_csv('violates_ub.csv') + + if return_move_log: + move_log = pd.DataFrame(move_rows) + return out, move_log + + return out + # pooled_df, pool_moves = redistribute_pooling_lp(lmis, tau_min=0.25, tau_max=3.0, return_move_log=True) + + # Functions to generate summary plots of the outcomes of redistribution + def prep_violin_df(df, scenario_name, keep_facs_with_no_change = True): + out = df.copy() + out["delta_p"] = out["available_prop_redis"] - out["available_prop"] + + if (keep_facs_with_no_change == True): + mask = ( + (out["Facility_Level"].isin(["1a", "1b"])) & + (out["amc"] > 0) + ) + else: + mask = ( + (out["OB_prime"] > out["OB"]) & + (out["Facility_Level"].isin(["1a", "1b"])) & + (out["amc"] > 0) + ) + + return ( + out.loc[mask, ["district", "delta_p"]] + .assign(scenario=scenario_name) + ) + + def _add_custom_legend(fig=None, legend_location="upper right"): + iqr_patch = mpatches.Rectangle( + (0, 0), 1, 1, + facecolor="grey", + edgecolor="black", + linewidth=1, + label="Interquartile range (IQR)" + ) + median_patch = mlines.Line2D( + [], [], color="#b2182b", marker="o", linestyle="None", + markersize=5, label="Median" + ) + mean_patch = mlines.Line2D( + [], [], color="#b2182b", marker="D", linestyle="None", + markersize=6, label="Mean" + ) + + if fig is None: + plt.legend(handles=[iqr_patch, median_patch, mean_patch], + loc=legend_location, fontsize=8, frameon=True) + else: + fig.legend( + handles=[iqr_patch, median_patch, mean_patch], + loc=legend_location, + ncol=3, + fontsize=8, + frameon=True + ) + + def do_violin_plot_change_in_p( + violin_df: pd.DataFrame, + figname: str, + by_district: bool = False, + district_col: str = "district", + ncol: int = 4, + legend_location = "upper right", + ): + """ + Create violin + box + mean/median overlay plots of change in availability. + + If by_district=False: + Single national-level plot. + + If by_district=True: + One combined faceted figure with one panel per district. 
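+
+        Example (illustrative; `pooled_df` is an output of redistribute_pooling_lp
+        and `radius_df` a hypothetical output of redistribute_radius_lp):
+
+        >>> vdf = pd.concat([prep_violin_df(pooled_df, "District pooling"),
+        ...                  prep_violin_df(radius_df, "Pairwise (60 min)")])
+        >>> do_violin_plot_change_in_p(vdf, "delta_p_violin.png")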
+ """ + + # ---------- National-level plot ---------- + if not by_district: + mean_df = violin_df.groupby("scenario", as_index=False)["delta_p"].mean() + median_df = violin_df.groupby("scenario", as_index=False)["delta_p"].median() + + plt.figure(figsize=(10, 5)) + + sns.violinplot( + data=violin_df, + x="scenario", + y="delta_p", + cut=0, + scale="width", + inner=None, + linewidth=0.8, + color="#4C72B0", + alpha=0.6 + ) + + sns.boxplot( + data=violin_df, + x="scenario", + y="delta_p", + width=0.03, + showcaps=True, + showfliers=False, + boxprops={"facecolor": "grey", "edgecolor": "black", "linewidth": 1}, + whiskerprops={"linewidth": 1}, + medianprops={"linewidth": 0} + ) + + sns.scatterplot( + data=mean_df, + x="scenario", + y="delta_p", + color="#b2182b", + marker="D", + s=60, + zorder=10, + label="Mean" + ) + + sns.scatterplot( + data=median_df, + x="scenario", + y="delta_p", + color="#b2182b", + marker="o", + s=45, + zorder=11, + label="Median" + ) + + plt.axhline(0, color="black", linewidth=0.8, linestyle="--") + plt.ylabel("Change in probability of availability (Δp)") + plt.xlabel("") + + _add_custom_legend(legend_location=legend_location) + plt.tight_layout() + plt.savefig(outputfilepath / figname, dpi=600) + plt.close() + return + + # ---------- District-faceted plot ---------- + g = sns.catplot( + data=violin_df, + x="scenario", + y="delta_p", + col=district_col, + col_wrap=ncol, + kind="violin", + cut=0, + scale="width", + inner=None, + linewidth=0.6, + color="#4C72B0", + alpha=0.6, + height=3, + aspect=1 + ) + + # Overlay boxplots, means, medians per facet + for ax, (district, df_d) in zip(g.axes.flat, violin_df.groupby(district_col)): + mean_df = df_d.groupby("scenario", as_index=False)["delta_p"].mean() + median_df = df_d.groupby("scenario", as_index=False)["delta_p"].median() + + sns.boxplot( + data=df_d, + x="scenario", + y="delta_p", + width=0.03, + showcaps=True, + showfliers=False, + boxprops={"facecolor": "grey", "edgecolor": "black", "linewidth": 0.8}, + whiskerprops={"linewidth": 0.8}, + medianprops={"linewidth": 0}, + ax=ax + ) + + sns.scatterplot( + data=mean_df, + x="scenario", + y="delta_p", + color="#b2182b", + marker="D", + s=35, + zorder=10, + ax=ax + ) + + sns.scatterplot( + data=median_df, + x="scenario", + y="delta_p", + color="#b2182b", + marker="o", + s=30, + zorder=11, + ax=ax + ) + + ax.axhline(0, color="black", linewidth=0.6, linestyle="--") + ax.set_xlabel("") + ax.set_ylabel("Δp") + ax.tick_params(axis="x", labelrotation=45, labelsize=8) + ax.tick_params(axis="y", labelsize=8) + ax.set_title(district, fontsize=9) + + _add_custom_legend(fig=g.fig, legend_location = legend_location) + g.fig.tight_layout() + g.fig.savefig(outputfilepath / figname, dpi=600) + plt.close() + + + # IMPLEMENT + # 1) Build a time matrix + fac_coords = lmis[['fac_name', 'district', 'lat','long']] + #T_car = build_time_matrices_by_district( + # fac_coords, + # mode="car", + # backend="osrm", + # osrm_base_url="https://router.project-osrm.org", + # max_chunk=50) + + # Store dictionary in pickle format + #with open(outputfilepath / "T_car2.pkl", "wb") as f: + # pickle.dump(T_car, f) + # -> Commented out because it takes long to run. 
The result has been stored in pickle format.
+
+    # Load pre-generated dictionary
+    import pickle  # assumed not already imported at the top of the script
+    with open(outputfilepath / "T_car2.pkl", "rb") as f:
+        T_car = pickle.load(f)
+    # T_car2 was created after cleaning facility names and removing spaces in the text
+
+    # edges_flat = build_edges_within_radius_flat(T_car, max_minutes=60)
+
+    # 2) Explore the availability and distances to make decisions about optimisation rules
+    # Attach clean item names to lmis
+    consumables_dict = \
+        pd.read_csv(resourcefilepath / 'healthsystem' / 'consumables' / 'ResourceFile_Consumables_Items_and_Packages.csv',
+                    low_memory=False,
+                    encoding="ISO-8859-1")[['Items', 'Item_Code']]
+    consumables_dict = dict(zip(consumables_dict['Item_Code'], consumables_dict['Items']))
+    lmis['item_name'] = pd.to_numeric(lmis["item_code"], errors="coerce").map(consumables_dict)
+    lmis['item_name'] = (
+        lmis['item_name']
+        .astype(str)
+        .apply(clean_consumable_name)
+    )
+
+    # Plot stock adequacy by district and month to assess what bounds to set when pooling
+    empty_cell_note = "Note: Grey cells in the heatmap indicate missing data."
+    fig, ax, hm_df = generate_stock_adequacy_heatmap(df=lmis, figures_path=outputfilepath,
+                                                     y_var='district', value_var='item_name',
+                                                     value_label="% of consumables with Opening Balance ≥ 3 × AMC",
+                                                     amc_threshold=3, compare="ge",
+                                                     filename="mth_district_stock_adequacy_3amc.png", figsize=(12, 10))
+    fig, ax, hm_df = generate_stock_adequacy_heatmap(df=lmis, figures_path=outputfilepath,
+                                                     y_var='district', value_var='item_name',
+                                                     value_label="% of consumables with Opening Balance ≥ 1.5 × AMC",
+                                                     amc_threshold=1.5, compare="ge",
+                                                     filename="mth_district_stock_adequacy_1.5amc.png", figsize=(12, 10))
+    fig, ax, hm_df = generate_stock_adequacy_heatmap(df=lmis, figures_path=outputfilepath,
+                                                     y_var='district', value_var='item_name',
+                                                     value_label="% of consumables with Opening Balance ≤ 1 × AMC",
+                                                     amc_threshold=1, compare="le",
+                                                     filename="mth_district_stock_inadequacy_1amc.png", figsize=(12, 10))
+    fig, ax, hm_df = generate_stock_adequacy_heatmap(df=lmis, figures_path=outputfilepath,
+                                                     y_var='item_name', value_var='fac_name',
+                                                     value_label="% of facilities with Opening Balance ≥ 3 × AMC",
+                                                     amc_threshold=3, compare="ge",
+                                                     footnote=empty_cell_note,
+                                                     filename="mth_item_stock_adequacy_3amc.png")
+    fig, ax, hm_df = generate_stock_adequacy_heatmap(df=lmis, figures_path=outputfilepath,
+                                                     y_var='item_name', value_var='fac_name',
+                                                     value_label="% of facilities with Opening Balance ≥ 1.5 × AMC",
+                                                     amc_threshold=1.5, compare="ge",
+                                                     footnote=empty_cell_note,
+                                                     filename="mth_item_stock_adequacy_1.5amc.png")
+    fig, ax, hm_df = generate_stock_adequacy_heatmap(df=lmis, figures_path=outputfilepath,
+                                                     y_var='item_name', value_var='fac_name',
+                                                     value_label="% of facilities with Opening Balance ≤ 1 × AMC",
+                                                     amc_threshold=1, compare="le",
+                                                     footnote=empty_cell_note,
+                                                     filename="mth_item_stock_inadequacy_1amc.png")
+
+    # Browse the number of eligible neighbours depending on allowable travel time
+    results = []
+    for mins in [30, 60, 90, 120]:
+        edges_flat = build_edges_within_radius_flat(T_car, max_minutes=mins)
+        neighbors_count = pd.Series({fac: len(neigh) for fac, neigh in edges_flat.items()})
+        mean = neighbors_count.mean()
+        sem = neighbors_count.sem()  # standard error of the mean
+        ci95 = 1.96 * sem
+        results.append({"radius": mins, "mean": mean, "ci95": ci95})
+
+    results_df = pd.DataFrame(results)
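+
+    # Illustrative arithmetic for the error bars plotted below (not part of the original
+    # analysis): with n = 3 counts [2, 4, 6], mean = 4.0 and SEM = sd/sqrt(n) ≈ 1.15, so the
+    # normal-approximation 95% CI is 4.0 ± 1.96 × 1.15 ≈ 4.0 ± 2.26.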
+    # Plot
+    plt.figure(figsize=(6, 4))
+    plt.bar(results_df["radius"], results_df["mean"], yerr=results_df["ci95"], capsize=5, color="skyblue")
+    plt.xlabel("Travel time radius (minutes)")
+    plt.ylabel("Average number of facilities within radius")
+    plt.title("Average connectivity of facilities with 95% CI")
+    plt.xticks(results_df["radius"])
+    plt.savefig(outputfilepath / "neighbour_count_by_max_travel_time.png")
+
+    # A manual check shows that for distances greater than 60 minutes OSRM underestimates the travel time a little
+    # TODO consider using the Google Maps API
+
+    # Drop NAs
+    # TODO find a more generalisable solution for this issue (within the optimisation functions)
+    # lmis = lmis[(lmis.amc != 0) & (lmis.amc.notna())]
+
+    # ----------------------------------------------------------------------------------------------------------------------
+    # 3) Implement pooled redistribution
+    # ----------------------------------------------------------------------------------------------------------------------
+    # Build clusters from per-district travel-time matrices
+    # T_car_by_dist: {"District A": DF(index=fac_ids, cols=fac_ids), ...}
+    cluster_size = 3
+    cluster_series = build_capacity_clusters_all(T_car, cluster_size=cluster_size)
+    # cluster_series is a pd.Series: index=facility_id, value like "District A#C00", "District A#C01", ...
+
+    # a) Run optimisation at district level
+    '''
+    # Commented out for quicker runs
+    print("Now running Pooled Redistribution at District level")
+    start = time.time()
+    pooled_district_df, cluster_district_moves = redistribute_pooling_lp(
+        df=lmis,  # the LMIS dataframe
+        tau_min=0.25, tau_max=3.0,
+        tau_donor_keep=1.5,
+        pooling_level="district",
+        cluster_map=None,
+        return_move_log=True,
+        floor_to_baseline=True
+    )
+    print(pooled_district_df.drop(columns=['LMIS Facility List', 'lat', 'long', 'fac_owner']).groupby('Facility_Level')[['available_prop_redis', 'available_prop']].mean())
+    pooled_district_df[['district', 'item_code', 'fac_name', 'month', 'amc', 'available_prop', 'Facility_Level',
+                        'OB', 'OB_prime', 'available_prop_redis', 'received_from_pool']].to_csv(
+        outputfilepath / 'clustering_district_df.csv', index=False)
+    end = time.time()
+    print(f"District redistribution completed in {end - start:.3f} seconds")  # takes ~1.1 hours
+    '''
+    pooled_district_df = pd.read_csv(outputfilepath / 'clustering_district_df.csv')
+    tlo_pooled_district = (
+        pooled_district_df
+        .groupby(["item_code", "district", "Facility_Level", "month"], as_index=False)
+        .agg(available_prop_scenario16=("available_prop_redis", "mean"))
+        .sort_values(["item_code", "district", "Facility_Level", "month"])
+    )
+
+    # b) Run optimisation at cluster (size = 3) level
+    '''
+    # Commented out for quicker runs
+    print("Now running pooled redistribution at Cluster (Size = 3) level")
+    start = time.time()
+    pooled_cluster_df, cluster_moves = redistribute_pooling_lp(
+        df=lmis,  # the LMIS dataframe
+        tau_min=0, tau_max=3.0,
+        tau_donor_keep=1.5,
+        pooling_level="cluster",
+        cluster_map=cluster_series,
+        return_move_log=True,
+        floor_to_baseline=True
+    )
+    print(pooled_cluster_df.drop(columns=['LMIS Facility List', 'lat', 'long', 'fac_owner']).groupby('Facility_Level')[['available_prop_redis', 'available_prop']].mean())
+    pooled_cluster_df[['district', 'item_code', 'fac_name', 'month', 'amc', 'available_prop', 'Facility_Level',
+                       'OB', 'OB_prime', 'available_prop_redis', 'received_from_pool']].to_csv(
+        outputfilepath / 'clustering_n3_df.csv', index=False)
+
+    end = time.time()
+    print(f"Cluster redistribution completed in {end - start:.3f} seconds")  # takes ~18 hours
+    '''
+    pooled_cluster_df = pd.read_csv(outputfilepath / 'clustering_n3_df.csv')
+
+    tlo_pooled_cluster = (
+        pooled_cluster_df
+        .groupby(["item_code", "district", "Facility_Level", "month"], as_index=False)
+        .agg(available_prop_scenario17=("available_prop_redis", "mean"))
+        .sort_values(["item_code", "district", "Facility_Level", "month"])
+    )
+
+    # c) Implement pairwise redistribution
+    '''
+    # Commented out for quicker runs
+    print("Now running pairwise redistribution with maximum radius 60 minutes")
+    start = time.time()
+    # c.i) 1-hour radius
+    large_radius_df, large_radius_moves = redistribute_radius_lp(
+        df=lmis,
+        time_matrix=T_car,
+        radius_minutes=60,             # facilities within 1 hour by car
+        tau_keep=1.5,                  # donor must keep 1.5 × AMC
+        tau_tar=1.0,                   # receivers target 1 × AMC
+        K_in=2,                        # at most 2 inbound transfers per item
+        K_out=10,                      # at most 10 outbound transfers  # TODO could increase this
+        Qmin_proportion=0.25,          # min lot = one week of demand
+        eligible_levels=("1a", "1b"),  # only 1a/1b can receive
+    )
+    print(large_radius_df.groupby('Facility_Level')[['available_prop_redis', 'available_prop']].mean())
+    large_radius_df.to_csv(outputfilepath / 'large_radius_df.csv', index=False)
+    end = time.time()
+    print(f"Large-radius exchange redistribution completed in {end - start:.3f} seconds")
+    '''
+    large_radius_df = pd.read_csv(outputfilepath / 'large_radius_df.csv')
+    tlo_large_radius = (
+        large_radius_df
+        .groupby(["item_code", "district", "Facility_Level", "month"], as_index=False)
+        .agg(available_prop_scenario18=("available_prop_redis", "mean"))
+        .sort_values(["item_code", "district", "Facility_Level", "month"])
+    )
+
+    # c.ii) 30-minute radius
+    '''
+    print("Now running pairwise redistribution with maximum radius 30 minutes")
+    start = time.time()
+    small_radius_df, small_radius_moves = redistribute_radius_lp(
+        df=lmis,
+        time_matrix=T_car,
+        radius_minutes=30,             # facilities within 30 minutes by car
+        tau_keep=1.5,                  # donor must keep 1.5 × AMC
+        tau_tar=1.0,                   # receivers target 1 × AMC
+        K_in=2,                        # at most 2 inbound transfers per item
+        K_out=10,                      # at most 10 outbound transfers
+        Qmin_proportion=0.25,          # min lot = one week of demand
+        eligible_levels=("1a", "1b"),  # only 1a/1b can receive
+    )
+    print(small_radius_df.groupby('Facility_Level')[['available_prop_redis', 'available_prop']].mean())
+    small_radius_df.to_csv(outputfilepath / 'small_radius_df.csv', index=False)
+    end = time.time()
+    print(f"Small-radius exchange redistribution completed in {end - start:.3f} seconds")
+    '''
+    small_radius_df = pd.read_csv(outputfilepath / 'small_radius_df.csv')
+    tlo_small_radius = (
+        small_radius_df
+        .groupby(["item_code", "district", "Facility_Level", "month"], as_index=False)
+        .agg(available_prop_scenario19=("available_prop_redis", "mean"))
+        .sort_values(["item_code", "district", "Facility_Level", "month"])
+    )
+
+    # Summarise the outcome of the redistribution in violin plots
+    violin_df_all_facs = pd.concat([
+        prep_violin_df(pooled_district_df, "District pooling", keep_facs_with_no_change=True),
+        prep_violin_df(pooled_cluster_df, "Cluster pooling", keep_facs_with_no_change=True),
+        prep_violin_df(large_radius_df, "Pairwise (large radius)", keep_facs_with_no_change=True),
+        prep_violin_df(small_radius_df, "Pairwise (small radius)", keep_facs_with_no_change=True)
+    ], ignore_index=True)
+    violin_df_only_facs_with_change = pd.concat([
+        prep_violin_df(pooled_district_df, "District pooling", keep_facs_with_no_change=False),
+        prep_violin_df(pooled_cluster_df, "Cluster pooling", keep_facs_with_no_change=False),
+        prep_violin_df(large_radius_df, "Pairwise (large radius)", keep_facs_with_no_change=False),
+        prep_violin_df(small_radius_df, "Pairwise (small radius)", keep_facs_with_no_change=False)
+    ], ignore_index=True)
+
+    do_violin_plot_change_in_p(
+        violin_df=violin_df_all_facs,
+        figname="violin_redistribution_national_all_facs.png",
+        legend_location="upper right"
+    )
+    do_violin_plot_change_in_p(
+        violin_df=violin_df_only_facs_with_change,
+        figname="violin_redistribution_national_only_facs_with_change.png",
+        legend_location="lower right"
+    )
+
+    do_violin_plot_change_in_p(
+        violin_df=violin_df_all_facs,
+        figname="violin_by_district_all_facs.png",
+        by_district=True,
+        ncol=4
+    )
+
+    do_violin_plot_change_in_p(
+        violin_df=violin_df_only_facs_with_change,
+        figname="violin_by_district_only_facs_with_change.png",
+        by_district=True,
+        ncol=4
+    )
+
+    # ----------------------------------------------------------------------------------------------------------------------
+    # 4) Compile updated probabilities and merge with the ResourceFile
+    # ----------------------------------------------------------------------------------------------------------------------
+    # 4.1) Merge the new dataframes together
+    # ----------------------------------------------------------------------------------------------------------------------
+    from functools import reduce  # assumed not already imported at the top of the script
+    tlo_redis = reduce(
+        lambda left, right: pd.merge(
+            left, right,
+            on=["item_code", "district", "Facility_Level", "month"],
+            how="outer"
+        ),
+        [tlo_pooled_district, tlo_pooled_cluster, tlo_large_radius, tlo_small_radius]
+    )
+
+    tlo_redis.to_csv(outputfilepath / 'tlo_redis.csv', index=False)
+
+    # Edit the new dataframe to match the mfl formatting
+    list_of_new_scenario_variables = ['available_prop_scenario16', 'available_prop_scenario17',
+                                      'available_prop_scenario18', 'available_prop_scenario19']
+    tlo_redis = tlo_redis[['item_code', 'month', 'district', 'Facility_Level'] + list_of_new_scenario_variables].dropna()
+    tlo_redis["item_code"] = tlo_redis["item_code"].astype(float).astype(int)
+
+    # Load master facility list and assign the referral hospitals to their host districts
+    mfl = pd.read_csv(resourcefilepath / "healthsystem" / "organisation" / "ResourceFile_Master_Facilities_List.csv")
+    mfl["District"] = mfl["District"].astype("string").str.strip().str.replace(r"\s+", "_", regex=True)
+    districts = set(mfl[mfl.District.notna()]["District"].unique())
+    kch = (mfl.Region == 'Central') & (mfl.Facility_Level == '3')
+    qech = (mfl.Region == 'Southern') & (mfl.Facility_Level == '3')
+    mch = (mfl.Region == 'Northern') & (mfl.Facility_Level == '3')
+    zmh = mfl.Facility_Level == '4'
+    mfl.loc[kch, "District"] = "Lilongwe"
+    mfl.loc[qech, "District"] = "Blantyre"
+    mfl.loc[mch, "District"] = "Mzimba"
+    mfl.loc[zmh, "District"] = "Zomba"
+
+    # Do some mapping to make the Districts line up with the definition of Districts in the model
+    rename_and_collapse_to_model_districts = {
+        'Nkhota_Kota': 'Nkhotakota',
+        'Mzimba_South': 'Mzimba',
+        'Mzimba_North': 'Mzimba',
+        'Nkhata_bay': 'Nkhata_Bay',
+    }
+
+    tlo_redis['district_std'] = tlo_redis['district'].replace(rename_and_collapse_to_model_districts)
+    # Take averages (now that both 'Mzimba_South' and 'Mzimba_North' map to 'Mzimba')
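+    # Illustrative example (made-up numbers): if, for month 1 / item 1 / level '1a', the rows are
+    #   Mzimba_South -> Mzimba with available_prop_scenario16 = 0.60 and
+    #   Mzimba_North -> Mzimba with available_prop_scenario16 = 0.80,
+    # the groupby-mean below leaves a single 'Mzimba' row with the value 0.70.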
+    tlo_redis = tlo_redis.groupby(by=['district_std', 'Facility_Level', 'month', 'item_code'])[list_of_new_scenario_variables].mean().reset_index()
+
+    # Fill in missing data:
+    # 1) Cities take the same results as their host districts
+    copy_source_to_destination = {
+        'Mzimba': 'Mzuzu_City',
+        'Lilongwe': 'Lilongwe_City',
+        'Zomba': 'Zomba_City',
+        'Blantyre': 'Blantyre_City'
+    }
+
+    for source, destination in copy_source_to_destination.items():
+        new_rows = tlo_redis.loc[(tlo_redis.district_std == source) & (tlo_redis.Facility_Level.isin(['1a', '1b', '2']))].copy()
+        new_rows.district_std = destination
+        tlo_redis = pd.concat([tlo_redis, new_rows], axis=0, ignore_index=True)
+
+    # 2) Fill in Likoma (for which there are no data) with the means
+    means = tlo_redis.loc[tlo_redis.Facility_Level.isin(['1a', '1b', '2'])].groupby(by=['Facility_Level', 'month', 'item_code'])[
+        list_of_new_scenario_variables].mean().reset_index()
+    new_rows = means.copy()
+    new_rows['district_std'] = 'Likoma'
+    tlo_redis = pd.concat([tlo_redis, new_rows], axis=0, ignore_index=True)
+    assert sorted(set(districts)) == sorted(set(pd.unique(tlo_redis.district_std)))
+
+    # 3) Copy the results for 'Mwanza/1a' to 'Mwanza/1b'
+    mwanza_1a = tlo_redis.loc[(tlo_redis.district_std == 'Mwanza') & (tlo_redis.Facility_Level == '1a')]
+    mwanza_1b = mwanza_1a.copy().assign(Facility_Level='1b')
+    tlo_redis = pd.concat([tlo_redis, mwanza_1b], axis=0, ignore_index=True)
+
+    # 4) Copy all the results to create a level 0 with an availability equal to half that in the respective 1a
+    all_1a = tlo_redis.loc[tlo_redis.Facility_Level == '1a']
+    all_0 = all_1a.copy().assign(Facility_Level='0')
+    all_0[list_of_new_scenario_variables] *= 0.5
+    tlo_redis = pd.concat([tlo_redis, all_0], axis=0, ignore_index=True)
+
+    # Now, merge in Facility_ID
+    tlo_redis = tlo_redis.merge(mfl[['District', 'Facility_Level', 'Facility_ID']],
+                                left_on=['district_std', 'Facility_Level'],
+                                right_on=['District', 'Facility_Level'], how='left', indicator=True, validate='m:1')
+    tlo_redis = tlo_redis[tlo_redis.Facility_ID.notna()].rename(columns={'district_std': 'district'})
+    assert sorted(set(mfl.loc[mfl.Facility_Level != '5', 'Facility_ID'].unique())) == sorted(set(pd.unique(tlo_redis.Facility_ID)))
+
+    # Load the original availability dataframe and keep only the columns needed
+    # ----------------------------------------------------------------------------------------------------------------------
+    list_of_old_scenario_variables = [f"available_prop_scenario{i}" for i in range(1, scenario_count + 1)]
+    tlo_availability_df = tlo_availability_df[['Facility_ID', 'month', 'item_code', 'available_prop'] + list_of_old_scenario_variables]
+
+    # Attach district, facility level and item_category to this dataset
+    program_item_mapping = pd.read_csv(path_for_new_resourcefiles / 'ResourceFile_Consumables_Item_Designations.csv')[['Item_Code', 'item_category']]  # Import item_category
+    program_item_mapping = program_item_mapping.rename(columns={'Item_Code': 'item_code'})[program_item_mapping.item_category.notna()]
+    fac_levels = {'0', '1a', '1b', '2', '3', '4'}
+    tlo_availability_df = tlo_availability_df.merge(mfl[['District', 'Facility_Level', 'Facility_ID']],
+                                                    on=['Facility_ID'], how='left').rename(columns={'District': 'district'})
+    tlo_availability_df = tlo_availability_df.merge(program_item_mapping,
+                                                    on=['item_code'], how='left')
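+    # Optional diagnostic (not in the original script): report the share of rows that did not
+    # pick up a programme mapping in the merge above.
+    print(f"{tlo_availability_df['item_category'].isna().mean():.1%} of availability rows have no item_category")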
+    # tlo_availability_df = tlo_availability_df[~tlo_availability_df[['District', 'Facility_Level', 'month', 'item_code']].duplicated()]
+
+    # Because some of the availability data in the original dataframe come from data sources other than OpenLMIS, there are
+    # more unique item codes in tlo_availability_df than in tlo_redis. For these items, assume that the proportion of 'uplift'
+    # is the same as the average 'uplift' experienced across the consumables in tlo_redis, disaggregated by district,
+    # facility level, and month.
+
+    # First fix any unexpected changes in availability probability
+    # Merge the old and new dataframes
+    redis_levels = ['1a', '1b']
+    tlo_redis = tlo_redis[tlo_redis.Facility_Level.isin(redis_levels)]
+
+    tlo_redis = tlo_redis.merge(
+        tlo_availability_df[["district", "Facility_Level", "item_code", "month", "available_prop"]],
+        on=["district", "Facility_Level", "item_code", "month"],
+        how="left",
+        validate="one_to_one"
+    )
+
+    for redis_scenario_col in list_of_new_scenario_variables:
+        pre = (
+            tlo_redis[redis_scenario_col] < tlo_redis["available_prop"]
+        ).mean()
+        print(f"Pre-fix {redis_scenario_col}: {pre:.3%}")
+
+        # Enforce no-harm: redistribution should never lower availability below baseline
+        tlo_redis[redis_scenario_col] = np.maximum(
+            tlo_redis[redis_scenario_col],
+            tlo_redis["available_prop"]
+        )
+
+        post = (
+            tlo_redis[redis_scenario_col] < tlo_redis["available_prop"]
+        ).mean()
+        print(f"Post-fix {redis_scenario_col}: {post:.3%}")
+
+    # Next create an uplift dataframe
+    modelled_items = tlo_redis["item_code"].unique()
+    # Compute uplift once per scenario, store in a dict
+    uplift_maps = {}
+
+    for scenario_col in list_of_new_scenario_variables:
+        uplift_maps[scenario_col] = (
+            tlo_redis.assign(
+                uplift=lambda x: np.where(
+                    x["available_prop"] > 0,
+                    x[scenario_col] / x["available_prop"],
+                    np.nan
+                )
+            )
+            .groupby(["district", "Facility_Level", "month"], as_index=False)["uplift"]
+            .mean()
+            .rename(columns={"uplift": f"uplift_{scenario_col}"})
+        )
+
+    # Get baseline rows for missing items
+    missing_mask = ~tlo_availability_df["item_code"].isin(modelled_items)
+
+    df_missing = (
+        tlo_availability_df[
+            (tlo_availability_df["Facility_Level"].isin(redis_levels)) &
+            missing_mask
+        ]
+        .copy()
+    )
+
+    # Merge all uplifts horizontally; e.g. if a missing item has a baseline available_prop of 0.40
+    # and the average uplift for its district/level/month is 1.20, its scenario value becomes
+    # min(1.0, 0.40 * 1.20) = 0.48
+    for scenario_col, uplift_df in uplift_maps.items():
+        df_missing = df_missing.merge(
+            uplift_df,
+            on=["district", "Facility_Level", "month"],
+            how="left"
+        )
+        df_missing[scenario_col] = np.minimum(
+            1.0,
+            df_missing["available_prop"] * df_missing[f"uplift_{scenario_col}"]
+        )
+        df_missing.drop(columns=[f"uplift_{scenario_col}"], inplace=True)
+
+    # Concatenate
+    tlo_redis = pd.concat(
+        [tlo_redis, df_missing],
+        ignore_index=True
+    )
+
+    dupes = tlo_redis.duplicated(
+        ["district", "Facility_Level", "item_code", "month"]
+    )
+    assert dupes.sum() == 0
+
+    for scenario_col in list_of_new_scenario_variables:
+        assert (tlo_redis[scenario_col] <
+                tlo_redis["available_prop"]).sum() == 0
+
+    tlo_redis = tlo_redis[['Facility_ID', 'month', 'item_code'] + list_of_new_scenario_variables]
+
+    # Interpolate missing values in tlo_redis for levels 1a and 1b (the levels covered by redistribution)
+    # ----------------------------------------------------------------------------------------------------------------------
+    # Generate the dataframe that has the desired size and shape
+    fac_ids = set(mfl.loc[mfl.Facility_Level.isin(redis_levels)].Facility_ID)
+    item_codes = set(tlo_availability_df.item_code.unique())
+    months = range(1, 13)
+
+    # Create a MultiIndex from the product of fac_ids, months, and item_codes
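+    # (The scaffold below has len(fac_ids) × 12 × len(item_codes) rows; e.g., with made-up
+    # sizes, 600 facilities × 12 months × 300 items would give 2,160,000 rows, all NaN until
+    # the observed values are inserted via combine_first.)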
+    index = pd.MultiIndex.from_product([fac_ids, months, item_codes], names=['Facility_ID', 'month', 'item_code'])
+
+    # Initialise a DataFrame with the MultiIndex and the scenario columns, filled with NaN
+    full_set = pd.DataFrame(index=index, columns=list_of_new_scenario_variables)
+    full_set = full_set.astype(float)  # Ensure all columns are float type and filled with NaN
+
+    # Insert the data, where it is available
+    full_set = full_set.combine_first(tlo_redis.set_index(['Facility_ID', 'month', 'item_code'])[list_of_new_scenario_variables])
+
+    # Fill in the blanks with rules for interpolation
+    facilities_by_level = defaultdict(set)
+    for ix, row in mfl.iterrows():
+        facilities_by_level[row['Facility_Level']].add(row['Facility_ID'])
+
+    items_by_category = defaultdict(set)
+    for ix, row in program_item_mapping.iterrows():
+        items_by_category[row['item_category']].add(row['item_code'])
+
+    def get_other_facilities_of_same_level(_fac_id):
+        """Return a set of facility_id for other facilities that are of the same level as that provided."""
+        for v in facilities_by_level.values():
+            if _fac_id in v:
+                return v - {_fac_id}
+
+    def get_other_items_of_same_category(_item_code):
+        """Return a set of item_codes for other items that are in the same category/program as that provided."""
+        for v in items_by_category.values():
+            if _item_code in v:
+                return v - {_item_code}
+
+    def interpolate_missing_with_mean(_ser):
+        """Return a series in which any values that are null are replaced with the mean of the non-missing."""
+        if pd.isnull(_ser).all():
+            raise ValueError("Cannot interpolate a series in which all values are missing")
+        return _ser.fillna(_ser.mean())
+
+    # Create a new dataset that includes the interpolations. (The operation is not done "in place", because the logic
+    # is based on what results are missing before the interpolations in other facilities.)
+    full_set_interpolated = full_set * np.nan
+    full_set_interpolated[list_of_new_scenario_variables] = full_set[list_of_new_scenario_variables]
+
+    for fac in fac_ids:
+        for item in item_codes:
+            for col in list_of_new_scenario_variables:
+                print(f"Now doing: fac={fac}, item={item}, column={col}")
+
+                # Get records of the availability of this item in this facility
+                _monthly_records = full_set.loc[(fac, slice(None), item), col].copy()
+
+                if pd.notnull(_monthly_records).any():
+                    # If there is at least one record of this item at this facility, then interpolate the missing
+                    # months from the months for which there are data on this item in this facility. (If none are
+                    # missing, this has no effect.)
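+                    # e.g. (illustrative) monthly values [0.2, NaN, 0.4, NaN] become
+                    # [0.2, 0.3, 0.4, 0.3], since the mean of the non-missing values is 0.3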
+                    _monthly_records = interpolate_missing_with_mean(_monthly_records)
+
+                else:
+                    # If there is no record of this item at this facility, check whether it is recorded at other
+                    # facilities of the same level; failing that, check whether other items of the same category
+                    # are recorded at this facility
+                    facilities = list(get_other_facilities_of_same_level(fac))
+
+                    other_items = get_other_items_of_same_category(item)
+                    items = list(other_items) if other_items else []
+
+                    recorded_at_other_facilities_of_same_level = pd.notnull(
+                        full_set.loc[(facilities, slice(None), item), col]
+                    ).any()
+
+                    if not items:
+                        category_recorded_at_other_facilities_of_same_level = False
+                    else:
+                        # Filter only items that exist in the MultiIndex at this facility
+                        valid_items = [
+                            itm for itm in items
+                            if any((fac, m, itm) in full_set.index for m in months)
+                        ]
+
+                        category_recorded_at_other_facilities_of_same_level = pd.notnull(
+                            full_set.loc[(fac, slice(None), valid_items), col]
+                        ).any()
+
+                    if recorded_at_other_facilities_of_same_level:
+                        # If the item is recorded at other facilities of the same level, use the average availability
+                        # of the item at those facilities (by month)
+                        print("Data for facility ", fac, " extrapolated from other facilities within level - ", facilities)
+                        _monthly_records = interpolate_missing_with_mean(
+                            full_set.loc[(facilities, slice(None), item), col].groupby(level=1).mean()
+                        )
+
+                    elif category_recorded_at_other_facilities_of_same_level and valid_items:
+                        # Otherwise, if other items of the same category are recorded at this facility, use their
+                        # average availability (by month)
+                        print("Data for item ", item, " extrapolated from other items within category - ", valid_items)
+
+                        _monthly_records = interpolate_missing_with_mean(
+                            full_set.loc[(fac, slice(None), valid_items), col].groupby(level=1).mean()
+                        )
+
+                    else:
+                        # If neither fallback yields any data, default to full availability (1.0)
+                        print("No interpolation worked")
+                        _monthly_records = _monthly_records.fillna(1.0)
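+                # To recap, the fallback order applied above is: (1) the mean over this facility's
+                # own observed months; (2) the mean over other same-level facilities for this item;
+                # (3) the mean over same-category items at this facility; (4) a default of 1.0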
+                # Insert values (including corrections) into the resulting dataset
+                full_set_interpolated.loc[(fac, slice(None), item), col] = _monthly_records.values
+                # Temporary sanity check
+                assert full_set_interpolated.loc[(fac, slice(None), item), col].mean() >= 0
+
+    # Check that there are no missing values
+    assert not pd.isnull(full_set_interpolated).any().any()
+
+    full_set_interpolated = full_set_interpolated.reset_index()
+
+    # Add to this dataset the original availability for all the other levels of care
+    base_other_levels = tlo_availability_df[
+        ~tlo_availability_df["Facility_Level"].isin(redis_levels)
+    ].copy()
+    for col in list_of_new_scenario_variables:
+        base_other_levels[col] = base_other_levels["available_prop"]
+    base_other_levels = base_other_levels[['Facility_ID', 'month', 'item_code'] + list_of_new_scenario_variables]
+    tlo_redis_final = pd.concat(
+        [full_set_interpolated, base_other_levels],
+        ignore_index=True,
+    )
+    # tlo_redis_final.to_csv(outputfilepath / 'ResourceFile_consumable_availability_after_redistribution.csv', index=False)
+
+    # Verify that the shape of this dataframe is identical to the original availability dataframe
+    assert sorted(set(tlo_redis_final.Facility_ID)) == sorted(set(pd.unique(tlo_availability_df.Facility_ID)))
+    assert sorted(set(tlo_redis_final.month)) == sorted(set(pd.unique(tlo_availability_df.month)))
+    assert sorted(set(tlo_redis_final.item_code)) == sorted(set(pd.unique(tlo_availability_df.item_code)))
+    assert len(tlo_redis_final) == len(tlo_availability_df)
+
+    tlo_redis_final = tlo_availability_df.merge(tlo_redis_final, on=['Facility_ID', 'item_code', 'month'],
+                                                how='left', validate="1:1")
+
+    return tlo_redis_final
+
+# Plot final availability
+def plot_availability_heatmap(
+    df: pd.DataFrame,
+    y_var: str = None,
+    scenario_cols: list[str] = None,
+    filter_dict: dict = None,
+    cmap: str = "RdYlGn",
+    vmin: float = 0,
+    vmax: float = 1,
+    figsize: tuple = (10, 8),
+    annot: bool = True,
+    rename_scenarios_dict: dict = None,
+    title: str = 'Availability across scenarios',
+    figname: Path = None,
+):
+    """
+    Flexible heatmap generator that supports:
+    1. Filters to subset data
+    2. Multiple scenario columns (wide format, like available_prop_scenario1-16)
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        Input dataframe.
+    y_var : str, optional
+        Column name for y-axis.
+    scenario_cols : list of str, optional
+        List of scenario columns (e.g., [f"available_prop_scenario{i}" for i in range(1, 17)]).
+    filter_dict : dict, optional
+        Filters to apply before plotting, e.g. {"Facility_Level": "1a"}.
+    cmap : str
+        Colormap.
+    vmin, vmax : float
+        Color scale range.
+    figsize : tuple
+        Figure size.
+    annot : bool
+        Annotate cells with values.
+    rename_scenarios_dict : dict, optional
+        Rename columns (for pretty scenario names, etc.)
+    title : str, optional
+        Title for the plot.
+    figname : str or Path, optional
+        Save name for the PNG; if None, displays interactively.
+    """
+    if filter_dict:
+        for k, v in filter_dict.items():
+            if isinstance(v, (list, tuple, set)):
+                df = df[df[k].isin(v)]
+            else:
+                df = df[df[k] == v]
+
+    aggregated_df = df.groupby([y_var])[scenario_cols].mean().reset_index()
+    heatmap_data = aggregated_df.set_index(y_var)
+
+    # Calculate the overall 'Average' row (true overall mean per scenario)
+    aggregate_col = df[scenario_cols].mean()
+    if rename_scenarios_dict:
+        aggregate_col = aggregate_col.rename(index=rename_scenarios_dict)
+
+    # Apply column renames
+    if rename_scenarios_dict:
+        heatmap_data = heatmap_data.rename(columns=rename_scenarios_dict)
+    heatmap_data.loc['Average'] = aggregate_col
+
+    # Generate the heatmap
+    sns.set(font_scale=1)
+    plt.figure(figsize=figsize)
+    ax = sns.heatmap(
+        heatmap_data,
+        annot=annot,
+        cmap=cmap,
+        vmin=vmin,
+        vmax=vmax,
+        cbar_kws={'label': 'Proportion of days on which consumable is available'}
+    )
+
+    # Customise the plot
+    plt.title(title)
+    plt.xlabel('Scenarios')
+    plt.ylabel(y_var)
+    plt.xticks(rotation=90, fontsize=12)
+    plt.yticks(rotation=0, fontsize=11)
+    ax.set_xticklabels(
+        [textwrap.fill(label.get_text(), 20) for label in ax.get_xticklabels()],
+        rotation=90, ha='center'
+    )
+    ax.set_yticklabels(
+        [textwrap.fill(label.get_text(), 25) for label in ax.get_yticklabels()],
+        rotation=0, va='center'
+    )
+
+    if figname:
+        plt.savefig(outputfilepath / figname, dpi=500, bbox_inches='tight')
+        plt.close()
+    else:
+        plt.show()
+        plt.close()
+
+'''
+# Clean item category
+clean_category_names = {'cancer': 'Cancer', 'cardiometabolicdisorders': 'Cardiometabolic Disorders',
+                        'contraception': 'Contraception', 'general': 'General', 'hiv': 'HIV', 'malaria': 'Malaria',
+                        'ncds': 'Non-communicable Diseases', 'neonatal_health': 'Neonatal Health',
+                        'other_childhood_illnesses': 'Other Childhood Illnesses', 'reproductive_health': 'Reproductive Health',
+                        'road_traffic_injuries': 'Road Traffic Injuries', 'tb': 'Tuberculosis',
+                        'undernutrition': 'Undernutrition', 'epi': 'Expanded programme on immunization'}
+df_for_plots['item_category_clean'] = df_for_plots['item_category'].map(clean_category_names)
+
+scenario_cols = ['available_prop', 'available_prop_scenario1', 'available_prop_scenario2', 'available_prop_scenario3',
+                 'available_prop_scenario6', 'available_prop_scenario7', 'available_prop_scenario8',
+                 'available_prop_scenario16', 'available_prop_scenario17', 'available_prop_scenario18', 'available_prop_scenario19']
+rename_dict = {'available_prop': 'Actual',
+               'available_prop_scenario1': 'Non-therapeutic consumables',
+               'available_prop_scenario2': 'Vital medicines',
+               'available_prop_scenario3': 'Pharmacist-managed',
+               'available_prop_scenario6': '75th percentile facility',
+               'available_prop_scenario7': '90th percentile facility',
+               'available_prop_scenario8': 'Best facility',
+               'available_prop_scenario16': 'District Pooling',
+               'available_prop_scenario17': 'Cluster Pooling',
+               'available_prop_scenario18': 'Pairwise exchange (60-min radius)',
+               'available_prop_scenario19': 'Pairwise exchange (30-min radius)'}
+scenario_names = list(rename_dict.values())
+
+# Plot heatmap for level 1a
+plot_availability_heatmap(
+    df=df_for_plots,
+    scenario_cols=scenario_cols,
+    
y_var="item_category_clean", + filter_dict={"Facility_Level": ["1b"]}, + title="Availability across Scenarios — Level 1b", + rename_scenarios_dict = rename_dict, + cmap = "RdYlGn", + figname = 'availability_1b.png' +) +''' diff --git a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/generate_consumable_availability_scenarios_for_impact_analysis.py b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/generate_consumable_availability_scenarios_for_impact_analysis.py index 6af7387301..32281bbe96 100644 --- a/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/generate_consumable_availability_scenarios_for_impact_analysis.py +++ b/src/scripts/data_file_processing/healthsystem/consumables/generating_consumable_scenarios/generate_consumable_availability_scenarios_for_impact_analysis.py @@ -37,6 +37,7 @@ Consumable availability is measured as probability of consumable being available at any point in time. """ import datetime +import os from collections import defaultdict from pathlib import Path @@ -44,11 +45,9 @@ import numpy as np import pandas as pd import seaborn as sns - -from tlo.methods.consumables import check_format_of_consumables_file - #from plotnine import aes, element_text, geom_bar, ggplot, labs, theme, ylim # for ggplots from R +from tlo.methods.consumables import check_format_of_consumables_file # define a timestamp for script outputs timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M") @@ -79,7 +78,7 @@ def generate_alternative_availability_scenarios(tlo_availability_df: pd.DataFram # Get TLO Facility_ID for each district and facility level mfl = pd.read_csv(resourcefilepath / "healthsystem" / "organisation" / "ResourceFile_Master_Facilities_List.csv") districts = set(pd.read_csv(resourcefilepath / 'demography' / 'ResourceFile_Population_2010.csv')['District']) - fac_levels = {'0', '1a', '1b', '2', '3', '4'} # noqa: F841 + fac_levels = {'0', '1a', '1b', '2', '3', '4'} tlo_availability_df = tlo_availability_df.merge(mfl[['District', 'Facility_Level', 'Facility_ID']], on = ['Facility_ID'], how='left') @@ -600,7 +599,7 @@ def interpolate_missing_with_mean(_ser): #------------------------------------------------------ # Combine all scenario suffixes into a single list scenario_suffixes = list_of_scenario_suffixes + [f'scenario{i}' for i in range(6, 16)] - scenario_vars = [f'available_prop_{s}' for s in scenario_suffixes] # noqa: F841 + scenario_vars = [f'available_prop_{s}' for s in scenario_suffixes] old_vars = ['Facility_ID', 'month', 'item_code'] # Prepare the full base dataframe from scenarios 6–8 diff --git a/src/tlo/methods/consumables.py b/src/tlo/methods/consumables.py index 4b622cbea7..20255f6432 100644 --- a/src/tlo/methods/consumables.py +++ b/src/tlo/methods/consumables.py @@ -64,6 +64,10 @@ def __init__( "scenario13", "scenario14", "scenario15", + "scenario16", + "scenario17", + "scenario18", + "scenario19", } # Create internal items: @@ -166,6 +170,10 @@ def _update_prob_item_codes_available(self, availability: str): "scenario13", "scenario14", "scenario15", + "scenario16", + "scenario17", + "scenario18", + "scenario19", ): pass # change already picked up in `self._process_consumables_data()` elif availability == "all": @@ -228,6 +236,10 @@ def _process_consumables_data(self, availability_data: pd.DataFrame, availabilit "scenario13", "scenario14", "scenario15", + "scenario16", + "scenario17", + "scenario18", + "scenario19", ): return 
availability_data.set_index(["month", "Facility_ID", "item_code"])[f"available_prop_{availability}"]
diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 29519a015c..89141af9d0 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -565,6 +565,7 @@ def __init__(
     )
 
     def read_parameters(self, resourcefilepath: Optional[Path] = None):
+        self.resourcefilepath = resourcefilepath
         path_to_resourcefiles_for_healthsystem = resourcefilepath / "healthsystem"
 
         # Read parameters for overall performance of the HealthSystem
@@ -646,19 +647,6 @@ def read_parameters(self, resourcefilepath: Optional[Path] = None):
             dtype={"Item_Code": int, "is_diagnostic": bool, "is_medicine": bool, "is_other": bool},
         ).set_index("Item_Code")
 
-        # Choose to read-in the updated availabilty estimates or the legacy availability estimates
-        if self.parameters["data_source_for_cons_availability_estimates"] == 'original':
-            filename_for_cons_availability_estimates = "ResourceFile_Consumables_availability_small_original.csv"
-        elif self.parameters["data_source_for_cons_availability_estimates"] == 'updated':
-            filename_for_cons_availability_estimates = "ResourceFile_Consumables_availability_small.csv"
-        else:
-            raise ValueError("data_source_for_cons_availability_estimates should be either 'original' or 'updated'")
-
-        self.parameters["availability_estimates"] = pd.read_csv(
-            path_to_resourcefiles_for_healthsystem / "consumables" / filename_for_cons_availability_estimates
-        )
-
-
         # Data on the number of beds available of each type by facility_id
         self.parameters["BedCapacity"] = pd.read_csv(
             path_to_resourcefiles_for_healthsystem / "infrastructure_and_equipment" / "ResourceFile_Bed_Capacity.csv"
        )
@@ -764,6 +752,7 @@ def validate_clinic_configuration(self, clinic_capabilities_df: pd.DataFrame):
 
     def pre_initialise_population(self):
         """Generate the accessory classes used by the HealthSystem and pass to them the data that has been read."""
+        path_to_resourcefiles_for_healthsystem = self.resourcefilepath / "healthsystem"
 
         # Create dedicated RNGs for separate functions done by the HealthSystem module
         self.rng_for_hsi_queue = np.random.RandomState(self.rng.randint(2**31 - 1))
@@ -797,6 +786,18 @@
         self.bed_days = BedDays(hs_module=self, availability=self.get_beds_availability())
         self.bed_days.pre_initialise_population()
 
+        # Choose to read in the updated consumable availability estimates or the legacy availability estimates
+        if self.parameters["data_source_for_cons_availability_estimates"] == 'original':
+            filename_for_cons_availability_estimates = "ResourceFile_Consumables_availability_small_original.csv"
+        elif self.parameters["data_source_for_cons_availability_estimates"] == 'updated':
+            filename_for_cons_availability_estimates = "ResourceFile_Consumables_availability_small.csv"
+        else:
+            raise ValueError("data_source_for_cons_availability_estimates should be either 'original' or 'updated'")
+
+        self.parameters["availability_estimates"] = pd.read_csv(
+            path_to_resourcefiles_for_healthsystem / "consumables" / filename_for_cons_availability_estimates
+        )
+
         # Initialise the Consumables class
         self.consumables = Consumables(
             availability_data=self.update_consumables_availability_to_represent_merging_of_levels_1b_and_2(