amazon-science
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 1 addition & 1 deletion
diff --git a/src/causal_validation/data.py b/src/causal_validation/data.py
Lines changed: 103 additions & 3 deletions
diff --git a/src/causal_validation/validation/placebo.py b/src/causal_validation/validation/placebo.py
Lines changed: 1 addition & 8 deletions
diff --git a/src/causal_validation/validation/rmspe.py b/src/causal_validation/validation/rmspe.py
Lines changed: 1 addition & 5 deletions
@@ -171,7 +171,7 @@ select = [
   "TID",
   "ISC",
 ]
-ignore = ["F722"]
+ignore = ["F722", "PLW1641"]
 
 [tool.ruff.format]
 quote-style = "double"
 
@@ -21,15 +21,45 @@
 
 @dataclass
 class Dataset:
+    """A causal inference dataset containing pre/post intervention observations
+    and optional associated covariates.
+
+    Attributes:
+        Xtr: Pre-intervention control unit observations (N x D)
+        Xte: Post-intervention control unit observations (M x D)
+        ytr: Pre-intervention treated unit observations (N x 1)
+        yte: Post-intervention treated unit observations (M x 1)
+        _start_date: Start date for time indexing
+        Ptr: Pre-intervention control unit covariates (N x D x F)
+        Pte: Post-intervention control unit covariates (M x D x F)
+        Rtr: Pre-intervention treated unit covariates (N x 1 x F)
+        Rte: Post-intervention treated unit covariates (M x 1 x F)
+        counterfactual: Optional counterfactual outcomes (M x 1)
+        synthetic: Optional synthetic control outcomes (M x 1).
+            This is a weighted combination of control units that
+            minimizes a distance-based error w.r.t. the treated
+            unit in the pre-intervention period.
+        _name: Optional name identifier for the dataset
+    """
     Xtr: Float[np.ndarray, "N D"]
     Xte: Float[np.ndarray, "M D"]
     ytr: Float[np.ndarray, "N 1"]
     yte: Float[np.ndarray, "M 1"]
     _start_date: dt.date
+    Ptr: tp.Optional[Float[np.ndarray, "N D F"]] = None
+    Pte: tp.Optional[Float[np.ndarray, "M D F"]] = None
+    Rtr: tp.Optional[Float[np.ndarray, "N 1 F"]] = None
+    Rte: tp.Optional[Float[np.ndarray, "M 1 F"]] = None
     counterfactual: tp.Optional[Float[np.ndarray, "M 1"]] = None
     synthetic: tp.Optional[Float[np.ndarray, "M 1"]] = None
     _name: str = None
 
+    def __post_init__(self):
+        """Derive ``has_covariates`` and reject partially supplied covariates.
+
+        ``has_covariates`` is set to True only when all of ``Ptr``, ``Pte``,
+        ``Rtr`` and ``Rte`` are provided, and to False when none are.
+
+        Raises:
+            ValueError: If some, but not all, covariate arrays are provided.
+        """
+        covariates = {
+            "Ptr": self.Ptr,
+            "Pte": self.Pte,
+            "Rtr": self.Rtr,
+            "Rte": self.Rte,
+        }
+        missing = [name for name, cov in covariates.items() if cov is None]
+        self.has_covariates = not missing
+        # Raise explicitly instead of using ``assert`` so the check is still
+        # enforced when Python runs with ``-O`` (which strips assertions).
+        if missing and len(missing) != len(covariates):
+            raise ValueError(
+                "Covariates must be provided for all of Ptr, Pte, Rtr and Rte "
+                f"or for none of them; missing: {', '.join(missing)}."
+            )
+
     def to_df(
         self, index_start: str = dt.date(year=2023, month=1, day=1)
     ) -> pd.DataFrame:
@@ -59,6 +89,13 @@ def n_units(self) -> int:
     def n_timepoints(self) -> int:
         return self.n_post_intervention + self.n_pre_intervention
 
+    @property
+    def n_covariates(self) -> int:
+        """Number of covariates, read from the last axis of ``Ptr``.
+
+        Returns 0 when the dataset carries no covariates.
+        """
+        return self.Ptr.shape[2] if self.has_covariates else 0
+
     @property
     def control_units(self) -> Float[np.ndarray, "{self.n_timepoints} {self.n_units}"]:
         return np.vstack([self.Xtr, self.Xte])
@@ -67,6 +104,26 @@ def control_units(self) -> Float[np.ndarray, "{self.n_timepoints} {self.n_units}
     def treated_units(self) -> Float[np.ndarray, "{self.n_timepoints} 1"]:
         return np.vstack([self.ytr, self.yte])
 
+    @property
+    def control_covariates(
+        self,
+    ) -> tp.Optional[
+        Float[np.ndarray, "{self.n_timepoints} {self.n_units} {self.n_covariates}"]
+    ]:
+        """Control-unit covariates with ``Ptr`` stacked on top of ``Pte``.
+
+        Returns ``None`` when the dataset carries no covariates.
+        """
+        if not self.has_covariates:
+            return None
+        pre_and_post = [self.Ptr, self.Pte]
+        return np.vstack(pre_and_post)
+
+    @property
+    def treated_covariates(
+        self,
+    ) -> tp.Optional[Float[np.ndarray, "{self.n_timepoints} 1 {self.n_covariates}"]]:
+        """Treated-unit covariates with ``Rtr`` stacked on top of ``Rte``.
+
+        Returns ``None`` when the dataset carries no covariates.
+        """
+        if not self.has_covariates:
+            return None
+        pre_and_post = [self.Rtr, self.Rte]
+        return np.vstack(pre_and_post)
+
     @property
     def pre_intervention_obs(
         self,
@@ -79,6 +136,32 @@ def post_intervention_obs(
     ) -> tp.Tuple[Float[np.ndarray, "M D"], Float[np.ndarray, "M 1"]]:
         return self.Xte, self.yte
 
+    @property
+    def pre_intervention_covariates(
+        self,
+    ) -> tp.Optional[
+        tp.Tuple[
+            Float[np.ndarray, "N D F"], Float[np.ndarray, "N 1 F"],
+        ]
+    ]:
+        """Pair ``(Ptr, Rtr)`` of pre-intervention covariates.
+
+        Returns ``None`` when the dataset carries no covariates.
+        """
+        return (self.Ptr, self.Rtr) if self.has_covariates else None
+
+    @property
+    def post_intervention_covariates(
+        self,
+    ) -> tp.Optional[
+        tp.Tuple[
+            Float[np.ndarray, "M D F"], Float[np.ndarray, "M 1 F"],
+        ]
+    ]:
+        """Pair ``(Pte, Rte)`` of post-intervention covariates.
+
+        Returns ``None`` when the dataset carries no covariates.
+        """
+        return (self.Pte, self.Rte) if self.has_covariates else None
+
     @property
     def full_index(self) -> DatetimeIndex:
         return self._get_index(self._start_date)
@@ -97,7 +180,12 @@ def get_index(self, period: InterventionTypes) -> DatetimeIndex:
             return self.full_index
 
     def _get_columns(self) -> tp.List[str]:
-        colnames = ["T"] + [f"C{i}" for i in range(self.n_units)]
+        # Column order: the treated column "T", one "C{i}" per control unit,
+        # then one "F{i}" per covariate when covariates are present.
+        unit_cols = [f"C{i}" for i in range(self.n_units)]
+        covariate_cols = (
+            [f"F{i}" for i in range(self.n_covariates)]
+            if self.has_covariates
+            else []
+        )
+        colnames = ["T", *unit_cols, *covariate_cols]
         return colnames
 
     def _get_index(self, start_date: dt.date) -> DatetimeIndex:
@@ -116,7 +204,10 @@ def inflate(self, inflation_vals: Float[np.ndarray, "M 1"]) -> Dataset:
         Xtr, ytr = [deepcopy(i) for i in self.pre_intervention_obs]
         Xte, yte = [deepcopy(i) for i in self.post_intervention_obs]
         inflated_yte = yte * inflation_vals
-        return Dataset(Xtr, Xte, ytr, inflated_yte, self._start_date, yte)
+        return Dataset(
+            Xtr, Xte, ytr, inflated_yte, self._start_date,
+            self.Ptr, self.Pte, self.Rtr, self.Rte, yte, self.synthetic, self._name
+        )
 
     def __eq__(self, other: Dataset) -> bool:
         ytr = np.allclose(self.ytr, other.ytr)
@@ -151,14 +242,21 @@ def _slots(self) -> tp.Dict[str, int]:
     def drop_unit(self, idx: int) -> Dataset:
+        """Return a new Dataset with control unit ``idx`` removed.
+
+        Column ``idx`` (axis 1) is deleted from ``Xtr`` and ``Xte`` and, when
+        they are not None, from the control covariates ``Ptr`` and ``Pte``.
+        All other fields are passed to the new Dataset unchanged.
+        """
         Xtr = np.delete(self.Xtr, [idx], axis=1)
         Xte = np.delete(self.Xte, [idx], axis=1)
+        Ptr = np.delete(self.Ptr, [idx], axis=1) if self.Ptr is not None else None
+        Pte = np.delete(self.Pte, [idx], axis=1) if self.Pte is not None else None
         return Dataset(
             Xtr,
             Xte,
             self.ytr,
             self.yte,
             self._start_date,
+            Ptr,
+            Pte,
+            self.Rtr,
+            self.Rte,
             self.counterfactual,
             self.synthetic,
+            self._name,
         )
 
     def to_placebo_data(self, to_treat_idx: int) -> Dataset:
@@ -212,5 +310,7 @@ def reassign_treatment(
     Xtr = data.Xtr
     Xte = data.Xte
     return Dataset(
-        Xtr, Xte, ytr, yte, data._start_date, data.counterfactual, data.synthetic
+        Xtr, Xte, ytr, yte, data._start_date,
+        data.Ptr, data.Pte, data.Rtr, data.Rte,
+        data.counterfactual, data.synthetic, data._name
     )
@@ -1,7 +1,6 @@
 from dataclasses import dataclass
 import typing as tp
 
-from azcausal.core.effect import Effect
 import numpy as np
 import pandas as pd
 from pandera import (
@@ -11,14 +10,8 @@
 )
 from rich.progress import (
     Progress,
-    ProgressBar,
-    track,
 )
 from scipy.stats import ttest_1samp
-from tqdm import (
-    tqdm,
-    trange,
-)
 
 from causal_validation.data import (
     Dataset,
@@ -108,7 +101,7 @@ def execute(self, verbose: bool = True) -> PlaceboTestResult:
                 "[blue]Datasets", total=n_datasets, visible=verbose
             )
             unit_task = progress.add_task(
-                f"[green]Control Units",
+                "[green]Control Units",
                 total=n_control,
                 visible=verbose,
             )
 
@@ -2,18 +2,14 @@
 import typing as tp
 
 from jaxtyping import Float
-import numpy as np
 import pandas as pd
 from pandera import (
     Check,
     Column,
     DataFrameSchema,
 )
-from rich import box
 from rich.progress import (
     Progress,
-    ProgressBar,
-    track,
 )
 
 from causal_validation.validation.placebo import PlaceboTest
@@ -87,7 +83,7 @@ def execute(self, verbose: bool = True) -> RMSPETestResult:
                 "[blue]Datasets", total=n_datasets, visible=verbose
             )
             unit_task = progress.add_task(
-                f"[green]Treatment and Control Units",
+                "[green]Treatment and Control Units",
                 total=n_control + 1,
                 visible=verbose,
             )
Original file line number	Diff line number	Diff line change
`@@ -171,7 +171,7 @@ select = [`
`171`	`171`	`"TID",`
`172`	`172`	`"ISC",`
`173`	`173`	`]`
`174`		`-ignore = ["F722"]`
	`174`	`+ignore = ["F722", "PLW1641"]`
`175`	`175`
`176`	`176`	`[tool.ruff.format]`
`177`	`177`	`quote-style = "double"`