diff --git a/HISTORY.rst b/HISTORY.rst index 4b499475..321864b2 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -2,7 +2,11 @@ History ======= -0.1.8 (2024-08-29) +0.1.10 (2024-??-??) +------------------- +* Long EM and RPCA operations wrapped with tqdm progress bars + +0.1.9 (2024-08-29) ------------------ * Tutorials reproducibility improved with random_state parameters * RPCA now accepts random_state parameters diff --git a/README.rst b/README.rst index 1f292ebf..83af908f 100644 --- a/README.rst +++ b/README.rst @@ -70,17 +70,18 @@ With just these few lines of code, you can see how easy it is to from qolmat.utils import data # load and prepare csv data + df_data = data.get_data("Beijing") columns = ["TEMP", "PRES", "WSPM"] df_data = df_data[columns] df_with_nan = data.add_holes(df_data, ratio_masked=0.2, mean_size=120) # impute and compare - imputer_mean = imputers.ImputerSimple(strategy="mean", groups=("station",)) + imputer_median = imputers.ImputerSimple(groups=("station",)) imputer_interpol = imputers.ImputerInterpolation(method="linear", groups=("station",)) imputer_var1 = imputers.ImputerEM(model="VAR", groups=("station",), method="mle", max_iter_em=50, n_iter_ou=15, dt=1e-3, p=1) dict_imputers = { - "mean": imputer_mean, + "median": imputer_median, "interpolation": imputer_interpol, "VAR(1) process": imputer_var1 } diff --git a/pyproject.toml b/pyproject.toml index ca6327cb..7f3d6c37 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,7 @@ statsmodels = ">= 0.14.0" typed-ast = { version = "*", optional = true } category-encoders = "^2.6.3" dcor = ">= 0.6" +tqdm = "*" [tool.poetry.group.torch.dependencies] torch = "< 2.5" diff --git a/qolmat/imputations/em_sampler.py b/qolmat/imputations/em_sampler.py index 449b4f68..d6845079 100644 --- a/qolmat/imputations/em_sampler.py +++ b/qolmat/imputations/em_sampler.py @@ -11,6 +11,7 @@ from scipy import optimize as spo from sklearn import utils as sku from sklearn.base import BaseEstimator, TransformerMixin 
+from tqdm import tqdm from qolmat.utils import utils from qolmat.utils.utils import RandomSetting @@ -433,7 +434,11 @@ def fit_X(self, X: NDArray) -> None: X = self._maximize_likelihood(X_imp, mask_na) - for iter_em in range(self.max_iter_em): + for iter_em in tqdm( + range(self.max_iter_em), + desc="EM parameters estimation", + disable=not self.verbose, + ): X = self._sample_ou(X, mask_na) self.combine_parameters() @@ -474,6 +479,7 @@ def fit(self, X: NDArray) -> "EM": if hasattr(self, "p_to_fit") and self.p_to_fit: aics: List[float] = [] for p in range(self.max_lagp + 1): + print("p=", p) self.p = p self.fit_X(X) n1, n2 = self.X.shape diff --git a/qolmat/imputations/rpca/rpca_noisy.py b/qolmat/imputations/rpca/rpca_noisy.py index 59164a3b..62bd6f08 100644 --- a/qolmat/imputations/rpca/rpca_noisy.py +++ b/qolmat/imputations/rpca/rpca_noisy.py @@ -11,6 +11,7 @@ from scipy.sparse import dok_matrix, identity from scipy.sparse.linalg import spsolve from sklearn import utils as sku +from tqdm import tqdm from qolmat.imputations.rpca import rpca_utils from qolmat.imputations.rpca.rpca import RPCA @@ -200,6 +201,7 @@ def decompose_with_basis( max_iterations=self.max_iterations, tolerance=self.tolerance, norm=self.norm, + verbose=self.verbose, ) self._check_cost_function_minimized(D, M, A, Omega, tau, lam) @@ -219,6 +221,7 @@ def minimise_loss( max_iterations: int = 10000, tolerance: float = 1e-6, norm: str = "L2", + verbose: bool = False, ) -> Tuple: """Compute the noisy RPCA with a L2 time penalisation. @@ -255,6 +258,9 @@ def minimise_loss( consecutive iterations. Defaults to 1e-6. norm : str, optional Error norm, can be "L1" or "L2". Defaults to "L2". + verbose : bool, optional + If True, a tqdm progress bar tracks the iterations. Defaults to + False. 
Returns ------- @@ -311,7 +317,11 @@ def minimise_loss( Ir = np.eye(rank) In = identity(n_rows) - for _ in range(max_iterations): + for _ in tqdm( + range(max_iterations), + desc="Noisy RPCA loss minimization", + disable=not verbose, + ): M_temp = M.copy() A_temp = A.copy() L_temp = L.copy() diff --git a/qolmat/imputations/rpca/rpca_pcp.py b/qolmat/imputations/rpca/rpca_pcp.py index afb6dea3..e018dbf9 100644 --- a/qolmat/imputations/rpca/rpca_pcp.py +++ b/qolmat/imputations/rpca/rpca_pcp.py @@ -8,6 +8,7 @@ import numpy as np from numpy.typing import NDArray from sklearn import utils as sku +from tqdm import tqdm from qolmat.imputations.rpca import rpca_utils from qolmat.imputations.rpca.rpca import RPCA @@ -125,7 +126,11 @@ def decompose(self, D: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]: errors: NDArray = np.full((self.max_iterations,), fill_value=np.nan) M: NDArray = D - A - for iteration in range(self.max_iterations): + for iteration in tqdm( + range(self.max_iterations), + desc="RPCA PCP decomposition", + disable=not self.verbose, + ): M = rpca_utils.svd_thresholding(D - A + Y / mu, 1 / mu) A = rpca_utils.soft_thresholding(D - M + Y / mu, lam / mu) A[~Omega] = (D - M)[~Omega] diff --git a/qolmat/imputations/softimpute.py b/qolmat/imputations/softimpute.py index 912a9294..63688812 100644 --- a/qolmat/imputations/softimpute.py +++ b/qolmat/imputations/softimpute.py @@ -10,6 +10,7 @@ from numpy.typing import NDArray from sklearn import utils as sku from sklearn.base import BaseEstimator, TransformerMixin +from tqdm import tqdm from qolmat.imputations.rpca import rpca_utils from qolmat.utils import utils @@ -146,7 +147,11 @@ def decompose(self, X: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]: B = V * D M = A @ B.T cost_start = SoftImpute.cost_function(X, M, A, Omega, tau) - for iter_ in range(self.max_iterations): + for iter_ in tqdm( + range(self.max_iterations), + desc="Soft Impute decomposition", + disable=not self.verbose, + ): U_old = U V_old 
= V D_old = D