Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
History
=======

0.1.8 (2024-08-29)
0.1.10 (2024-??-??)
-------------------
* Long EM and RPCA operations wrapped with tqdm progress bars

0.1.9 (2024-08-29)
------------------
* Tutorials reproducibility improved with random_state parameters
* RPCA now accepts random_state parameters
Expand Down
5 changes: 3 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -70,17 +70,18 @@ With just these few lines of code, you can see how easy it is to
from qolmat.utils import data

# load and prepare csv data

df_data = data.get_data("Beijing")
columns = ["TEMP", "PRES", "WSPM"]
df_data = df_data[columns]
df_with_nan = data.add_holes(df_data, ratio_masked=0.2, mean_size=120)

# impute and compare
imputer_mean = imputers.ImputerSimple(strategy="mean", groups=("station",))
imputer_median = imputers.ImputerSimple(groups=("station",))
imputer_interpol = imputers.ImputerInterpolation(method="linear", groups=("station",))
imputer_var1 = imputers.ImputerEM(model="VAR", groups=("station",), method="mle", max_iter_em=50, n_iter_ou=15, dt=1e-3, p=1)
dict_imputers = {
"mean": imputer_mean,
"median": imputer_median,
"interpolation": imputer_interpol,
"VAR(1) process": imputer_var1
}
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ statsmodels = ">= 0.14.0"
typed-ast = { version = "*", optional = true }
category-encoders = "^2.6.3"
dcor = ">= 0.6"
tqdm = "*"

[tool.poetry.group.torch.dependencies]
torch = "< 2.5"
Expand Down
8 changes: 7 additions & 1 deletion qolmat/imputations/em_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from scipy import optimize as spo
from sklearn import utils as sku
from sklearn.base import BaseEstimator, TransformerMixin
from tqdm import tqdm

from qolmat.utils import utils
from qolmat.utils.utils import RandomSetting
Expand Down Expand Up @@ -433,7 +434,11 @@ def fit_X(self, X: NDArray) -> None:

X = self._maximize_likelihood(X_imp, mask_na)

for iter_em in range(self.max_iter_em):
for iter_em in tqdm(
range(self.max_iter_em),
desc="EM parameters estimation",
disable=not self.verbose,
):
X = self._sample_ou(X, mask_na)

self.combine_parameters()
Expand Down Expand Up @@ -474,6 +479,7 @@ def fit(self, X: NDArray) -> "EM":
if hasattr(self, "p_to_fit") and self.p_to_fit:
aics: List[float] = []
for p in range(self.max_lagp + 1):
print("p=", p)
self.p = p
self.fit_X(X)
n1, n2 = self.X.shape
Expand Down
12 changes: 11 additions & 1 deletion qolmat/imputations/rpca/rpca_noisy.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from scipy.sparse import dok_matrix, identity
from scipy.sparse.linalg import spsolve
from sklearn import utils as sku
from tqdm import tqdm

from qolmat.imputations.rpca import rpca_utils
from qolmat.imputations.rpca.rpca import RPCA
Expand Down Expand Up @@ -200,6 +201,7 @@ def decompose_with_basis(
max_iterations=self.max_iterations,
tolerance=self.tolerance,
norm=self.norm,
verbose=self.verbose,
)

self._check_cost_function_minimized(D, M, A, Omega, tau, lam)
Expand All @@ -219,6 +221,7 @@ def minimise_loss(
max_iterations: int = 10000,
tolerance: float = 1e-6,
norm: str = "L2",
verbose: bool = False,
) -> Tuple:
"""Compute the noisy RPCA with a L2 time penalisation.

Expand Down Expand Up @@ -255,6 +258,9 @@ def minimise_loss(
consecutive iterations. Defaults to 1e-6.
norm : str, optional
Error norm, can be "L1" or "L2". Defaults to "L2".
verbose : bool, optional
If True, a tqdm progress bar is displayed over the minimisation
iterations; if False, the progress bar is disabled. Defaults to False.

Returns
-------
Expand Down Expand Up @@ -311,7 +317,11 @@ def minimise_loss(
Ir = np.eye(rank)
In = identity(n_rows)

for _ in range(max_iterations):
for _ in tqdm(
range(max_iterations),
desc="Noisy RPCA loss minimization",
disable=not verbose,
):
M_temp = M.copy()
A_temp = A.copy()
L_temp = L.copy()
Expand Down
7 changes: 6 additions & 1 deletion qolmat/imputations/rpca/rpca_pcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import numpy as np
from numpy.typing import NDArray
from sklearn import utils as sku
from tqdm import tqdm

from qolmat.imputations.rpca import rpca_utils
from qolmat.imputations.rpca.rpca import RPCA
Expand Down Expand Up @@ -125,7 +126,11 @@ def decompose(self, D: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]:
errors: NDArray = np.full((self.max_iterations,), fill_value=np.nan)

M: NDArray = D - A
for iteration in range(self.max_iterations):
for iteration in tqdm(
range(self.max_iterations),
desc="RPCA PCP decomposition",
disable=not self.verbose,
):
M = rpca_utils.svd_thresholding(D - A + Y / mu, 1 / mu)
A = rpca_utils.soft_thresholding(D - M + Y / mu, lam / mu)
A[~Omega] = (D - M)[~Omega]
Expand Down
7 changes: 6 additions & 1 deletion qolmat/imputations/softimpute.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from numpy.typing import NDArray
from sklearn import utils as sku
from sklearn.base import BaseEstimator, TransformerMixin
from tqdm import tqdm

from qolmat.imputations.rpca import rpca_utils
from qolmat.utils import utils
Expand Down Expand Up @@ -146,7 +147,11 @@ def decompose(self, X: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]:
B = V * D
M = A @ B.T
cost_start = SoftImpute.cost_function(X, M, A, Omega, tau)
for iter_ in range(self.max_iterations):
for iter_ in tqdm(
range(self.max_iterations),
desc="Soft Impute decomposition",
disable=not self.verbose,
):
U_old = U
V_old = V
D_old = D
Expand Down
Loading