Skip to content

Commit 0391f61

Browse files
authored
Update pymc to latest as-at Aug 2024. Likely many breaking changes throughout (#116)
* + update version number + update pymc, god help us all * + updated local env * + removed fastprogress.progress_bar from model_pymc.calc.compute_log_likelihood_for_potential because this package is no longer part of pymc/arviz. Lots of other equivalent changes in the pymc/arviz source function too, will need to return to compute_log_likelihood_for_potential to massively update it + minor improvement to eda.eda_io.display_image_file * + added explicit sizing for sns.set_theme 'figure.dpi':72 * + Added a useful get_cr94 to eda.describe * + improved docstrings in plot * + improved facetplot_single with common signature + improved facetplot_single and plot_posterior with transform and and * + refactored display_image_file into new function figio.read for ease and consistency * + included log_prior in sample * + added logx kwarg to plot_ppc * + now initting PandasExcelIO * + typo * + updated create_dfcmb to allow F() in fcat * + added version to PYMCIO fn * + improved model fn version * + probably time for a PR!
1 parent 6805bfd commit 0391f61

19 files changed

+397
-295
lines changed

.flake8

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[flake8]
2-
ignore = E203, E266, W291, W293, F401, F403, E501, W503, W605, C901
2+
ignore = E203, E266, W291, W293, F401, F403, E501, W503, W605, C901, E712
33
max-line-length = 88
44
max-doc-length = 144
55
max-complexity = 18

.pre-commit-config.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ default_language_version:
44
default_stages: [commit, push]
55
repos:
66
- repo: https://github.com/pre-commit/pre-commit-hooks # general checks
7-
rev: v4.5.0
7+
rev: v4.6.0
88
hooks:
99
- id: check-added-large-files
1010
args: ['--maxkb=1024']
@@ -31,7 +31,7 @@ repos:
3131
- id: no-print-statements
3232
files: ^oreum_core/
3333
- repo: https://github.com/psf/black # black formatter
34-
rev: 23.12.1
34+
rev: 24.8.0
3535
hooks:
3636
- id: black
3737
files: ^oreum_core/
@@ -41,26 +41,26 @@ repos:
4141
- id: isort
4242
files: ^oreum_core/
4343
- repo: https://github.com/pycqa/flake8 # flake8 linter
44-
rev: 7.0.0
44+
rev: 7.1.0
4545
hooks:
4646
- id: flake8
4747
files: ^oreum_core/
4848
- repo: https://github.com/pycqa/bandit # basic security checks for python code
49-
rev: 1.7.6
49+
rev: 1.7.9
5050
hooks:
5151
- id: bandit
5252
files: ^oreum_core/
5353
args: ["--config", "pyproject.toml"]
5454
additional_dependencies: ["bandit[toml]"]
5555
- repo: https://github.com/econchick/interrogate # check for docstrings
56-
rev: 1.5.0
56+
rev: 1.7.0
5757
hooks:
5858
- id: interrogate
5959
files: ^oreum_core/
6060
args: [--config, pyproject.toml]
6161
pass_filenames: false # see https://github.com/econchick/interrogate/issues/60#issuecomment-1180262851
6262
- repo: https://gitlab.com/iam-cms/pre-commit-hooks # apply Apache2 header
63-
rev: v0.4.0
63+
rev: v0.6.0
6464
hooks:
6565
- id: apache-license
6666
files: ^oreum_core/

LICENSES_THIRD_PARTY.md

Lines changed: 176 additions & 171 deletions
Large diffs are not rendered by default.

assets/img/interrogate_badge.svg

Lines changed: 4 additions & 4 deletions
Loading

oreum_core/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
"""Core tools for use on projects by Oreum Industries"""
1616
import logging
1717

18-
__version__ = "0.8.1"
18+
__version__ = "0.9.0"
1919

2020
# logger goes to null handler by default
2121
# packages that import oreum_core can override this and direct elsewhere

oreum_core/curate/__init__.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,13 @@
1414

1515
# curate/
1616
"""Various classes & functions for data curation"""
17-
from .data_io import PandasCSVIO, PandasParquetIO, SimpleStringIO, copy_csv2md
17+
from .data_io import (
18+
PandasCSVIO,
19+
PandasExcelIO,
20+
PandasParquetIO,
21+
SimpleStringIO,
22+
copy_csv2md,
23+
)
1824
from .data_transform import (
1925
DatasetReshaper,
2026
DatatypeConverter,

oreum_core/curate/data_io.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def __init__(self, *args, **kwargs):
102102

103103
def read(self, fn: str, *args, **kwargs) -> pd.DataFrame:
104104
"""Read excel fn from rootdir, pass args kwargs to pd.read_excel"""
105-
fn = Path(fn).with_suffix('.xslx')
105+
fn = Path(fn).with_suffix('.xlsx')
106106
fqn = self.get_path_read(fn)
107107
_log.info(f'Read from {str(fqn.resolve())}')
108108
return pd.read_excel(str(fqn), *args, **kwargs)

oreum_core/curate/data_transform.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,7 @@ def create_dfcmb(self, df: pd.DataFrame, ftsd: dict) -> pd.DataFrame:
228228
dfcmb = pd.DataFrame(index=[0])
229229
fts_factor = ftsd.get('fcat', []) + ftsd.get('fbool', [])
230230
for ft in fts_factor:
231+
ft = ft[2:-1] if ft[:2] == 'F(' else ft
231232
colnames_pre = list(dfcmb.columns.values)
232233
s = pd.Series(np.unique(df[ft]), name=ft)
233234
dfcmb = pd.concat([dfcmb, s], axis=1, join='outer', ignore_index=True)

oreum_core/eda/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
tril_nan,
2828
)
2929
from .describe import describe, display_fw, display_ht, get_fts_by_dtype
30-
from .eda_io import FigureIO, display_image_file, output_data_dict
30+
from .eda_io import FigureIO, output_data_dict
3131
from .plot import ( # plot_umap,; plot_r2_range,; plot_r2_range_pair,
3232
plot_accuracy,
3333
plot_binary_performance,

oreum_core/eda/describe.py

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ def describe(
3636
limit: int = 50, # MB
3737
get_mode: bool = False,
3838
get_counts: bool = True,
39+
get_cr94: bool = False,
3940
reset_index: bool = True,
4041
return_df: bool = False,
4142
**kwargs,
@@ -68,7 +69,12 @@ def describe(
6869
df = df.reset_index()
6970

7071
# start with pandas describe, add on dtypes
71-
dfdesc = df.describe(include='all').T
72+
quantiles = [0.25, 0.5, 0.75] # the default
73+
percentile_names = ['25%', '50%', '75%']
74+
if get_cr94:
75+
quantiles = [0.03] + quantiles + [0.97]
76+
percentile_names = ['3%'] + percentile_names + ['97%']
77+
dfdesc = df.describe(include='all', percentiles=quantiles).T
7278

7379
dfout = pd.concat((dfdesc, df.dtypes), axis=1, join='outer', sort=False)
7480
dfout = dfout.loc[df.columns.values]
@@ -100,23 +106,23 @@ def describe(
100106
dfout.loc[ft, 'min'] = df[ft].value_counts().index.min()
101107
dfout.loc[ft, 'max'] = df[ft].value_counts().index.max()
102108

103-
fts_out_all = [
104-
'dtype',
105-
'count_null',
106-
'count_inf',
107-
'count_zero',
108-
'count_unique',
109-
'top',
110-
'freq',
111-
'sum',
112-
'mean',
113-
'std',
114-
'min',
115-
'25%',
116-
'50%',
117-
'75%',
118-
'max',
119-
]
109+
fts_out_all = (
110+
[
111+
'dtype',
112+
'count_null',
113+
'count_inf',
114+
'count_zero',
115+
'count_unique',
116+
'top',
117+
'freq',
118+
'sum',
119+
'mean',
120+
'std',
121+
'min',
122+
]
123+
+ percentile_names
124+
+ ['max']
125+
)
120126
fts_out = [f for f in fts_out_all if f in dfout.columns.values]
121127

122128
# add mode and mode count WARNING takes forever for large arrays (>10k row)

0 commit comments

Comments
 (0)