1616"""Data Descriptions"""
1717
1818import logging
19+ from copy import copy
1920
2021import numpy as np
2122import pandas as pd
@@ -40,7 +41,7 @@ def describe(
4041 get_cr94 : bool = False ,
4142 reset_index : bool = True ,
4243 return_df : bool = False ,
43- subsample : bool = False ,
44+ subsample : bool = True ,
4445 ** kwargs ,
4546) -> pd .DataFrame | None :
4647 """Concat transposed topN rows, numerical desc & dtypes
@@ -52,12 +53,13 @@ def describe(
5253 df = df .copy ()
5354 len_idx = df .index .nlevels
5455 nbytes = df .values .nbytes
55- _log .info (f"Shape: { df .shape } " )
56+ shape = df .shape
57+ _log .info (f"Shape: { shape } " )
5658 _log .info (f"Memsize: { nbytes // 1e6 :,.1f} MB" )
5759 _log .info (f"Index levels: { df .index .names } " )
58- if nfeats + len_idx < df . shape [1 ]:
60+ if nfeats + len_idx < shape [1 ]:
5961 _log .info (
60- f"NOTE: nfeats + index shown { nfeats + len_idx } " + f" < width { df . shape [1 ]} "
62+ f"NOTE: nfeats + index shown { nfeats + len_idx } " + f" < width { shape [1 ]} "
6163 )
6264
6365 limit *= 1e6
@@ -67,6 +69,10 @@ def describe(
6769 )
6870 if subsample :
6971 df = df .sample (frac = (limit * 0.99 ) / nbytes , random_state = 42 )
72+ nbytes_pre = copy (nbytes )
73+ shape_pre = copy (shape )
74+ nbytes = df .values .nbytes
75+ shape = df .shape
7076 nobs = min (nobs , len (df ))
7177 _log .info (txt + f", taking a subsample of { len (df )} rows" )
7278 else :
@@ -157,20 +163,21 @@ def describe(
157163 if return_df :
158164 return dfout
159165 else :
160- display_fw (
161- dfout . iloc [: nfeats + len_idx , :],
162- max_rows = nfeats ,
163- shape = df . shape ,
164- nbytes = nbytes ,
165- ** kwargs ,
166- )
166+ kws_out = dict ( max_rows = nfeats , shape = shape , nbytes = nbytes )
167+ if subsample :
168+ kws_out [ "txtadd" ] = (
169+ f"subsampled from Shape: { shape_pre } ,"
170+ + f" Memsize { nbytes_pre / 1e6 :,.1f } MB"
171+ )
172+ display_fw ( dfout . iloc [: nfeats + len_idx , :], ** kws_out , ** kwargs )
167173
168174
169175def display_fw (df : pd .DataFrame , ** kwargs ) -> None :
170176 """Conv fn: contextually display max cols"""
171177
172178 shape = kwargs .pop ("shape" , df .shape )
173179 nbytes = kwargs .pop ("nbytes" , df .values .nbytes )
180+ txtadd = kwargs .pop ("txtadd" , None )
174181
175182 options = {
176183 "display.precision" : kwargs .pop ("precision" , 2 ),
@@ -185,7 +192,8 @@ def display_fw(df: pd.DataFrame, **kwargs) -> None:
185192
186193 with pd .option_context (* [i for tup in options .items () for i in tup ]):
187194 display (df )
188- display (f"Shape: { shape } , Memsize { nbytes / 1e6 :,.1f} MB" )
195+ t = f"Shape: { shape } , Memsize { nbytes / 1e6 :,.1f} MB"
196+ display (", " .join (filter (None , [t , txtadd ])))
189197
190198
191199def display_ht (df : pd .DataFrame , nrows = 3 , ** kwargs ) -> None :
0 commit comments