|
73 | 73 | Generator, |
74 | 74 | Hashable, |
75 | 75 | Iterator, |
76 | | - Sequence, |
77 | 76 | ) |
78 | 77 |
|
79 | 78 | from pandas.core.generic import NDFrame |
@@ -581,25 +580,21 @@ class BaseGrouper: |
581 | 580 | def __init__( |
582 | 581 | self, |
583 | 582 | axis: Index, |
584 | | - groupings: Sequence[grouper.Grouping], |
| 583 | + groupings: list[grouper.Grouping], |
585 | 584 | sort: bool = True, |
586 | 585 | dropna: bool = True, |
587 | 586 | ) -> None: |
588 | 587 | assert isinstance(axis, Index), axis |
589 | 588 |
|
590 | 589 | self.axis = axis |
591 | | - self._groupings: list[grouper.Grouping] = list(groupings) |
| 590 | + self._groupings = groupings |
592 | 591 | self._sort = sort |
593 | 592 | self.dropna = dropna |
594 | 593 |
|
595 | 594 | @property |
596 | 595 | def groupings(self) -> list[grouper.Grouping]: |
597 | 596 | return self._groupings |
598 | 597 |
|
599 | | - @property |
600 | | - def shape(self) -> Shape: |
601 | | - return tuple(ping.ngroups for ping in self.groupings) |
602 | | - |
603 | 598 | def __iter__(self) -> Iterator[Hashable]: |
604 | 599 | return iter(self.indices) |
605 | 600 |
|
@@ -628,11 +623,15 @@ def _get_splitter(self, data: NDFrame) -> DataSplitter: |
628 | 623 | ------- |
629 | 624 | Generator yielding subsetted objects |
630 | 625 | """ |
631 | | - ids, ngroups = self.group_info |
632 | | - return _get_splitter( |
| 626 | + if isinstance(data, Series): |
| 627 | + klass: type[DataSplitter] = SeriesSplitter |
| 628 | + else: |
| 629 | + # i.e. DataFrame |
| 630 | + klass = FrameSplitter |
| 631 | + |
| 632 | + return klass( |
633 | 633 | data, |
634 | | - ids, |
635 | | - ngroups, |
| 634 | + self.ngroups, |
636 | 635 | sorted_ids=self._sorted_ids, |
637 | 636 | sort_idx=self.result_ilocs, |
638 | 637 | ) |
@@ -692,7 +691,8 @@ def size(self) -> Series: |
692 | 691 | """ |
693 | 692 | Compute group sizes. |
694 | 693 | """ |
695 | | - ids, ngroups = self.group_info |
| 694 | + ids = self.ids |
| 695 | + ngroups = self.ngroups |
696 | 696 | out: np.ndarray | list |
697 | 697 | if ngroups: |
698 | 698 | out = np.bincount(ids[ids != -1], minlength=ngroups) |
@@ -729,12 +729,6 @@ def has_dropped_na(self) -> bool: |
729 | 729 | """ |
730 | 730 | return bool((self.ids < 0).any()) |
731 | 731 |
|
732 | | - @cache_readonly |
733 | | - def group_info(self) -> tuple[npt.NDArray[np.intp], int]: |
734 | | - result_index, ids = self.result_index_and_ids |
735 | | - ngroups = len(result_index) |
736 | | - return ids, ngroups |
737 | | - |
738 | 732 | @cache_readonly |
739 | 733 | def codes_info(self) -> npt.NDArray[np.intp]: |
740 | 734 | # return the codes of items in original grouped axis |
@@ -1123,10 +1117,6 @@ def indices(self): |
1123 | 1117 | i = bin |
1124 | 1118 | return indices |
1125 | 1119 |
|
1126 | | - @cache_readonly |
1127 | | - def group_info(self) -> tuple[npt.NDArray[np.intp], int]: |
1128 | | - return self.ids, self.ngroups |
1129 | | - |
1130 | 1120 | @cache_readonly |
1131 | 1121 | def codes(self) -> list[npt.NDArray[np.intp]]: |
1132 | 1122 | return [self.ids] |
@@ -1191,29 +1181,25 @@ class DataSplitter(Generic[NDFrameT]): |
1191 | 1181 | def __init__( |
1192 | 1182 | self, |
1193 | 1183 | data: NDFrameT, |
1194 | | - labels: npt.NDArray[np.intp], |
1195 | 1184 | ngroups: int, |
1196 | 1185 | *, |
1197 | 1186 | sort_idx: npt.NDArray[np.intp], |
1198 | 1187 | sorted_ids: npt.NDArray[np.intp], |
1199 | 1188 | ) -> None: |
1200 | 1189 | self.data = data |
1201 | | - self.labels = ensure_platform_int(labels) # _should_ already be np.intp |
1202 | 1190 | self.ngroups = ngroups |
1203 | 1191 |
|
1204 | 1192 | self._slabels = sorted_ids |
1205 | 1193 | self._sort_idx = sort_idx |
1206 | 1194 |
|
1207 | 1195 | def __iter__(self) -> Iterator: |
1208 | | - sdata = self._sorted_data |
1209 | | - |
1210 | 1196 | if self.ngroups == 0: |
1211 | 1197 | # we are inside a generator, rather than raise StopIteration |
1212 | 1198 | # we merely return to signal the end |
1213 | 1199 | return |
1214 | 1200 |
|
1215 | 1201 | starts, ends = lib.generate_slices(self._slabels, self.ngroups) |
1216 | | - |
| 1202 | + sdata = self._sorted_data |
1217 | 1203 | for start, end in zip(starts, ends): |
1218 | 1204 | yield self._chop(sdata, slice(start, end)) |
1219 | 1205 |
|
@@ -1241,20 +1227,3 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: |
1241 | 1227 | mgr = sdata._mgr.get_slice(slice_obj, axis=1) |
1242 | 1228 | df = sdata._constructor_from_mgr(mgr, axes=mgr.axes) |
1243 | 1229 | return df.__finalize__(sdata, method="groupby") |
1244 | | - |
1245 | | - |
1246 | | -def _get_splitter( |
1247 | | - data: NDFrame, |
1248 | | - labels: npt.NDArray[np.intp], |
1249 | | - ngroups: int, |
1250 | | - *, |
1251 | | - sort_idx: npt.NDArray[np.intp], |
1252 | | - sorted_ids: npt.NDArray[np.intp], |
1253 | | -) -> DataSplitter: |
1254 | | - if isinstance(data, Series): |
1255 | | - klass: type[DataSplitter] = SeriesSplitter |
1256 | | - else: |
1257 | | - # i.e. DataFrame |
1258 | | - klass = FrameSplitter |
1259 | | - |
1260 | | - return klass(data, labels, ngroups, sort_idx=sort_idx, sorted_ids=sorted_ids) |
0 commit comments