Skip to content

Commit fc56b98

Browse files
committed
Implemented flexible __setitem__ of Dataset
1 parent feb5277 commit fc56b98

File tree

3 files changed

+372
-31
lines changed

3 files changed

+372
-31
lines changed

src/omnipy/data/dataset.py

Lines changed: 69 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import os
55
import tarfile
66
from tempfile import TemporaryDirectory
7-
from typing import Any, Callable, cast, Generic, Iterator
7+
from typing import Any, Callable, cast, Generic, Iterator, MutableMapping
88
from urllib.parse import ParseResult, urlparse
99

1010
import humanize
@@ -23,6 +23,12 @@
2323
is_model_instance,
2424
waiting_for_terminal_repr)
2525
from omnipy.data.model import Model
26+
from omnipy.data.selector import (create_updated_mapping,
27+
Index2DataItemsType,
28+
Key2DataItemType,
29+
prepare_selected_items_with_iterable_data,
30+
prepare_selected_items_with_mapping_data,
31+
select_keys)
2632
from omnipy.util.helpers import (get_calling_module_name,
2733
get_default_if_typevar,
2834
is_iterable,
@@ -272,40 +278,73 @@ def _get_standard_field_description(cls) -> str:
272278
'particular specialization of the Model class. Both main classes are wrapping '
273279
'the excellent Python package named `pydantic`.')
274280

275-
def __setitem__(self, data_file: str, data_obj: Any) -> None:
276-
has_prev_value = data_file in self.data
277-
prev_value = self.data.get(data_file)
281+
def __getitem__(self, selector: str | int | slice | Iterable[str | int]) -> Any:
    """Return the item or items addressed by `selector`.

    The selector is resolved against the keys of `self.data` by `select_keys`. When the
    result is marked as singular, the value stored under the single selected key is
    returned as-is. Otherwise a new instance of this class is built from the selected
    key/value pairs, in selection order.
    """
    selected_keys = select_keys(selector, self.data)

    if selected_keys.singular:
        return self.data[selected_keys.keys[0]]

    return self.__class__({key: self.data[key] for key in selected_keys.keys})
288+
289+
def __setitem__(
290+
self,
291+
selector: str | int | slice | Iterable[str | int],
292+
data_obj: dict[str, ModelT] | Iterable[ModelT] | ModelT,
293+
) -> None:
294+
selected_keys = select_keys(selector, self.data)
295+
296+
if selected_keys.singular:
297+
self._set_data_file_and_validate(selected_keys.keys[0], cast(ModelT, data_obj))
298+
else:
299+
key_2_data_item: Key2DataItemType[ModelT]
300+
index_2_data_items: Index2DataItemsType[ModelT]
301+
302+
if isinstance(data_obj, MutableMapping):
303+
key_2_data_item, index_2_data_items = \
304+
prepare_selected_items_with_mapping_data(
305+
selected_keys.keys, selected_keys.last_index, data_obj,)
306+
307+
elif is_iterable(data_obj):
308+
key_2_data_item, index_2_data_items = \
309+
prepare_selected_items_with_iterable_data(
310+
selected_keys.keys, selected_keys.last_index, tuple(data_obj), self.data)
278311

279-
try:
280-
self.data[data_file] = data_obj
281-
self._validate(data_file)
282-
except: # noqa
283-
if has_prev_value:
284-
self.data[data_file] = prev_value
285312
else:
286-
del self.data[data_file]
313+
raise TypeError('Data object must be a mapping or an iterable')
314+
315+
self._update_selected_items_with_data_items(key_2_data_item, index_2_data_items)
316+
317+
def _update_selected_items_with_data_items(
    self,
    key_2_data_item: Key2DataItemType[ModelT],
    index_2_data_item: Index2DataItemsType[ModelT],
) -> None:
    """Apply prepared replacements and insertions to the content of this dataset.

    A new mapping is computed by `create_updated_mapping` from the current `self.data`
    and the two lookup tables, and is then handed to `_replace_data_with_mapping`.

    Args:
        key_2_data_item: Per existing key, the `(new_key, value)` pair replacing it,
            or `None`.
        index_2_data_item: Per position, the `(key, value)` pairs to insert there.
    """
    updated_mapping = create_updated_mapping(self.data, key_2_data_item, index_2_data_item)
    self._replace_data_with_mapping(updated_mapping)
325+
326+
def _replace_data_with_mapping(self, updated_mapping):
327+
prev_data = self.data
328+
try:
329+
self.absorb_and_replace(self.__class__(updated_mapping))
330+
except Exception:
331+
self.data = prev_data
287332
raise
288333

289-
def __getitem__(self, selector: str | int | slice | Iterable[str | int]) -> Any:
290-
if isinstance(selector, str):
291-
if selector in self.data:
292-
return self.data[selector]
293-
else:
294-
return self.data[selector]
295-
else:
296-
data_keys = tuple(self.data.keys())
297-
298-
if isinstance(selector, int):
299-
return self.data[data_keys[selector]]
300-
elif isinstance(selector, slice):
301-
return self.__class__({key: self.data[key] for key in data_keys[selector]})
302-
elif is_iterable(selector):
303-
selected_keys = [data_keys[_] if isinstance(_, int) else _ for _ in selector]
304-
return self.__class__({key: self.data[key] for key in selected_keys})
334+
def _set_data_file_and_validate(self, key: str, val: ModelT) -> None:
335+
has_prev_value = key in self.data
336+
if has_prev_value:
337+
prev_value = self.data[key]
338+
339+
try:
340+
self.data[key] = val
341+
self._validate(key)
342+
except Exception:
343+
if has_prev_value:
344+
self.data[key] = prev_value
305345
else:
306-
raise KeyError(
307-
'Selector is of incorrect type. Must be a string, a positive integer,'
308-
'or a slice (e.g. `dataset[2:5]`).')
346+
del self.data[key]
347+
raise
309348

310349
@classmethod
311350
def update_forward_refs(cls, **localns: Any) -> None:

src/omnipy/data/selector.py

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
from collections import defaultdict
2+
from dataclasses import dataclass
3+
from typing import Iterable, Mapping, MutableMapping, TypeAlias
4+
5+
from typing_extensions import TypeVar
6+
7+
from omnipy.util.helpers import is_iterable
8+
9+
T = TypeVar('T')
10+
11+
UNTITLED_KEY = '_untitled'
12+
13+
14+
@dataclass
class SelectedKeys:
    """Outcome of resolving a selector against the keys of a mapping."""

    # True when the selector addressed one single item rather than a collection of items
    singular: bool
    # The selected keys, in selection order
    keys: tuple[str, ...]
    # Position in the mapping after which surplus items are to be inserted
    last_index: int = -1
19+
20+
21+
Key2DataItemType: TypeAlias = dict[str, tuple[str, T] | None]
22+
Index2DataItemsType: TypeAlias = defaultdict[int, list[tuple[str, T]]]
23+
MappingType: TypeAlias = MutableMapping[str, T]
24+
25+
26+
def select_keys(selector: str | int | slice | Iterable[str | int],
                mapping: MappingType[T]) -> SelectedKeys:
    """Resolve `selector` into concrete keys of `mapping`.

    Args:
        selector: A key (str), a position (int), a slice of positions, or an iterable
            mixing keys and positions.
        mapping: The mapping whose key order defines the positions.

    Returns:
        A `SelectedKeys` that is singular for str and int selectors. For slices and
        iterables it is non-singular and carries `last_index`, the position after which
        surplus items should be inserted.

    Raises:
        IndexError: If an integer position is out of range.
        KeyError: If the selector is of an unsupported type.
    """
    if isinstance(selector, str):
        # String keys are taken verbatim; they do not need to exist in the mapping
        return SelectedKeys(singular=True, keys=(selector,))

    data_keys = tuple(mapping.keys())

    if isinstance(selector, int):
        return SelectedKeys(singular=True, keys=(data_keys[selector],))

    if isinstance(selector, slice):
        start, stop, step = selector.indices(len(data_keys))
        if step > 0:
            last_index = stop - 1
        else:
            # With a negative step `stop` lies below the selection (down to -1), so
            # `stop - 1` may denote a position that is never visited when inserting.
            # Use the position of the last selected key instead, and never go below -1.
            positions = range(start, stop, step)
            last_index = positions[-1] if positions else max(stop - 1, -1)
        return SelectedKeys(singular=False, keys=data_keys[selector], last_index=last_index)

    if is_iterable(selector):
        keys = tuple(data_keys[_] if isinstance(_, int) else _ for _ in selector)
        if keys and keys[-1] in data_keys:
            last_index = data_keys.index(keys[-1])
        else:
            # Empty selection, or the last key is new: insert at the end
            last_index = len(data_keys) - 1
        return SelectedKeys(singular=False, keys=keys, last_index=last_index)

    raise KeyError('Selector is of incorrect type. Must be a string, a positive integer, '
                   'or a slice (e.g. `dataset[2:5]`).')
51+
52+
53+
def prepare_selected_items_with_mapping_data(
    keys: tuple[str, ...],
    last_index: int,
    data_obj: MappingType[T],
) -> tuple[Key2DataItemType[T], Index2DataItemsType[T]]:
    """Pair selected keys with the items of a mapping, position by position.

    Args:
        keys: The selected keys, in selection order.
        last_index: Position under which surplus items are registered.
        data_obj: Mapping providing the new `(key, value)` items.

    Returns:
        A tuple `(key_2_data_item, index_2_data_items)`. The first maps each selected
        key to the `(key, value)` item of `data_obj` at the same position, or to `None`
        when `data_obj` has fewer items than there are selected keys. The second holds,
        under `last_index`, the items of `data_obj` beyond the number of selected keys;
        it stays empty when there are none.
    """
    data_items = tuple(data_obj.items())

    key_2_data_item: Key2DataItemType[T] = dict(zip(keys, data_items))
    index_2_data_items: Index2DataItemsType[T] = defaultdict(list)

    # Only touch the defaultdict when there is something to add, to avoid empty entries
    surplus_items = data_items[len(keys):]
    if surplus_items:
        index_2_data_items[last_index].extend(surplus_items)

    # Selected keys without a matching data item are marked with None
    for key in keys[len(data_items):]:
        key_2_data_item[key] = None

    return key_2_data_item, index_2_data_items
75+
76+
77+
def prepare_selected_items_with_iterable_data(
    keys: tuple[str, ...],
    last_index: int,
    data_obj: tuple[T, ...],
    mapping: MappingType[T],
) -> tuple[Key2DataItemType[T], Index2DataItemsType[T]]:
    """Pair selected keys with a sequence of values, position by position.

    Args:
        keys: The selected keys, in selection order.
        last_index: Position under which new and surplus items are registered.
        data_obj: The new values.
        mapping: The mapping being updated; used to tell existing keys from new ones.

    Returns:
        A tuple `(key_2_data_item, index_2_data_items)`. The first maps each selected
        key found in `mapping` to `(key, value)`, and each selected key lacking a value
        to `None`. The second holds, under `last_index`, the `(key, value)` pairs for
        selected keys not in `mapping`, followed by `(UNTITLED_KEY, value)` pairs for
        values beyond the number of selected keys; it stays empty when there are none.
    """
    key_2_data_item: Key2DataItemType[T] = {}
    index_2_data_items: Index2DataItemsType[T] = defaultdict(list)

    for key, data_val in zip(keys, data_obj):
        if key in mapping:
            key_2_data_item[key] = (key, data_val)
        else:
            index_2_data_items[last_index].append((key, data_val))

    # Only touch the defaultdict when there is something to add, to avoid empty entries
    surplus_vals = data_obj[len(keys):]
    if surplus_vals:
        index_2_data_items[last_index].extend((UNTITLED_KEY, val) for val in surplus_vals)

    # Selected keys without a matching value are marked with None
    for key in keys[len(data_obj):]:
        key_2_data_item[key] = None

    return key_2_data_item, index_2_data_items
103+
104+
105+
def create_updated_mapping(
    mapping: MappingType[T],
    key_2_data_item: Key2DataItemType[T],
    index_2_data_item: Index2DataItemsType[T],
) -> MappingType[T]:
    """Build a new mapping from `mapping` with replacements and insertions applied.

    Args:
        mapping: The current content; it is read but not modified.
        key_2_data_item: Per existing key, the `(new_key, value)` pair replacing it, or
            `None` to leave the item out of the result.
        index_2_data_item: Per position in `mapping`, the `(key, value)` pairs to insert
            directly after the item at that position. Position -1 means before the
            first item.

    Returns:
        A new dict. Keys are made unique on insertion, so a colliding key is renamed
        rather than overwriting an earlier item.
    """
    # NOTE(review): entries of `key_2_data_item` whose key is not present in `mapping`
    # are never visited by the loop below - confirm that callers never rely on them.
    updated_mapping: dict[str, T] = {}

    uniquely_add_extra_items_by_index_to_mapping(-1, index_2_data_item, updated_mapping)

    for i, (key, val) in enumerate(mapping.items()):
        if key in key_2_data_item:
            uniquely_add_item_by_key_to_mapping_if_val(key, key_2_data_item, updated_mapping)
        else:
            uniquely_add_item_to_mapping(key, val, updated_mapping)

        uniquely_add_extra_items_by_index_to_mapping(i, index_2_data_item, updated_mapping)

    return updated_mapping
124+
125+
126+
def uniquely_add_extra_items_by_index_to_mapping(
    index: int,
    index_2_data_item: Index2DataItemsType[T],
    mapping: MappingType[T],
) -> None:
    """Add the items registered under `index`, if any, to `mapping` under unique keys."""
    # `.get()` rather than indexing, so that a lookup never adds an entry to the table
    for key, val in index_2_data_item.get(index, ()):
        uniquely_add_item_to_mapping(key, val, mapping)
134+
135+
136+
def uniquely_add_item_by_key_to_mapping_if_val(
    key: str,
    key_2_data_item: Key2DataItemType[T],
    mapping: MappingType[T],
) -> None:
    """Add the replacement item registered for `key` to `mapping`, unless it is `None`.

    The item is added under its own key (made unique within `mapping`), which may differ
    from `key`. A `None` entry adds nothing.

    Raises:
        KeyError: If `key` is not present in `key_2_data_item`.
    """
    data_item = key_2_data_item[key]
    if data_item is not None:
        new_key, val = data_item
        uniquely_add_item_to_mapping(new_key, val, mapping)
145+
146+
147+
def uniquely_add_item_to_mapping(key: str, val: T, mapping: MappingType[T]) -> None:
    """Store `val` in `mapping` under `key`, renamed if needed to avoid an existing key."""
    mapping[make_unique_key(key, mapping)] = val
149+
150+
151+
def make_unique_key(key: str, mapping: MappingType[T]) -> str:
    """Return `key`, altered as needed so that it is not already a key of `mapping`.

    While the candidate is taken, a candidate ending in `_<number>` gets that number
    increased by one; any other candidate gets `_2` appended.
    """
    while key in mapping:
        if is_duplicate_name(key):
            key = increase_duplicate_count(key)
        else:
            key = f'{key}_2'
    return key
158+
159+
160+
def is_duplicate_name(key: str) -> bool:
    """Tell whether `key` ends with an underscore followed by a decimal number.

    Only suffixes that `int()` can parse count as numbers. `str.isdecimal()` is used
    rather than `str.isdigit()`, since the latter also accepts characters such as
    superscript digits that `int()` rejects.
    """
    _, separator, suffix = key.rpartition('_')
    return bool(separator) and suffix.isdecimal()
163+
164+
165+
def increase_duplicate_count(key: str) -> str:
    """Return `key` with the number after its last underscore increased by one.

    Example: `'data_2'` becomes `'data_3'`.

    Raises:
        ValueError: If `key` contains no underscore, or if the text after the last
            underscore cannot be parsed as an integer.
    """
    base, separator, count = key.rpartition('_')
    if not separator:
        raise ValueError(f'Key {key!r} has no "_<count>" suffix to increase')

    try:
        next_count = int(count) + 1
    except ValueError as err:
        raise ValueError(f'Key {key!r} has a non-numeric suffix: {count!r}') from err

    return f'{base}_{next_count}'

0 commit comments

Comments
 (0)