|
| 1 | +from collections import defaultdict |
| 2 | +from dataclasses import dataclass |
| 3 | +from typing import Iterable, Mapping, MutableMapping, TypeAlias |
| 4 | + |
| 5 | +from typing_extensions import TypeVar |
| 6 | + |
| 7 | +from omnipy.util.helpers import is_iterable |
| 8 | + |
# Generic type of the values held by the mappings handled in this module.
T = TypeVar('T')

# Placeholder key given to surplus values that arrive without a key of their own
# (see `prepare_selected_items_with_iterable_data`).
UNTITLED_KEY = '_untitled'
| 12 | + |
| 13 | + |
@dataclass
class SelectedKeys:
    """Keys resolved from a selector, as returned by `select_keys`."""
    # True when the selector was a single string or integer, i.e. it addressed one key.
    singular: bool
    # The selected keys, in the order the selector produced them.
    keys: tuple[str, ...]
    # Position in the mapping computed by `select_keys` for slice and iterable selectors.
    # NOTE(review): presumably forwarded as `last_index` to the `prepare_selected_items_*`
    # functions, where it marks the position after which extra items are inserted - confirm
    # against the caller.
    last_index: int = -1
| 19 | + |
| 20 | + |
# Maps a selected key to the (key, value) item that takes its place, or to None when
# there is no value to put there.
Key2DataItemType: TypeAlias = dict[str, tuple[str, T] | None]
# Maps a position in the target mapping to the extra (key, value) items inserted after it.
Index2DataItemsType: TypeAlias = defaultdict[int, list[tuple[str, T]]]
# The string-keyed mutable mapping type read and produced by the functions in this module.
MappingType: TypeAlias = MutableMapping[str, T]
| 24 | + |
| 25 | + |
def select_keys(selector: str | int | slice | Iterable[str | int],
                mapping: MappingType[T]) -> SelectedKeys:
    """Resolve `selector` into the keys it addresses in `mapping`.

    Args:
        selector: One of
            - a string, used as the key as-is (it is not checked against `mapping`);
            - an integer, used as a position into the keys of `mapping`;
            - a slice, applied to the keys of `mapping`;
            - an iterable mixing strings (used as-is) and integers (positions).
        mapping: The mapping whose key order defines what the positions mean.

    Returns:
        A `SelectedKeys` object. `singular` is True for string and integer selectors.
        For a slice, `last_index` is the normalised stop position minus one. For an
        iterable, it is the position in `mapping` of the last selected key, or the last
        position of `mapping` when that key is absent or nothing was selected.

    Raises:
        IndexError: If an integer position is outside the keys of `mapping`.
        KeyError: If `selector` is none of the supported types.
    """
    if isinstance(selector, str):
        return SelectedKeys(singular=True, keys=(selector,))

    data_keys = tuple(mapping.keys())

    if isinstance(selector, int):
        return SelectedKeys(singular=True, keys=(data_keys[selector],))

    if isinstance(selector, slice):
        # slice.indices() clamps start/stop to the number of keys; [1] is the stop position.
        last_index = selector.indices(len(data_keys))[1] - 1
        return SelectedKeys(singular=False, keys=data_keys[selector], last_index=last_index)

    if is_iterable(selector):
        keys = tuple(data_keys[item] if isinstance(item, int) else item for item in selector)
        if keys and keys[-1] in data_keys:
            last_index = data_keys.index(keys[-1])
        else:
            # Nothing selected, or the last selected key is new: point at the end.
            last_index = len(data_keys) - 1
        return SelectedKeys(singular=False, keys=keys, last_index=last_index)

    raise KeyError('Selector is of incorrect type. Must be a string, a positive integer, '
                   'or a slice (e.g. `dataset[2:5]`).')
| 51 | + |
| 52 | + |
def prepare_selected_items_with_mapping_data(
    keys: tuple[str, ...],
    last_index: int,
    data_obj: MappingType[T],
) -> tuple[Key2DataItemType[T], Index2DataItemsType[T]]:
    """Pair the selected keys with the items of a mapping, position by position.

    Args:
        keys: The selected keys to be replaced.
        last_index: Position under which surplus data items are registered.
        data_obj: Mapping providing the replacement items, in iteration order.

    Returns:
        A 2-tuple of:
        - a dict mapping each selected key to the `(data_key, value)` item at the same
          position in `data_obj`, or to None for selected keys beyond the end of `data_obj`;
        - a defaultdict that holds, under `last_index`, the items of `data_obj` beyond the
          number of selected keys. It is empty when there are none.
    """
    data_items = [(data_key, data_obj[data_key]) for data_key in data_obj.keys()]
    num_keys = len(keys)

    # zip() stops at the shorter input, which gives the positional pairing.
    key_2_data_item: Key2DataItemType[T] = dict(zip(keys, data_items))
    index_2_data_items: Index2DataItemsType[T] = defaultdict(list)

    # More data items than selected keys: the rest become extra items.
    if len(data_items) > num_keys:
        index_2_data_items[last_index].extend(data_items[num_keys:])

    # More selected keys than data items: the rest have nothing to be replaced with.
    for unmatched_key in keys[len(data_items):]:
        key_2_data_item[unmatched_key] = None

    return key_2_data_item, index_2_data_items
| 75 | + |
| 76 | + |
def prepare_selected_items_with_iterable_data(
    keys: tuple[str, ...],
    last_index: int,
    data_obj: tuple[T, ...],
    mapping: MappingType[T],
) -> tuple[Key2DataItemType[T], Index2DataItemsType[T]]:
    """Pair the selected keys with a sequence of bare values, position by position.

    Args:
        keys: The selected keys to be assigned.
        last_index: Position under which extra items are registered.
        data_obj: The values to assign, in order.
        mapping: The target mapping, used to tell existing keys from new ones.

    Returns:
        A 2-tuple of:
        - a dict mapping each selected key that exists in `mapping` to its
          `(key, value)` item, and each selected key beyond the end of `data_obj` to None;
        - a defaultdict that holds, under `last_index`, the items for selected keys not
          present in `mapping`, followed by any values beyond the number of selected keys,
          each keyed with `UNTITLED_KEY`.
    """
    key_2_data_item: Key2DataItemType[T] = {}
    index_2_data_items: Index2DataItemsType[T] = defaultdict(list)
    num_keys = len(keys)

    # zip() stops at the shorter input, which gives the positional pairing.
    for key, data_val in zip(keys, data_obj):
        if key in mapping:
            key_2_data_item[key] = (key, data_val)
        else:
            # New key: registered as an extra item instead of a replacement.
            index_2_data_items[last_index].append((key, data_val))

    # More values than selected keys: the rest all get the same placeholder key.
    if len(data_obj) > num_keys:
        index_2_data_items[last_index].extend(
            (UNTITLED_KEY, val) for val in data_obj[num_keys:])

    # More selected keys than values: the rest have nothing to be assigned.
    for key in keys[len(data_obj):]:
        key_2_data_item[key] = None

    return key_2_data_item, index_2_data_items
| 103 | + |
| 104 | + |
def create_updated_mapping(
    mapping: MappingType[T],
    key_2_data_item: Key2DataItemType[T],
    index_2_data_item: Index2DataItemsType[T],
) -> MappingType[T]:
    """Build a new dict from `mapping` with replacements and extra items applied.

    Args:
        mapping: The original mapping; it is only read.
        key_2_data_item: Replacement item per key. A key mapped to None is left out of
            the result.
        index_2_data_item: Extra items per position. Items under -1 come first; items
            under position `n` follow the entry at position `n` of `mapping`.

    Returns:
        The new dict. Every key is passed through `make_unique_key` (via the helper
        functions) before insertion, so colliding keys are renamed, not overwritten.
    """
    result: dict[str, T] = {}

    # Position -1 holds the items that go before every existing entry.
    uniquely_add_extra_items_by_index_to_mapping(-1, index_2_data_item, result)

    for position, (key, val) in enumerate(mapping.items()):
        if key not in key_2_data_item:
            uniquely_add_item_to_mapping(key, val, result)
        else:
            uniquely_add_item_by_key_to_mapping_if_val(key, key_2_data_item, result)

        uniquely_add_extra_items_by_index_to_mapping(position, index_2_data_item, result)

    return result
| 124 | + |
| 125 | + |
def uniquely_add_extra_items_by_index_to_mapping(
    index: int,
    index_2_data_item: Index2DataItemsType[T],
    mapping: MappingType[T],
) -> None:
    """Add the extra items registered under `index`, if any, to `mapping`.

    Each item goes through `uniquely_add_item_to_mapping`, so keys already present in
    `mapping` are renamed. `mapping` is modified in place.
    """
    # .get() does not trigger the defaultdict factory, so no empty entry is created.
    for extra_key, extra_val in index_2_data_item.get(index, ()):
        uniquely_add_item_to_mapping(extra_key, extra_val, mapping)
| 134 | + |
| 135 | + |
def uniquely_add_item_by_key_to_mapping_if_val(
    key: str,
    key_2_data_item: Key2DataItemType[T],
    mapping: MappingType[T],
):
    """Add the replacement item registered for `key` to `mapping`, unless it is None.

    The item is inserted under its own key (the first element of the item), made unique
    by `uniquely_add_item_to_mapping`. `mapping` is modified in place.

    Raises:
        KeyError: If `key` is not in `key_2_data_item`.
    """
    replacement = key_2_data_item[key]
    if replacement is None:
        return

    new_key, new_val = replacement
    uniquely_add_item_to_mapping(new_key, new_val, mapping)
| 145 | + |
| 146 | + |
def uniquely_add_item_to_mapping(key: str, val: T, mapping: MappingType[T]) -> None:
    """Store `val` in `mapping` under `key`, renamed by `make_unique_key` if it is taken."""
    unique_key = make_unique_key(key, mapping)
    mapping[unique_key] = val
| 149 | + |
| 150 | + |
def make_unique_key(key: str, mapping: MappingType[T]) -> str:
    """Return `key`, or a variant of it that is not yet present in `mapping`.

    While the candidate is taken, a key that already ends in `_<number>` gets that number
    increased by one; any other key gets `_2` appended.
    """
    candidate = key
    while candidate in mapping:
        candidate = (
            increase_duplicate_count(candidate)
            if is_duplicate_name(candidate) else f'{candidate}_2')
    return candidate
| 158 | + |
| 159 | + |
def is_duplicate_name(key: str) -> bool:
    """Tell whether `key` ends in an underscore followed by a number, e.g. `name_2`.

    The part after the last underscore must consist only of decimal characters.
    `str.isdecimal()` is used instead of `str.isdigit()` because the latter also accepts
    characters such as superscript digits, which `int()` (used by
    `increase_duplicate_count`) cannot parse.
    """
    _, separator, suffix = key.rpartition('_')
    # An empty separator means the key contains no underscore at all.
    return bool(separator) and suffix.isdecimal()
| 163 | + |
| 164 | + |
def increase_duplicate_count(key: str) -> str:
    """Return `key` with the number after its last underscore increased by one.

    Raises:
        ValueError: If `key` has no underscore, or the part after the last underscore
            is not an integer.
    """
    stem, count = key.rsplit('_', 1)
    return stem + '_' + str(int(count) + 1)
0 commit comments