diff --git a/immutablecollections/_immutabledict.py b/immutablecollections/_immutabledict.py index 152cb47..97b9095 100644 --- a/immutablecollections/_immutabledict.py +++ b/immutablecollections/_immutabledict.py @@ -2,11 +2,9 @@ from typing import ( Callable, Dict, - Generic, Iterable, Iterator, Mapping, - MutableMapping, Optional, Set, Tuple, @@ -134,7 +132,7 @@ def of(dict_: AllowableSourceType) -> "ImmutableDict[KT, VT]": if isinstance(dict_, ImmutableDict): return dict_ else: - return ImmutableDict.builder().put_all(dict_).build() # type:ignore + return immutabledict(dict_) @staticmethod def empty() -> "ImmutableDict[KT, VT]": @@ -143,14 +141,6 @@ def empty() -> "ImmutableDict[KT, VT]": """ return _EMPTY - @staticmethod - def builder() -> "ImmutableDict.Builder[KT, VT]": - """ - Deprecated - prefer to build a list of tuples and pass them to the ``immutabledict`` - module-level factory - """ - return ImmutableDict.Builder() - @staticmethod def index( items: Iterable[VT], key_function: Callable[[VT], KT] @@ -163,9 +153,6 @@ def index( """ return immutabledict((key_function(item), item) for item in items) - def modified_copy_builder(self) -> "ImmutableDict.Builder[KT, VT]": - return ImmutableDict.Builder(source=self) - def filter_keys(self, predicate: Callable[[KT], bool]) -> "ImmutableDict[KT, VT]": """ Filters an ImmutableDict by a predicate on its keys. 
@@ -197,60 +184,6 @@ def __reduce__(self): _repr = tuple(self.items()) return (immutabledict, (_repr,)) - class Builder(Generic[KT2, VT2]): - def __init__(self, source: "ImmutableDict[KT2,VT2]" = None) -> None: - self._dict: MutableMapping[KT2, VT2] = {} - self.source = source - - def put(self: SelfType, key: KT2, val: VT2) -> SelfType: - if self.source: - # we only lazily copy the contents of source because if no changes are ever made - # we can just reuse it - # we need the temporary variable because the call to put_all below will - # call this put method again and we need self.source to be None to avoid an - # infinite loop - tmp_source = self.source - # Defend against multithreading scenario where another thread has cleared - # self.source already. Not that this code is meant to be thread-safe anyway, - # but at least you won't get non-deterministic crashes - if tmp_source is not None: - self.source = None - self.put_all(tmp_source) - - self._dict[key] = val - return self - - def put_all( - self: SelfType, data: Union[Mapping[KT2, VT2], Iterable[IT2]] - ) -> SelfType: - if isinstance(data, Mapping): - for (k, v) in data.items(): - self.put(k, v) - elif isinstance(data, Iterable): - # mypy is confused - for (k, v) in data: # type: ignore - self.put(k, v) - else: - raise TypeError( - "Can only initialize ImmutableDict from another dictionary or " - "a sequence of key-value pairs" - ) - return self - - def __setitem__(self, key: KT2, value: VT2) -> None: - self.put(key, value) - - def build(self) -> "ImmutableDict[KT2, VT2]": - if self.source: - # if any puts were done this will be None. 
If no puts were done we can return - # the ImmutableDict we were based on because we will be identical and immutable - # objects can be safely shared - return self.source - if self._dict: - return _RegularDictBackedImmutableDict(self._dict) - else: - return _EMPTY - class _RegularDictBackedImmutableDict(ImmutableDict[KT, VT]): __slots__ = ("_dict", "_hash") diff --git a/immutablecollections/_immutablemultidict.py b/immutablecollections/_immutablemultidict.py index b334329..701b76f 100644 --- a/immutablecollections/_immutablemultidict.py +++ b/immutablecollections/_immutablemultidict.py @@ -321,10 +321,9 @@ def __init__( source: Optional["ImmutableMultiDict[KT2,VT2]"] = None, order_key: Callable[[VT2], Any] = None, ) -> None: - self._dict: MutableMapping[KT2, ImmutableSet.Builder[VT2]] = defaultdict( - lambda: ImmutableSet.builder(order_key=order_key) - ) + self._dict: MutableMapping[KT2, List[VT2]] = defaultdict(list) self._source = source + self._order_key = order_key self._dirty = False def put(self: SelfType, key: KT2, value: VT2) -> SelfType: @@ -344,7 +343,7 @@ def put(self: SelfType, key: KT2, value: VT2) -> SelfType: for v in tmp_source[k]: self.put(k, v) - self._dict[key].add(value) + self._dict[key].append(value) self._dirty = True return self @@ -367,7 +366,10 @@ def build(self) -> "ImmutableSetMultiDict[KT2, VT2]": result: ImmutableSetMultiDict[ KT2, VT2 ] = _ImmutableDictBackedImmutableSetMultiDict( - {k: v.build() for (k, v) in self._dict.items()} # type: ignore + { + k: immutableset(v, order_key=self._order_key) + for (k, v) in self._dict.items() + } # type: ignore ) # item type doesn't matter on empty collections return ( diff --git a/immutablecollections/_immutableset.py b/immutablecollections/_immutableset.py index 0abe6fe..282fb0a 100644 --- a/immutablecollections/_immutableset.py +++ b/immutablecollections/_immutableset.py @@ -1,16 +1,14 @@ -from abc import ABCMeta, abstractmethod +from abc import ABCMeta from typing import ( AbstractSet, Any, 
Callable, - Container, FrozenSet, Generic, ItemsView, Iterable, Iterator, KeysView, - List, MutableSet, Optional, Sequence, @@ -39,6 +37,8 @@ def immutableset( *, disable_order_check: bool = False, forbid_duplicate_elements: bool = False, + check_top_type_matches: Optional[Type[T]] = None, + order_key: Optional[Callable[[T], Any]] = None, ) -> "ImmutableSet[T]": """ Create an immutable set with the given contents. @@ -55,6 +55,12 @@ def immutableset( If *forbid_duplicate_elements* is ``True`` and one item occurs twice in *iterable*, then a ``ValueError`` will be thrown. + If *check_top_type_matches* is specified, each element added to this set will be checked to be an + instance of that type. + + If *order_key* is present, the order of the resulting set will be sorted by that key function + rather than in the usual insertion order. + If *iterable* is already an ``ImmutableSet``, *iterable* itself will be returned. """ # immutableset() should return an empty set @@ -102,10 +108,24 @@ def immutableset( iteration_order = [] containment_set: MutableSet[T] = set() - for value in iterable: - if value not in containment_set: - containment_set.add(value) - iteration_order.append(value) + if check_top_type_matches is None: + for value in iterable: + if value not in containment_set: + containment_set.add(value) + iteration_order.append(value) + else: + # TODO having this if/else may cause a performance hit? 
+ for value in iterable: + # Optimization: Don't use check_isinstance to cut down on method calls + if not isinstance(value, check_top_type_matches): + raise TypeError( + "Expected instance of type {!r} but got type {!r} for {!r}".format( + check_top_type_matches, type(value), value + ) + ) + if value not in containment_set: + containment_set.add(value) + iteration_order.append(value) if forbid_duplicate_elements and len(containment_set) != original_length: seen_once: Set[T] = set() @@ -125,6 +145,8 @@ def immutableset( if len(iteration_order) == 1: return _SingletonImmutableSet(iteration_order[0], None) else: + if order_key is not None: + iteration_order = sorted(iteration_order, key=order_key) return _FrozenSetBackedImmutableSet(containment_set, iteration_order, None) else: return _EMPTY @@ -150,7 +172,7 @@ class ImmutableSet( # pylint: disable=duplicate-bases Sequence[T], metaclass=ABCMeta, ): - __slots__ = () + __slots__ = ("_top_level_type",) """ A immutable set with deterministic iteration order. @@ -161,8 +183,8 @@ class ImmutableSet( # pylint: disable=duplicate-bases Optional top-level run-time type setting is supported (see of()). - ImmutableSets should be created via of() or builder(), not by directly instantiating one of - the sub-classes. + ImmutableSets should be created via the module-level constructor `immutableset()`, not by + directly instantiating one of the sub-classes. For runtime type checks that wish to include ImmutableSet as well as the types set and frozenset, use collections.abc.Set or @@ -170,6 +192,9 @@ class ImmutableSet( # pylint: disable=duplicate-bases typing.Set, as that matches the built-in mutable set type. 
""" + def __init__(self, top_level_type: Optional[Type]) -> None: + self._top_level_type = top_level_type + # note to implementers: if a new implementing class is created besides the frozen set one, # we need to change how the equals method works @@ -196,13 +221,10 @@ def of( _check_all_isinstance(seq, check_top_type_matches) return seq else: - return ( - ImmutableSet.builder( - check_top_type_matches=check_top_type_matches, - require_ordered_input=require_ordered_input, - ) - .add_all(seq) - .build() + return immutableset( + seq, + check_top_type_matches=check_top_type_matches, + disable_order_check=not require_ordered_input, ) @staticmethod @@ -246,11 +268,8 @@ def union( If check top level types is provided, all elements of both sets must match the specified type. """ - return ( - ImmutableSet.builder(check_top_type_matches) - .add_all(self) - .add_all(other) - .build() + return immutableset( + list(self) + list(other), check_top_type_matches=check_top_type_matches ) # we deliberately tighten the type bounds from our parent @@ -271,12 +290,8 @@ def intersection(self, other: Iterable[Any]) -> "ImmutableSet[T]": should have already been in this set, so you can type check this set itself if you are concerned. """ - return ( - ImmutableSet.builder( - check_top_type_matches=self._top_level_type # type: ignore - ) - .add_all(x for x in self if x in other) - .build() + return immutableset( + (x for x in self if x in other), check_top_type_matches=self._top_level_type ) def __and__(self, other: AbstractSet[Any]) -> "ImmutableSet[T]": @@ -343,224 +358,6 @@ def __str__(self): as_list = str(list(self)) return "{%s}" % as_list[1:-1] - @staticmethod - def builder( - check_top_type_matches: Optional[Type[T]] = None, - require_ordered_input: bool = False, - order_key: Callable[[T], Any] = None, - ) -> "ImmutableSet.Builder[T]": - """ - Gets an object which can build an ImmutableSet. 
- - If check_top_matches is specified, each element added to this set will be - checked to be an instance of that type. - - If require_ordered_input is True (default False), an exception will be thrown if a - non-sequence, non-ImmutableSet is used for add_all. This is recommended to help - encourage determinism. - - If order_key is present, the order of the resulting set will be sorted by - that key function rather than in the usual insertion order. - - You can check item containment before building the set by using "in" on the builder. - """ - # Optimization: We use two different implementations, one that checks types and one that - # does not. Profiling revealed that this is faster than a single implementation that - # conditionally checks types based on a member variable. Two separate classes are needed; - # if you use a single class that conditionally defines add and add_all upon construction, - # Python's method-lookup optimizations are defeated and you don't get any benefit. - if check_top_type_matches is not None: - return _TypeCheckingBuilder( - top_level_type=check_top_type_matches, - require_ordered_input=require_ordered_input, - order_key=order_key, - ) - else: - return _NoTypeCheckingBuilder( - require_ordered_input=require_ordered_input, order_key=order_key - ) - - class Builder(Generic[T2], Container[T2], metaclass=ABCMeta): - @abstractmethod - def add(self: SelfType, item: T2) -> SelfType: - raise NotImplementedError() - - @abstractmethod - def add_all(self: SelfType, items: Iterable[T2]) -> SelfType: - raise NotImplementedError() - - @abstractmethod - def __contains__(self, item): - raise NotImplementedError() - - @abstractmethod - def build(self) -> "ImmutableSet[T2]": - raise NotImplementedError() - - -# When modifying this class, make sure any relevant changes are also made to _NoTypeCheckingBuilder -class _TypeCheckingBuilder(ImmutableSet.Builder[T]): - def __init__( - self, - top_level_type: Optional[Type] = None, - require_ordered_input: bool 
= False, - order_key: Callable[[T], Any] = None, - ) -> None: - if not isinstance(top_level_type, (type, type(None))): - raise TypeError( - f"Expected instance of type {type:!r} or {type(None):!r} " - f"but got type {type(top_level_type):!r} for top_level_type instead" - ) - self._top_level_type = top_level_type - if not isinstance(require_ordered_input, bool): - raise TypeError( - f"Expected instance of type {bool:!r} " - "but got type {type(require_ordered_input):!r} for require_ordered_input instead" - ) - self._require_ordered_input = require_ordered_input - self._order_key = order_key - - self._set: AbstractSet[T] = set() - self._iteration_order: List[T] = list() - - def add(self: SelfType, item: T) -> SelfType: - # Any changes made to add should also be made to add_all - if item not in self._set: - # Optimization: Don't use use check_isinstance to cut down on method calls - if not isinstance(item, self._top_level_type): - raise TypeError( - "Expected instance of type {!r} but got type {!r} for {!r}".format( - self._top_level_type, type(item), item - ) - ) - self._set.add(item) - self._iteration_order.append(item) - return self - - def add_all(self: SelfType, items: Iterable[T]) -> SelfType: - if ( - self._require_ordered_input - and not (isinstance(items, Sequence) or isinstance(items, ImmutableSet)) - and not self._order_key - ): - raise ValueError( - "Builder has require_ordered_input on, but provided collection " - "is neither a sequence or another ImmutableSet. A common cause " - "of this is initializing an ImmutableSet from a set literal; " - "prefer to initialize from a list instead to help preserve " - "determinism." - ) - - # Optimization: These methods are looked up once outside the inner loop. Note that applying - # the same approach to the containment check does not improve performance, probably because - # the containment check syntax itself is already optimized. 
- add = self._set.add - append = self._iteration_order.append - # Optimization: Store self._top_level_type to avoid repeated lookups - top_level_type = self._top_level_type - for item in items: - # Optimization: to save method call overhead in an inner loop, we don't call add and - # instead do the same thing. We don't use check_isinstance for the same reason. - if item not in self._set: - if not isinstance(item, top_level_type): - raise TypeError( - "Expected instance of type {!r} but got type {!r} for {!r}".format( - top_level_type, type(item), item - ) - ) - add(item) - append(item) - - return self - - def __contains__(self, item): - return self._set.__contains__(item) - - def build(self) -> "ImmutableSet[T]": - if self._set: - if len(self._set) > 1: - if self._order_key: - self._iteration_order.sort(key=self._order_key) - return _FrozenSetBackedImmutableSet( - self._set, self._iteration_order, top_level_type=self._top_level_type - ) - else: - return _SingletonImmutableSet( - self._set.__iter__().__next__(), top_level_type=self._top_level_type - ) - else: - return _EMPTY - - -# When modifying this class, make sure any relevant changes are also made to _TypeCheckingBuilder -class _NoTypeCheckingBuilder(ImmutableSet.Builder[T]): - def __init__( - self, require_ordered_input: bool = False, order_key: Callable[[T], Any] = None - ) -> None: - if not isinstance(require_ordered_input, bool): - raise TypeError( - f"Expected instance of type {bool:!r} " - "but got type {type(require_ordered_input):!r} for require_ordered_input instead" - ) - self._require_ordered_input = require_ordered_input - self._order_key = order_key - - self._set: AbstractSet[T] = set() - self._iteration_order: List[T] = list() - - def add(self: SelfType, item: T) -> SelfType: - # Any changes made to add should also be made to add_all - if item not in self._set: - self._set.add(item) - self._iteration_order.append(item) - return self - - def add_all(self: SelfType, items: Iterable[T]) -> SelfType: 
- if ( - self._require_ordered_input - and not (isinstance(items, Sequence) or isinstance(items, ImmutableSet)) - and not self._order_key - ): - raise ValueError( - "Builder has require_ordered_input on, but provided collection " - "is neither a sequence or another ImmutableSet. A common cause " - "of this is initializing an ImmutableSet from a set literal; " - "prefer to initialize from a list instead to help preserve " - "determinism." - ) - - # Optimization: These methods are looked up once outside the inner loop. Note that applying - # the same approach to the containment check does not improve performance, probably because - # the containment check syntax itself is already optimized. - add = self._set.add - append = self._iteration_order.append - for item in items: - # Optimization: to save method call overhead in an inner loop, we don't call add and - # instead do the same thing. - if item not in self._set: - add(item) - append(item) - - return self - - def __contains__(self, item): - return self._set.__contains__(item) - - def build(self) -> "ImmutableSet[T]": - if self._set: - if len(self._set) > 1: - if self._order_key: - self._iteration_order.sort(key=self._order_key) - return _FrozenSetBackedImmutableSet( - self._set, self._iteration_order, top_level_type=None - ) - else: - return _SingletonImmutableSet( - self._set.__iter__().__next__(), top_level_type=None - ) - else: - return _EMPTY - class _FrozenSetBackedImmutableSet(ImmutableSet[T]): """ @@ -570,7 +367,7 @@ class _FrozenSetBackedImmutableSet(ImmutableSet[T]): be directly instantiated by users or the ImmutableSet contract may fail to be satisfied! 
""" - __slots__ = "_set", "_iteration_order", "_top_level_type" + __slots__ = "_set", "_iteration_order" # pylint:disable=assigning-non-slot def __init__( @@ -579,9 +376,9 @@ def __init__( iteration_order: Sequence[T], top_level_type: Optional[Type], ) -> None: + super().__init__(top_level_type) self._set: FrozenSet[T] = frozenset(init_set) self._iteration_order = tuple(iteration_order) - self._top_level_type = top_level_type def __iter__(self) -> Iterator[T]: return self._iteration_order.__iter__() @@ -589,7 +386,7 @@ def __iter__(self) -> Iterator[T]: def __len__(self) -> int: return self._set.__len__() - def __contains__(self, item) -> bool: + def __contains__(self, item: Any) -> bool: return self._set.__contains__(item) @overload @@ -623,12 +420,12 @@ def __reduce__(self): class _SingletonImmutableSet(ImmutableSet[T]): - __slots__ = "_single_value", "_top_level_type" + __slots__ = ("_single_value",) # pylint:disable=assigning-non-slot def __init__(self, single_value: T, top_level_type: Optional[Type]) -> None: + super().__init__(top_level_type) self._single_value = single_value - self._top_level_type = top_level_type def __iter__(self) -> Iterator[T]: return iter((self._single_value,)) @@ -636,7 +433,7 @@ def __iter__(self) -> Iterator[T]: def __len__(self) -> int: return 1 - def __contains__(self, item) -> bool: + def __contains__(self, item: Any) -> bool: return self._single_value == item @overload diff --git a/immutablecollections/immutablecollection.py b/immutablecollections/immutablecollection.py index c4d231b..307f1ef 100644 --- a/immutablecollections/immutablecollection.py +++ b/immutablecollections/immutablecollection.py @@ -25,8 +25,3 @@ def of(seq): @abstractmethod def empty(): raise NotImplementedError() - - @staticmethod - @abstractmethod - def builder(): - raise NotImplementedError() diff --git a/tests/test_immutableset.py b/tests/test_immutableset.py index 9b55d01..96158da 100644 --- a/tests/test_immutableset.py +++ b/tests/test_immutableset.py @@ 
-17,18 +17,13 @@ def test_empty(self): empty2 = immutableset([]) self.assertEqual(0, len(empty2)) self.assertEqual(empty, empty2) - empty3 = ImmutableSet.builder().build() - self.assertEqual(0, len(empty3)) - self.assertEqual(empty, empty3) def test_empty_singleton(self): empty1 = immutableset() empty2 = immutableset() self.assertIs(empty1, empty2) - empty3 = ImmutableSet.builder().build() + empty3 = immutableset([]) self.assertIs(empty1, empty3) - empty4 = immutableset([]) - self.assertIs(empty1, empty4) def test_basic(self): source = (1, 2, 3) @@ -150,12 +145,7 @@ def test_order_irrelevant_for_equals_hash(self): def test_ordering(self): self.assertEqual( - ("a", "b", "c"), - tuple( - ImmutableSet.builder(order_key=lambda x: x) - .add_all(["b", "c", "a"]) - .build() - ), + ("a", "b", "c"), tuple(immutableset(["b", "c", "a"], order_key=lambda x: x)) ) # pylint: disable=blacklisted-name