Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor for performance #1817

Closed
wants to merge 8 commits into from
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ pint/testsuite/dask-worker-space
# WebDAV file system cache files
.DAV/

# pytest benchmarks folder
.benchmarks

# tags files (from ctags)
tags

Expand Down
163 changes: 163 additions & 0 deletions pint/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
"""functools.py - Tools for working with functions and callable objects
"""
# Python module wrapper for _functools C module
# to allow utilities written in Python to be added
# to the functools module.
# Written by Nick Coghlan <ncoghlan at gmail.com>,
# Raymond Hettinger <python at rcn.com>,
# and Łukasz Langa <lukasz at langa.pl>.
# Copyright (C) 2006-2013 Python Software Foundation.
# See C source code for _functools credits/copyright

from __future__ import annotations

# Names exported by ``from <this module> import *``: the two caching
# decorators defined further down in this file.
__all__ = [
"cache",
"lru_cache",
]
from weakref import WeakKeyDictionary

from functools import update_wrapper

from typing import Any, Callable, Protocol, TYPE_CHECKING, TypeVar

# Type variable standing for the return type of a decorated callable
# (used in the ``Callable[..., T]`` annotations in this file).
T = TypeVar("T")

if TYPE_CHECKING:
from . import UnitRegistry


################################################################################
### LRU Cache function decorator
################################################################################


class Hashable(Protocol):
    """Structural type for any object that provides ``__hash__``."""

    def __hash__(self) -> int:
        """Return the integer hash of the object."""


class _HashedSeq(list[Any]):
    """List holding the items of a key tuple together with a precomputed hash.

    The hash is computed a single time, when the instance is built, and is
    handed back unchanged by every later ``hash()`` call. A cache that hashes
    the same key several times (as happens on a miss) therefore never
    re-hashes the individual elements.

    """

    __slots__ = "hashvalue"

    def __init__(self, tup: tuple[Any, ...], hashfun: Callable[[Any], int] = hash):
        # Load the tuple's items into the list, then remember the hash of
        # the tuple as a whole.
        super().__init__(tup)
        self.hashvalue = hashfun(tup)

    def __hash__(self) -> int:
        # Serve the stored value instead of hashing the contents again.
        return self.hashvalue


def _make_key(
    args: tuple[Any, ...],
    kwds: dict[str, Any],
    kwd_mark: tuple[Any, ...] = (object(),),
    fasttypes: set[type] = {int, str},
    tuple: type = tuple,
    type: type = type,
    len: Callable[[Any], int] = len,
) -> Hashable:
    """Build a hashable cache key from positional and keyword arguments.

    The key is one flat tuple: the positional arguments, then (only when
    keyword arguments are present) the *kwd_mark* separator followed by each
    keyword name and value in turn. Keeping it flat avoids the memory cost of
    a nested structure.

    When the key holds exactly one item whose type is in *fasttypes*, that
    item is returned bare; otherwise the tuple is wrapped in ``_HashedSeq``.

    """
    # Keyword arguments are appended in the order the caller wrote them and
    # are NOT sorted, so f(x=1, y=2) and f(y=2, x=1) yield different keys
    # and are cached as separate calls.
    flat = args
    if kwds:
        flat = flat + kwd_mark
        for name, value in kwds.items():
            flat = flat + (name, value)

    if len(flat) != 1 or type(flat[0]) not in fasttypes:
        return _HashedSeq(flat)
    # Lone argument of a "fast" type: use it directly as the key.
    return flat[0]


def lru_cache():
    """Return a decorator that memoizes the function it is applied to.

    The decorator passes the function to ``_lru_cache_wrapper`` and then
    copies the function's metadata onto the resulting wrapper with
    ``functools.update_wrapper``, which also makes the undecorated function
    available as ``__wrapped__``.

    This factory takes no arguments: despite the name there are no
    ``maxsize`` or ``typed`` options here.

    See: https://en.wikipedia.org/wiki/Cache_replacement_policies#Least_recently_used_(LRU)

    """

    # The caching machinery lives entirely inside _lru_cache_wrapper; this
    # function only adds the metadata copy on top of it.

    def decorating_function(user_function: Callable[..., T]) -> Callable[..., T]:
        cached = _lru_cache_wrapper(user_function)
        update_wrapper(cached, user_function)
        return cached

    return decorating_function


def _lru_cache_wrapper(user_function: Callable[..., T]) -> Callable[..., T]:
    """Wrap *user_function* (called with an instance first) in a result cache.

    Results are kept in one plain dict per instance (the first positional
    argument, ``self``). Those dicts live in a ``WeakKeyDictionary`` keyed by
    the instance, so the instance is only weakly referenced by the mapping.
    There is no ordering and no size limit.

    The returned wrapper carries a ``cache_clear(self)`` attribute that
    empties the dict belonging to the given instance.
    """
    missing = object()  # marker meaning "no cached value for this key"
    build_key = _make_key  # bound once, when the function is decorated

    per_instance: WeakKeyDictionary[object, dict[Any, T]] = WeakKeyDictionary()

    def wrapper(self: UnitRegistry, *args: Any, **kwds: Any) -> T:
        key = build_key(args, kwds)

        # Fetch this instance's dict, creating it on first use.
        try:
            entries = per_instance[self]
        except KeyError:
            entries = per_instance[self] = {}

        cached = entries.get(key, missing)
        if cached is missing:
            # Miss: compute the value, remember it, and hand it back.
            cached = entries[key] = user_function(self, *args, **kwds)
        return cached

    def cache_clear(self: UnitRegistry):
        """Drop every cached result stored for *self*."""
        entries = per_instance.get(self)
        if entries is not None:
            entries.clear()

    wrapper.cache_clear = cache_clear
    return wrapper


################################################################################
### cache -- simplified access to the infinity cache
################################################################################


def cache(user_function: Callable[..., Any], /):
    """Memoize *user_function* with the unbounded cache built by ``lru_cache``."""
    decorate = lru_cache()
    return decorate(user_function)
24 changes: 24 additions & 0 deletions pint/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@
import math
import tokenize
from decimal import Decimal
import functools
from importlib import import_module
from io import BytesIO
from numbers import Number
from collections.abc import Mapping
from typing import Any, NoReturn, Callable, Optional, Union
from collections.abc import Generator, Iterable
import warnings


if sys.version_info >= (3, 10):
Expand Down Expand Up @@ -362,3 +364,25 @@ def zero_or_nan(obj: Any, check_all: bool) -> Union[bool, Iterable[bool]]:
if check_all and is_duck_array_type(type(out)):
return out.all()
return out


def deprecated(msg: str):
    """Return a decorator that marks a function as deprecated.

    Each call to the decorated function emits a ``DeprecationWarning`` whose
    text names the function and appends *msg*, and then forwards the call,
    with its arguments and return value unchanged, to the original function.

    The warning is always shown, but the process-wide warning filters are
    left exactly as the caller configured them.
    """

    def _inner(func: Callable[..., Any]):
        @functools.wraps(func)
        def _new_func(*args: Any, **kwargs: Any):
            # Force the warning through only inside this ``with`` block.
            # Calling simplefilter() on the global state would override the
            # user's own filters (or -W options) and stay in effect after
            # the call; catch_warnings() restores the filters on exit.
            with warnings.catch_warnings():
                warnings.simplefilter("always", DeprecationWarning)
                warnings.warn(
                    f"Call to deprecated function {func.__name__}.\n{msg}",
                    category=DeprecationWarning,
                    # Attribute the warning to the caller of the deprecated
                    # function, not to this wrapper.
                    stacklevel=2,
                )
            return func(*args, **kwargs)

        return _new_func

    return _inner
15 changes: 11 additions & 4 deletions pint/facets/context/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,13 @@
from ..._typing import F, Magnitude
from ...errors import UndefinedUnitError
from ...util import find_connected_nodes, find_shortest_path, logger, UnitsContainer
from ..plain import GenericPlainRegistry, UnitDefinition, QuantityT, UnitT
from ..plain import (
GenericPlainRegistry,
UnitDefinition,
QuantityT,
UnitT,
RegistryCache,
)
from .definitions import ContextDefinition
from . import objects

Expand All @@ -30,7 +36,7 @@ class ContextCacheOverlay:
active contexts which contain unit redefinitions.
"""

def __init__(self, registry_cache) -> None:
def __init__(self, registry_cache: RegistryCache) -> None:
self.dimensional_equivalents = registry_cache.dimensional_equivalents
self.root_units = {}
self.dimensionality = registry_cache.dimensionality
Expand Down Expand Up @@ -361,7 +367,8 @@ def _convert(
value: Magnitude,
src: UnitsContainer,
dst: UnitsContainer,
inplace: bool = False,
inplace: bool,
check_dimensionality: bool,
) -> Magnitude:
"""Convert value from some source to destination units.

Expand Down Expand Up @@ -400,7 +407,7 @@ def _convert(

value, src = src._magnitude, src._units

return super()._convert(value, src, dst, inplace)
return super()._convert(value, src, dst, inplace, check_dimensionality)

def _get_compatible_units(
self, input_units: UnitsContainer, group_or_system: Optional[str] = None
Expand Down
9 changes: 7 additions & 2 deletions pint/facets/nonmultiplicative/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,12 @@ def _add_ref_of_log_or_offset_unit(
return all_units

def _convert(
self, value: T, src: UnitsContainer, dst: UnitsContainer, inplace: bool = False
self,
value: T,
src: UnitsContainer,
dst: UnitsContainer,
inplace: bool,
check_dimensionality: bool,
) -> T:
"""Convert value from some source to destination units.

Expand Down Expand Up @@ -251,7 +256,7 @@ def _convert(
)

if not (src_offset_unit or dst_offset_unit):
return super()._convert(value, src, dst, inplace)
return super()._convert(value, src, dst, inplace, check_dimensionality)

src_dim = self._get_dimensionality(src)
dst_dim = self._get_dimensionality(dst)
Expand Down
9 changes: 8 additions & 1 deletion pint/facets/plain/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,21 @@
UnitDefinition,
)
from .objects import PlainQuantity, PlainUnit
from .registry import PlainRegistry, GenericPlainRegistry, QuantityT, UnitT
from .registry import (
PlainRegistry,
GenericPlainRegistry,
QuantityT,
UnitT,
RegistryCache,
)
from .quantity import MagnitudeT

__all__ = [
"GenericPlainRegistry",
"PlainUnit",
"PlainQuantity",
"PlainRegistry",
"RegistryCache",
"AliasDefinition",
"DefaultsDefinition",
"DimensionDefinition",
Expand Down
Loading