From 07a63c2c17b146b52b7244d2d707d2df9012598d Mon Sep 17 00:00:00 2001
From: mjsqu
Date: Tue, 6 Jun 2023 11:34:22 +1200
Subject: [PATCH] refactor(taps): Change `SQLStream.schema` into a cached property (#1745)

* Change schema into a cached_property

Schema is called at least once per record and does not change within a stream, so it should be cached for performance benefits.

* Fix missing sys import

* Changes for 3.7 compatibility

```
@property
@lru_cache()
```
is a backwards compatible version of functools.cached_property for Python 3.7

https://stackoverflow.com/questions/4037481/caching-class-attributes-in-python

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Add parentheses to lru_cache for py 3.7

* Fix schema return type

mypy checks fail because the schema return type is lru_cache:
```
singer_sdk/streams/sql.py:82: error: Signature of "schema" incompatible with supertype "Stream" [override]
singer_sdk/streams/sql.py:154: error: Argument "schema" to "get_selected_schema" has incompatible type "_lru_cache_wrapper[Dict[Any, Any]]"; expected "Dict[Any, Any]" [arg-type]
Found 2 errors in 1 file (checked 59 source files)
```
The update should pass over the `_lru_cache_wrapper` type and return a `Dict` as expected

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix for mypy

- Remove `lru_cache`
- Set `_cached_schema` up as a class variable to hold the schema for each stream
- Check for presence of `self._cached_schema` on the stream

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Re-add removed TYPE_CHECKING

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Edgar R. M
---
 singer_sdk/streams/sql.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/singer_sdk/streams/sql.py b/singer_sdk/streams/sql.py
index 3d3de5bbe..7a3fe3886 100644
--- a/singer_sdk/streams/sql.py
+++ b/singer_sdk/streams/sql.py
@@ -20,6 +20,7 @@ class SQLStream(Stream, metaclass=abc.ABCMeta):
     """Base class for SQLAlchemy-based streams."""
 
     connector_class = SQLConnector
+    _cached_schema: dict | None = None
 
     def __init__(
         self,
@@ -74,7 +75,7 @@ def metadata(self) -> MetadataMapping:
         """
         return self._singer_catalog_entry.metadata
 
-    @property
+    @property # TODO: Investigate @cached_property after py > 3.7
     def schema(self) -> dict:
         """Return metadata object (dict) as specified in the Singer spec.
 
@@ -83,7 +84,13 @@ def schema(self) -> dict:
         Returns:
             The schema object.
         """
-        return t.cast(dict, self._singer_catalog_entry.schema.to_dict())
+        if not self._cached_schema:
+            self._cached_schema = t.cast(
+                dict,
+                self._singer_catalog_entry.schema.to_dict(),
+            )
+
+        return self._cached_schema
 
     @property
     def tap_stream_id(self) -> str: