From e37d69505909a89cf2c12320cdb1b0b046226d1c Mon Sep 17 00:00:00 2001 From: Joao Amaral <7281460+joaopamaral@users.noreply.github.com> Date: Thu, 15 Feb 2024 19:41:47 -0300 Subject: [PATCH] fix: Force flattened record according to provided flattened schema (#2243) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Force flattened record according to provided flattened schema * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix import * fix * revert collections change * ruff linter fixes * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * ruff linter fixes * fix * fix * add test * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * removing None option for flatten schema and max level * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * revert typing * fix check * Add a short comment --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Edgar Ramírez Mondragón <16805946+edgarrmondragon@users.noreply.github.com> Co-authored-by: Edgar Ramírez-Mondragón --- singer_sdk/helpers/_flattening.py | 9 +++- tests/core/test_flattening.py | 74 +++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 tests/core/test_flattening.py diff --git a/singer_sdk/helpers/_flattening.py b/singer_sdk/helpers/_flattening.py index 77e3935b9..2a3e194d0 100644 --- a/singer_sdk/helpers/_flattening.py +++ b/singer_sdk/helpers/_flattening.py @@ -415,7 +415,14 @@ def _flatten_record( items: list[tuple[str, t.Any]] = [] for k, v in record_node.items(): new_key = flatten_key(k, parent_key, separator) - if isinstance(v, collections.abc.MutableMapping) and level < max_level: + # If the value is a dictionary, and the key is not in the schema, and the + 
# level is below the max level, then keep flattening (requires a schema). + if ( + isinstance(v, collections.abc.MutableMapping) + and flattened_schema + and new_key not in flattened_schema.get("properties", {}) + and (level < max_level) + ): items.extend( _flatten_record( v, diff --git a/tests/core/test_flattening.py b/tests/core/test_flattening.py new file mode 100644 index 000000000..73169eab3 --- /dev/null +++ b/tests/core/test_flattening.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +import pytest + +from singer_sdk.helpers._flattening import flatten_record + + +@pytest.mark.parametrize( + "flattened_schema, max_level, expected", + [ + pytest.param( + { + "properties": { + "key_1": {"type": ["null", "integer"]}, + "key_2__key_3": {"type": ["null", "string"]}, + "key_2__key_4": {"type": ["null", "object"]}, + } + }, + 99, + { + "key_1": 1, + "key_2__key_3": "value", + "key_2__key_4": '{"key_5": 1, "key_6": ["a", "b"]}', + }, + id="flattened schema limiting the max level", + ), + pytest.param( + { + "properties": { + "key_1": {"type": ["null", "integer"]}, + "key_2__key_3": {"type": ["null", "string"]}, + "key_2__key_4__key_5": {"type": ["null", "integer"]}, + "key_2__key_4__key_6": {"type": ["null", "array"]}, + } + }, + 99, + { + "key_1": 1, + "key_2__key_3": "value", + "key_2__key_4__key_5": 1, + "key_2__key_4__key_6": '["a", "b"]', + }, + id="flattened schema not limiting the max level", + ), + pytest.param( + { + "properties": { + "key_1": {"type": ["null", "integer"]}, + "key_2__key_3": {"type": ["null", "string"]}, + "key_2__key_4__key_5": {"type": ["null", "integer"]}, + "key_2__key_4__key_6": {"type": ["null", "array"]}, + } + }, + 1, + { + "key_1": 1, + "key_2__key_3": "value", + "key_2__key_4": '{"key_5": 1, "key_6": ["a", "b"]}', + }, + id="max level limiting flattened schema", + ), + ], +) +def test_flatten_record(flattened_schema, max_level, expected): + """Test flatten_record to obey the max_level and flattened_schema parameters.""" + 
record = { + "key_1": 1, + "key_2": {"key_3": "value", "key_4": {"key_5": 1, "key_6": ["a", "b"]}}, + } + + result = flatten_record( + record, max_level=max_level, flattened_schema=flattened_schema + ) + assert expected == result