dlt-hub · rudolfix · Nov 7, 2024 · Nov 5, 2024 · Nov 5, 2024 · Nov 6, 2024
diff --git a/dlt/common/normalizers/json/relational.py b/dlt/common/normalizers/json/relational.py
@@ -96,7 +96,7 @@ def _reset(self) -> None:
         # self.primary_keys = Dict[str, ]
 
     def _flatten(
-        self, table: str, dict_row: DictStrAny, _r_lvl: int
+        self, table: str, dict_row: DictStrAny, parent_path: Tuple[str, ...], _r_lvl: int
     ) -> Tuple[DictStrAny, Dict[Tuple[str, ...], Sequence[Any]]]:
         out_rec_row: DictStrAny = {}
         out_rec_list: Dict[Tuple[str, ...], Sequence[Any]] = {}
@@ -117,7 +117,7 @@ def norm_row_dicts(dict_row: StrAny, __r_lvl: int, path: Tuple[str, ...] = ()) -
                 # for lists and dicts we must check if type is possibly nested
                 if isinstance(v, (dict, list)):
                     if not self._is_nested_type(
-                        self.schema, table, nested_name, self.max_nesting, __r_lvl
+                        self.schema, table, nested_name, self.max_nesting, parent_path, __r_lvl
                     ):
                         # TODO: if schema contains table {table}__{nested_name} then convert v into single element list
                         if isinstance(v, dict):
@@ -268,7 +268,7 @@ def _normalize_row(
         schema = self.schema
         table = schema.naming.shorten_fragments(*parent_path, *ident_path)
         # flatten current row and extract all lists to recur into
-        flattened_row, lists = self._flatten(table, dict_row, _r_lvl)
+        flattened_row, lists = self._flatten(table, dict_row, parent_path, _r_lvl)
         # always extend row
         DataItemNormalizer._extend_row(extend, flattened_row)
         # infer record hash or leave existing primary key if present
@@ -423,10 +423,25 @@ def _normalize_prop(
         )
 
     @staticmethod
-    def _get_table_nesting_level(schema: Schema, table_name: str) -> Optional[int]:
+    def _get_table_nesting_level(
+        schema: Schema, table_name: str, parent_path: Tuple[str, ...]
+    ) -> Optional[int]:
+        """Gets table nesting level; inherits it from the root table of `parent_path` if not set"""
+
+        # try to get table directly
         table = schema.tables.get(table_name)
+        max_nesting = None
+
+        if table and (max_nesting := cast(int, table.get("x-normalizer", {}).get("max_nesting"))):
+            return max_nesting
+
+        # if table is missing or has no max_nesting set, fall back to the root table of parent_path
+        if max_nesting is None and parent_path:
+            table = schema.tables.get(parent_path[0])
+
         if table:
-            return table.get("x-normalizer", {}).get("max_nesting")  # type: ignore
+            return cast(int, table.get("x-normalizer", {}).get("max_nesting"))
+
         return None
 
     @staticmethod
@@ -440,13 +455,20 @@ def _get_primary_key(schema: Schema, table_name: str) -> List[str]:
     @staticmethod
     @lru_cache(maxsize=None)
     def _is_nested_type(
-        schema: Schema, table_name: str, field_name: str, max_nesting: int, _r_lvl: int
+        schema: Schema,
+        table_name: str,
+        field_name: str,
+        max_nesting: int,
+        parent_path: Tuple[str, ...],
+        _r_lvl: int,
     ) -> bool:
         """For those paths the nested objects should be left in place.
         Cache perf: max_nesting < _r_lvl: ~2x faster, full check 10x faster
         """
         # turn everything at the recursion level into nested type
-        max_table_nesting = DataItemNormalizer._get_table_nesting_level(schema, table_name)
+        max_table_nesting = DataItemNormalizer._get_table_nesting_level(
+            schema, table_name, parent_path
+        )
         if max_table_nesting is not None:
             max_nesting = max_table_nesting
 

diff --git a/tests/common/normalizers/test_json_relational.py b/tests/common/normalizers/test_json_relational.py
@@ -29,7 +29,7 @@ def test_flatten_fix_field_name(norm: RelationalNormalizer) -> None:
         "f 2": [],
         "f!3": {"f4": "a", "f-5": "b", "f*6": {"c": 7, "c v": 8, "c x": []}},
     }
-    flattened_row, lists = norm._flatten("mock_table", row, 0)
+    flattened_row, lists = norm._flatten("mock_table", row, (), 0)
     assert "f_1" in flattened_row
     # assert "f_2" in flattened_row
     assert "f_3__f4" in flattened_row
@@ -62,11 +62,11 @@ def test_preserve_json_value(norm: RelationalNormalizer) -> None:
         )
     )
     row_1 = {"value": 1}
-    flattened_row, _ = norm._flatten("with_json", row_1, 0)
+    flattened_row, _ = norm._flatten("with_json", row_1, (), 0)
     assert flattened_row["value"] == 1
 
     row_2 = {"value": {"json": True}}
-    flattened_row, _ = norm._flatten("with_json", row_2, 0)
+    flattened_row, _ = norm._flatten("with_json", row_2, (), 0)
     assert flattened_row["value"] == row_2["value"]
     # json value is not flattened
     assert "value__json" not in flattened_row
@@ -78,11 +78,11 @@ def test_preserve_json_value_with_hint(norm: RelationalNormalizer) -> None:
     norm.schema._compile_settings()
 
     row_1 = {"value": 1}
-    flattened_row, _ = norm._flatten("any_table", row_1, 0)
+    flattened_row, _ = norm._flatten("any_table", row_1, (), 0)
     assert flattened_row["value"] == 1
 
     row_2 = {"value": {"json": True}}
-    flattened_row, _ = norm._flatten("any_table", row_2, 0)
+    flattened_row, _ = norm._flatten("any_table", row_2, (), 0)
     assert flattened_row["value"] == row_2["value"]
     # json value is not flattened
     assert "value__json" not in flattened_row
@@ -884,7 +884,7 @@ def test_caching_perf(norm: RelationalNormalizer) -> None:
     table["x-normalizer"] = {}
     start = time()
     for _ in range(100000):
-        norm._is_nested_type(norm.schema, "test", "field", 0, 0)
+        norm._is_nested_type(norm.schema, "test", "field", 0, (), 0)
         # norm._get_table_nesting_level(norm.schema, "test")
     print(f"{time() - start}")