From 8f7642b91078acbe69a3745f35e9b546516cd337 Mon Sep 17 00:00:00 2001 From: skrydal Date: Tue, 20 Aug 2024 13:24:16 +0200 Subject: [PATCH] fix(ingestion/tableau): Tableau field type parsing (#11202) --- .../ingestion/source/tableau_common.py | 7 ++++- .../tests/unit/test_tableau_source.py | 31 ++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py index 93f2a0ef2f6a8..f3a9c4a5aa201 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py @@ -502,7 +502,12 @@ def get_tags_from_params(params: List[str] = []) -> GlobalTagsClass: def tableau_field_to_schema_field(field, ingest_tags): - nativeDataType = field.get("dataType", "UNKNOWN") + # The check here makes sure that even if 'dataType' key exists in the 'field' dictionary but has value None, + # it will be set as "UNKNOWN" (nativeDataType field can not be None in the SchemaField). + # Hence, field.get("dataType", "UNKNOWN") is not enough + nativeDataType = field.get("dataType") + if nativeDataType is None: + nativeDataType = "UNKNOWN" TypeClass = FIELD_TYPE_MAPPING.get(nativeDataType, NullTypeClass) schema_field = SchemaField( diff --git a/metadata-ingestion/tests/unit/test_tableau_source.py b/metadata-ingestion/tests/unit/test_tableau_source.py index f5410b161ed70..1cd0557d085f1 100644 --- a/metadata-ingestion/tests/unit/test_tableau_source.py +++ b/metadata-ingestion/tests/unit/test_tableau_source.py @@ -1,8 +1,37 @@ +from typing import Any, Dict + import pytest import datahub.ingestion.source.tableau_constant as c from datahub.ingestion.source.tableau import TableauSiteSource -from datahub.ingestion.source.tableau_common import get_filter_pages, make_filter +from datahub.ingestion.source.tableau_common import ( + get_filter_pages, + make_filter, + tableau_field_to_schema_field, +) +from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField + + +def test_tablea_source_handles_none_nativedatatype(): + field: Dict[str, Any] = { + "__typename": "CalculatedField", + "id": "abcd", + "name": "Test Field", + "description": None, + "isHidden": False, + "folderName": None, + "upstreamFields": [], + "upstreamColumns": [], + "role": None, + "dataType": None, + "defaultFormat": "s", + "aggregation": None, + "formula": "a/b + d", + } + schema_field: SchemaField = tableau_field_to_schema_field( + field=field, ingest_tags=False + ) + assert schema_field.nativeDataType == "UNKNOWN" def test_tableau_source_unescapes_lt():