From 8adb0510c7314c090a57d772c445603a099016ba Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Thu, 15 Feb 2024 00:54:26 +0530 Subject: [PATCH 1/7] Add sibling support in trino --- .../src/datahub/ingestion/source/sql/trino.py | 127 +++++++++++++++++- 1 file changed, 125 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py index cb2e05765bfff..ed2d297375d7c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py @@ -1,7 +1,9 @@ +import functools import json import uuid +from dataclasses import dataclass from textwrap import dedent -from typing import Any, Dict, List, Optional +from typing import Any, Dict, Iterable, List, Optional, Union import sqlalchemy import trino @@ -16,6 +18,12 @@ from trino.sqlalchemy import datatype from trino.sqlalchemy.dialect import TrinoDialect +from datahub.configuration.source_common import ( + EnvConfigMixin, + PlatformInstanceConfigMixin, +) +from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance +from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SourceCapability, @@ -25,12 +33,18 @@ platform_name, support_status, ) +from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.extractor import schema_util from datahub.ingestion.source.sql.sql_common import ( SQLAlchemySource, + SqlWorkUnit, register_custom_type, ) -from datahub.ingestion.source.sql.sql_config import BasicSQLAlchemyConfig +from datahub.ingestion.source.sql.sql_config import ( + BasicSQLAlchemyConfig, + SQLCommonConfig, +) +from datahub.metadata.com.linkedin.pegasus2avro.common import Siblings from datahub.metadata.com.linkedin.pegasus2avro.schema import ( MapTypeClass, NumberTypeClass, @@ -42,6 +56,21 @@ register_custom_type(datatype.MAP, MapTypeClass) register_custom_type(datatype.DOUBLE, NumberTypeClass) + +@dataclass +class PlatformDetail: + platform_name: str + is_three_tier: bool + + +KNOWN_CONNECTOR_PLATFORM_MAPPING = { + "hive": PlatformDetail("hive", False), + "postgresql": PlatformDetail("postgres", True), + "mysql": PlatformDetail("mysql", False), + "redshift": PlatformDetail("redshift", True), + "bigquery": PlatformDetail("bigquery", True), +} + # Type JSON was introduced in trino sqlalchemy dialect in version 0.317.0 if version.parse(trino.__version__) >= version.parse("0.317.0"): register_custom_type(datatype.JSON, RecordTypeClass) @@ -131,10 +160,42 @@ def _get_columns(self, connection, table_name, schema: str = None, **kw): # typ TrinoDialect._get_columns = _get_columns +@functools.lru_cache() +def get_catalog_connector_name( + catalog_name: str, inspector: Inspector +) -> Optional[str]: + if inspector.engine: + query = dedent( + """ + SELECT * + FROM "system"."metadata"."catalogs" + """ + ).strip() + res = inspector.engine.execute(sql.text(query)) + catalog_connector_dict = {row.catalog_name: row.connector_name for row in res} + return catalog_connector_dict.get(catalog_name) + return None + + +class ConnectorDetail(PlatformInstanceConfigMixin, EnvConfigMixin): + connector_database: Optional[str] = Field(default=None, description="") + + class TrinoConfig(BasicSQLAlchemyConfig): # defaults scheme: str = Field(default="trino", description="", hidden_from_docs=True) + catalog_to_connector_details: Dict[str, ConnectorDetail] = Field( + default={}, + description="A mapping of trino catalog to its connector details like connector database, platform instance." + "This configuration is used to ingest siblings of datasets. Use catalog name as key." + "For three tier connectors like postgresql, connector database is required.", + ) + + ingest_siblings: bool = Field( + default=True, description="Whether siblings of datasets should be ingested" + ) + def get_identifier(self: BasicSQLAlchemyConfig, schema: str, table: str) -> str: identifier = f"{schema}.{table}" if self.database: # TODO: this should be required field @@ -175,6 +236,68 @@ def get_db_name(self, inspector: Inspector) -> str: else: return super().get_db_name(inspector) + def _get_sibling_urn( + self, + dataset_name: str, + inspector: Inspector, + schema: str, + table: str, + ) -> Optional[str]: + catalog_name = dataset_name.split(".")[0] + connector_platform_details: Optional[PlatformDetail] = None + + connector_name = get_catalog_connector_name(catalog_name, inspector) + if connector_name: + connector_platform_details = KNOWN_CONNECTOR_PLATFORM_MAPPING.get( + connector_name + ) + + connector_details = self.config.catalog_to_connector_details.get( + catalog_name, ConnectorDetail() + ) + + if connector_platform_details: + if not connector_platform_details.is_three_tier: # connector is two tier + return make_dataset_urn_with_platform_instance( + platform=connector_platform_details.platform_name, + name=f"{schema}.{table}", + platform_instance=connector_details.platform_instance, + env=connector_details.env, + ) + elif connector_details.connector_database: # connector is three tier + return make_dataset_urn_with_platform_instance( + platform=connector_platform_details.platform_name, + name=f"{connector_details.connector_database}.{schema}.{table}", + platform_instance=connector_details.platform_instance, + env=connector_details.env, + ) + + return None + + def _process_table( + self, + dataset_name: str, + inspector: Inspector, + schema: str, + table: str, + sql_config: SQLCommonConfig, + ) -> Iterable[Union[SqlWorkUnit, MetadataWorkUnit]]: + yield from super()._process_table( + dataset_name, inspector, schema, table, sql_config + ) + dataset_urn = make_dataset_urn_with_platform_instance( + self.platform, + dataset_name, + self.config.platform_instance, + self.config.env, + ) + sibling_urn = self._get_sibling_urn(dataset_name, inspector, schema, table) + if self.config.ingest_siblings and sibling_urn: + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=Siblings(primary=False, siblings=[sibling_urn]), + ).as_workunit() + @classmethod def create(cls, config_dict, ctx): config = TrinoConfig.parse_obj(config_dict) From 97bc765d78292e7bf4ed4c43c496cc2bae287d34 Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Thu, 15 Feb 2024 13:26:57 +0530 Subject: [PATCH 2/7] Modify trino test cases --- .../src/datahub/ingestion/source/sql/trino.py | 51 +- .../tests/integration/trino/test_trino.py | 26 +- .../trino/trino_hive_mces_golden.json | 451 +++++++++++++++++- .../integration/trino/trino_mces_golden.json | 428 +++++++++++++---- 4 files changed, 840 insertions(+), 116 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py index ed2d297375d7c..7dda5a4e1da1b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py @@ -200,11 +200,7 @@ def get_identifier(self: BasicSQLAlchemyConfig, schema: str, table: str) -> str: identifier = f"{schema}.{table}" if self.database: # TODO: this should be required field identifier = f"{self.database}.{identifier}" - return ( - f"{self.platform_instance}.{identifier}" - if self.platform_instance - else identifier - ) + return identifier @platform_name("Trino", doc_order=1) @@ -274,17 +270,13 @@ def _get_sibling_urn( return None - def _process_table( + def get_sibling_workunit( self, dataset_name: str, inspector: Inspector, schema: str, table: str, - sql_config: SQLCommonConfig, - ) -> Iterable[Union[SqlWorkUnit, MetadataWorkUnit]]: - yield from super()._process_table( - dataset_name, inspector, schema, table, sql_config - ) + ) -> Optional[MetadataWorkUnit]: dataset_urn = make_dataset_urn_with_platform_instance( self.platform, dataset_name, @@ -293,10 +285,45 @@ def _process_table( ) sibling_urn = self._get_sibling_urn(dataset_name, inspector, schema, table) if self.config.ingest_siblings and sibling_urn: - yield MetadataChangeProposalWrapper( + return MetadataChangeProposalWrapper( entityUrn=dataset_urn, aspect=Siblings(primary=False, siblings=[sibling_urn]), ).as_workunit() + return None + + def _process_table( + self, + dataset_name: str, + inspector: Inspector, + schema: str, + table: str, + sql_config: SQLCommonConfig, + ) -> Iterable[Union[SqlWorkUnit, MetadataWorkUnit]]: + yield from super()._process_table( + dataset_name, inspector, schema, table, sql_config + ) + sibling_workunit = self.get_sibling_workunit( + dataset_name, inspector, schema, table + ) + if sibling_workunit: + yield sibling_workunit + + def _process_view( + self, + dataset_name: str, + inspector: Inspector, + schema: str, + view: str, + sql_config: SQLCommonConfig, + ) -> Iterable[Union[SqlWorkUnit, MetadataWorkUnit]]: + yield from super()._process_view( + dataset_name, inspector, schema, view, sql_config + ) + sibling_workunit = self.get_sibling_workunit( + dataset_name, inspector, schema, view + ) + if sibling_workunit: + yield sibling_workunit @classmethod def create(cls, config_dict, ctx): diff --git a/metadata-ingestion/tests/integration/trino/test_trino.py b/metadata-ingestion/tests/integration/trino/test_trino.py index 8ab3ed8056e90..0b50bca79d89e 100644 --- a/metadata-ingestion/tests/integration/trino/test_trino.py +++ b/metadata-ingestion/tests/integration/trino/test_trino.py @@ -9,7 +9,7 @@ from datahub.ingestion.run.pipeline import Pipeline from datahub.ingestion.sink.file import FileSinkConfig from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig -from datahub.ingestion.source.sql.trino import TrinoConfig +from datahub.ingestion.source.sql.trino import ConnectorDetail, TrinoConfig from tests.test_helpers import fs_helpers, mce_helpers from tests.test_helpers.docker_helpers import wait_for_port @@ -53,7 +53,6 @@ def loaded_trino(trino_runner): @freeze_time(FROZEN_TIME) -@pytest.mark.xfail def test_trino_ingest( loaded_trino, test_resources_dir, pytestconfig, tmp_path, mock_time ): @@ -73,7 +72,7 @@ def test_trino_ingest( username="foo", schema_pattern=AllowDenyPattern(allow=["^librarydb"]), profile_pattern=AllowDenyPattern( - allow=["library_catalog.librarydb.*"] + allow=["postgresqldb.librarydb.*"] ), profiling=GEProfilingConfig( enabled=True, @@ -89,6 +88,12 @@ def test_trino_ingest( include_field_histogram=True, include_field_sample_values=True, ), + catalog_to_connector_details={ + "postgresqldb": ConnectorDetail( + connector_database="postgres", + platform_instance="local_server", + ) + }, ).dict(), }, "sink": { @@ -127,6 +132,11 @@ def test_trino_hive_ingest( database="hivedb", username="foo", schema_pattern=AllowDenyPattern(allow=["^db1"]), + catalog_to_connector_details={ + "hivedb": ConnectorDetail( + platform_instance="local_server", + ) + }, ).dict(), }, "sink": { @@ -234,3 +244,13 @@ def test_trino_instance_ingest( ) >= 1 ) + # all dataset entities emitted must have a sibling aspect emitted + assert ( + mce_helpers.assert_for_each_entity( + entity_type="dataset", + aspect_name="siblings", + aspect_field_matcher={}, + file=events_file, + ) + >= 1 + ) diff --git a/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json b/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json index c43223c68a6b6..c82e403bb9c27 100644 --- a/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json +++ b/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json @@ -231,7 +231,7 @@ "numrows": "1", "rawdatasize": "32", "totalsize": "33", - "transient_lastddltime": "1698223433" + "transient_lastddltime": "1707983411" }, "name": "array_struct_test", "description": "This table has array of structs", @@ -349,6 +349,25 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.array_struct_test,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD)", @@ -408,7 +427,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1698223435" + "transient_lastddltime": "1707983413" }, "name": "map_test", "tags": [] @@ -492,6 +511,25 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.map_test,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.map_test,PROD)", @@ -551,7 +589,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1698223435" + "transient_lastddltime": "1707983413" }, "name": "nested_struct_test", "tags": [] @@ -684,6 +722,25 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.nested_struct_test,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.nested_struct_test,PROD)", @@ -738,7 +795,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastddltime": "1698223429" + "transient_lastddltime": "1707983406" }, "name": "pokes", "tags": [] @@ -830,6 +887,25 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.pokes,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.pokes,PROD)", @@ -889,7 +965,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1698223431" + "transient_lastddltime": "1707983408" }, "name": "struct_test", "tags": [] @@ -1000,6 +1076,25 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.struct_test,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test,PROD)", @@ -1056,7 +1151,7 @@ "customProperties": { "numfiles": "0", "totalsize": "0", - "transient_lastddltime": "1698223435" + "transient_lastddltime": "1707983413" }, "name": "struct_test_view_materialized", "tags": [] @@ -1167,6 +1262,25 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.struct_test_view_materialized,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test_view_materialized,PROD)", @@ -1226,7 +1340,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1698223431" + "transient_lastddltime": "1707983408" }, "name": "_test_table_underscore", "tags": [] @@ -1306,6 +1420,25 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1._test_table_underscore,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1._test_table_underscore,PROD)", @@ -1365,7 +1498,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1698223435" + "transient_lastddltime": "1707983413" }, "name": "union_test", "tags": [] @@ -1529,6 +1662,25 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.union_test,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.union_test,PROD)", @@ -1583,7 +1735,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastddltime": "1698223435", + "transient_lastddltime": "1707983413", "view_definition": "SELECT \"property_id\", \"service\"\nFROM \"db1\".\"array_struct_test\"", "is_view": "True" }, @@ -1718,6 +1870,25 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.array_struct_test_view,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD)", @@ -1767,6 +1938,268 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "urn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" + }, + { + "id": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", + "urn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "urn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "urn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" + }, + { + "id": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", + "urn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "urn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" + }, + { + "id": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", + "urn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "urn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" + }, + { + "id": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", + "urn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "urn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" + }, + { + "id": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", + "urn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "urn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" + }, + { + "id": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", + "urn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "urn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" + }, + { + "id": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", + "urn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "urn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" + }, + { + "id": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", + "urn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "urn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" + }, + { + "id": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", + "urn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD)", diff --git a/metadata-ingestion/tests/integration/trino/trino_mces_golden.json b/metadata-ingestion/tests/integration/trino/trino_mces_golden.json index dce10ce377be3..0a3fdee342cad 100644 --- a/metadata-ingestion/tests/integration/trino/trino_mces_golden.json +++ b/metadata-ingestion/tests/integration/trino/trino_mces_golden.json @@ -1,7 +1,7 @@ [ { "entityType": "container", - "entityUrn": "urn:li:container:57aba13b10c1691508600999cd411c25", + "entityUrn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -9,19 +9,20 @@ "customProperties": { "platform": "trino", "env": "PROD", - "database": "library_catalog" + "database": "postgresqldb" }, - "name": "library_catalog" + "name": "postgresqldb" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:57aba13b10c1691508600999cd411c25", + "entityUrn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -31,12 +32,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:57aba13b10c1691508600999cd411c25", + "entityUrn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -46,12 +48,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:57aba13b10c1691508600999cd411c25", + "entityUrn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -63,12 +66,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:57aba13b10c1691508600999cd411c25", + "entityUrn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -78,12 +82,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:8568c0971ce28c183fde0dff7f88e617", + "entityUrn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -91,7 +96,7 @@ "customProperties": { "platform": "trino", "env": "PROD", - "database": "library_catalog", + "database": "postgresqldb", "schema": "librarydb" }, "name": "librarydb" @@ -99,12 +104,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:8568c0971ce28c183fde0dff7f88e617", + "entityUrn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -114,12 +120,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:8568c0971ce28c183fde0dff7f88e617", + "entityUrn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -129,12 +136,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:8568c0971ce28c183fde0dff7f88e617", + "entityUrn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -146,63 +154,67 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:8568c0971ce28c183fde0dff7f88e617", + "entityUrn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:57aba13b10c1691508600999cd411c25" + "container": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:8568c0971ce28c183fde0dff7f88e617", + "entityUrn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:57aba13b10c1691508600999cd411c25", - "urn": "urn:li:container:57aba13b10c1691508600999cd411c25" + "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", + "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:8568c0971ce28c183fde0dff7f88e617" + "container": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -218,7 +230,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "library_catalog.librarydb.book", + "schemaName": "postgresqldb.librarydb.book", "platform": "urn:li:dataPlatform:trino", "version": 0, "created": { @@ -321,12 +333,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -338,52 +351,74 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.book,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:57aba13b10c1691508600999cd411c25", - "urn": "urn:li:container:57aba13b10c1691508600999cd411c25" + "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", + "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" }, { - "id": "urn:li:container:8568c0971ce28c183fde0dff7f88e617", - "urn": "urn:li:container:8568c0971ce28c183fde0dff7f88e617" + "id": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", + "urn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.issue_history,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.issue_history,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:8568c0971ce28c183fde0dff7f88e617" + "container": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.issue_history,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.issue_history,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -399,7 +434,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "library_catalog.librarydb.issue_history", + "schemaName": "postgresqldb.librarydb.issue_history", "platform": "urn:li:dataPlatform:trino", "version": 0, "created": { @@ -473,12 +508,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.issue_history,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.issue_history,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -490,52 +526,74 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.issue_history,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.issue_history,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.issue_history,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.issue_history,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:57aba13b10c1691508600999cd411c25", - "urn": "urn:li:container:57aba13b10c1691508600999cd411c25" + "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", + "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" }, { - "id": "urn:li:container:8568c0971ce28c183fde0dff7f88e617", - "urn": "urn:li:container:8568c0971ce28c183fde0dff7f88e617" + "id": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", + "urn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.member,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.member,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:8568c0971ce28c183fde0dff7f88e617" + "container": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.member,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.member,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -551,7 +609,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "library_catalog.librarydb.member", + "schemaName": "postgresqldb.librarydb.member", "platform": "urn:li:dataPlatform:trino", "version": 0, "created": { @@ -601,12 +659,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.member,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.member,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -618,52 +677,74 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.member,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.member,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.member,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.member,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:57aba13b10c1691508600999cd411c25", - "urn": "urn:li:container:57aba13b10c1691508600999cd411c25" + "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", + "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" }, { - "id": "urn:li:container:8568c0971ce28c183fde0dff7f88e617", - "urn": "urn:li:container:8568c0971ce28c183fde0dff7f88e617" + "id": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", + "urn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book_in_circulation,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book_in_circulation,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:8568c0971ce28c183fde0dff7f88e617" + "container": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book_in_circulation,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book_in_circulation,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -679,7 +760,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "library_catalog.librarydb.book_in_circulation", + "schemaName": "postgresqldb.librarydb.book_in_circulation", "platform": "urn:li:dataPlatform:trino", "version": 0, "created": { @@ -777,12 +858,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book_in_circulation,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book_in_circulation,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -794,36 +876,57 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book_in_circulation,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book_in_circulation,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.book_in_circulation,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book_in_circulation,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:57aba13b10c1691508600999cd411c25", - "urn": "urn:li:container:57aba13b10c1691508600999cd411c25" + "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", + "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" }, { - "id": "urn:li:container:8568c0971ce28c183fde0dff7f88e617", - "urn": "urn:li:container:8568c0971ce28c183fde0dff7f88e617" + "id": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", + "urn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book,PROD)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -902,12 +1005,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.issue_history,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.issue_history,PROD)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -988,12 +1092,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.member,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.member,PROD)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -1038,12 +1143,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,library_catalog.librarydb.book_in_circulation,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book_in_circulation,PROD)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -1130,7 +1236,145 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-test" + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", + "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", + "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" + }, + { + "id": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", + "urn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.issue_history,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", + "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" + }, + { + "id": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", + "urn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.member,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", + "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" + }, + { + "id": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", + "urn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book_in_circulation,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", + "urn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761" + }, + { + "id": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c", + "urn": "urn:li:container:2d206e03e435f48a5b8bacf444bf565c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file From 6be1d341d96500bd48dd923bb42466d758887195 Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Thu, 15 Feb 2024 18:57:33 +0530 Subject: [PATCH 3/7] Address review comments --- .../docs/sources/trino/trino_recipe.yml | 8 + .../src/datahub/ingestion/source/sql/trino.py | 170 +- .../tests/integration/trino/test_trino.py | 64 +- .../trino_hive_instance_mces_golden.json | 2990 +++++++++++++++++ .../trino/trino_hive_mces_golden.json | 575 +++- .../integration/trino/trino_mces_golden.json | 240 ++ 6 files changed, 3893 insertions(+), 154 deletions(-) create mode 100644 metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json diff --git a/metadata-ingestion/docs/sources/trino/trino_recipe.yml b/metadata-ingestion/docs/sources/trino/trino_recipe.yml index 70e2afa81b972..2b874801b7c32 100644 --- a/metadata-ingestion/docs/sources/trino/trino_recipe.yml +++ b/metadata-ingestion/docs/sources/trino/trino_recipe.yml @@ -13,6 +13,14 @@ source: # options: # connect_args: # http_scheme: http + + # Optional -- A mapping of trino catalog to its connector details like connector database, env and platform instance. + # This configuration is used to ingest lineage of datasets to connectors. Use catalog name as key. + # catalog_to_connector_details: + # catalog_name: + # connector_database: db_name + # platform_instance: cloud_instance + # env: DEV sink: # sink configs diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py index 7dda5a4e1da1b..d0c35ff25842c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py @@ -1,7 +1,6 @@ import functools import json import uuid -from dataclasses import dataclass from textwrap import dedent from typing import Any, Dict, Iterable, List, Optional, Union @@ -45,6 +44,11 @@ SQLCommonConfig, ) from datahub.metadata.com.linkedin.pegasus2avro.common import Siblings +from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( + DatasetLineageType, + Upstream, + UpstreamLineage, +) from datahub.metadata.com.linkedin.pegasus2avro.schema import ( MapTypeClass, NumberTypeClass, @@ -57,20 +61,18 @@ register_custom_type(datatype.DOUBLE, NumberTypeClass) -@dataclass -class PlatformDetail: - platform_name: str - is_three_tier: bool - - KNOWN_CONNECTOR_PLATFORM_MAPPING = { - "hive": PlatformDetail("hive", False), - "postgresql": PlatformDetail("postgres", True), - "mysql": PlatformDetail("mysql", False), - "redshift": PlatformDetail("redshift", True), - "bigquery": PlatformDetail("bigquery", True), + "clickhouse": "clickhouse", + "hive": "hive", + "postgresql": "postgres", + "mysql": "mysql", + "iceberg": "iceberg", + "redshift": "redshift", + "bigquery": "bigquery", } +TWO_TIER_CONNECTORS = ["clickhouse", "hive", "mysql", "iceberg"] + # Type JSON was introduced in trino sqlalchemy dialect in version 0.317.0 if version.parse(trino.__version__) >= version.parse("0.317.0"): register_custom_type(datatype.JSON, RecordTypeClass) @@ -187,13 +189,13 @@ class TrinoConfig(BasicSQLAlchemyConfig): catalog_to_connector_details: Dict[str, ConnectorDetail] = Field( default={}, - description="A mapping of trino catalog to its connector details like connector database, platform instance." - "This configuration is used to ingest siblings of datasets. Use catalog name as key." - "For three tier connectors like postgresql, connector database is required.", + description="A mapping of trino catalog to its connector details like connector database, env and platform instance." + "This configuration is used to ingest lineage of datasets to connectors. Use catalog name as key.", ) - ingest_siblings: bool = Field( - default=True, description="Whether siblings of datasets should be ingested" + ingest_lineage_to_connectors: bool = Field( + default=True, + description="Whether lineage of datasets to connectors should be ingested", ) def get_identifier(self: BasicSQLAlchemyConfig, schema: str, table: str) -> str: @@ -232,7 +234,7 @@ def get_db_name(self, inspector: Inspector) -> str: else: return super().get_db_name(inspector) - def _get_sibling_urn( + def _get_source_dataset_urn( self, dataset_name: str, inspector: Inspector, @@ -240,56 +242,69 @@ def _get_sibling_urn( table: str, ) -> Optional[str]: catalog_name = dataset_name.split(".")[0] - connector_platform_details: Optional[PlatformDetail] = None - connector_name = get_catalog_connector_name(catalog_name, inspector) if connector_name: - connector_platform_details = KNOWN_CONNECTOR_PLATFORM_MAPPING.get( + connector_platform_name = KNOWN_CONNECTOR_PLATFORM_MAPPING.get( connector_name ) - - connector_details = self.config.catalog_to_connector_details.get( - catalog_name, ConnectorDetail() - ) - - if connector_platform_details: - if not connector_platform_details.is_three_tier: # connector is two tier - return make_dataset_urn_with_platform_instance( - platform=connector_platform_details.platform_name, - name=f"{schema}.{table}", - platform_instance=connector_details.platform_instance, - env=connector_details.env, + if connector_platform_name: + connector_details = self.config.catalog_to_connector_details.get( + catalog_name, ConnectorDetail() ) - elif connector_details.connector_database: # connector is three tier - return make_dataset_urn_with_platform_instance( - platform=connector_platform_details.platform_name, - name=f"{connector_details.connector_database}.{schema}.{table}", - platform_instance=connector_details.platform_instance, - env=connector_details.env, - ) - + if ( + connector_platform_name in TWO_TIER_CONNECTORS + ): # connector is two tier + return make_dataset_urn_with_platform_instance( + platform=connector_platform_name, + name=f"{schema}.{table}", + platform_instance=connector_details.platform_instance, + env=connector_details.env, + ) + elif ( + connector_details.connector_database + ): # else connector is three tier + return make_dataset_urn_with_platform_instance( + platform=connector_platform_name, + name=f"{connector_details.connector_database}.{schema}.{table}", + platform_instance=connector_details.platform_instance, + env=connector_details.env, + ) return None - def get_sibling_workunit( + def gen_siblings_workunit( self, - dataset_name: str, - inspector: Inspector, - schema: str, - table: str, - ) -> Optional[MetadataWorkUnit]: - dataset_urn = make_dataset_urn_with_platform_instance( - self.platform, - dataset_name, - self.config.platform_instance, - self.config.env, - ) - sibling_urn = self._get_sibling_urn(dataset_name, inspector, schema, table) - if self.config.ingest_siblings and sibling_urn: - return MetadataChangeProposalWrapper( - entityUrn=dataset_urn, - aspect=Siblings(primary=False, siblings=[sibling_urn]), - ).as_workunit() - return None + dataset_urn: str, + source_dataset_urn: str, + ) -> Iterable[MetadataWorkUnit]: + """ + Generate sibling workunit for both trino dataset and its connector source dataset + """ + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=Siblings(primary=False, siblings=[source_dataset_urn]), + ).as_workunit() + + yield MetadataChangeProposalWrapper( + entityUrn=source_dataset_urn, + aspect=Siblings(primary=True, siblings=[dataset_urn]), + ).as_workunit() + + def gen_lineage_workunit( + self, + dataset_urn: str, + source_dataset_urn: str, + ) -> Iterable[MetadataWorkUnit]: + """ + Generate dataset to source connector lineage workunit + """ + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=UpstreamLineage( + upstreams=[ + Upstream(dataset=source_dataset_urn, type=DatasetLineageType.VIEW) + ] + ), + ).as_workunit() def _process_table( self, @@ -302,11 +317,19 @@ def _process_table( yield from super()._process_table( dataset_name, inspector, schema, table, sql_config ) - sibling_workunit = self.get_sibling_workunit( - dataset_name, inspector, schema, table - ) - if sibling_workunit: - yield sibling_workunit + if self.config.ingest_lineage_to_connectors: + dataset_urn = make_dataset_urn_with_platform_instance( + self.platform, + dataset_name, + self.config.platform_instance, + self.config.env, + ) + source_dataset_urn = self._get_source_dataset_urn( + dataset_name, inspector, schema, table + ) + if source_dataset_urn: + yield from self.gen_siblings_workunit(dataset_urn, source_dataset_urn) + yield from self.gen_lineage_workunit(dataset_urn, source_dataset_urn) def _process_view( self, @@ -319,11 +342,18 @@ def _process_view( yield from super()._process_view( dataset_name, inspector, schema, view, sql_config ) - sibling_workunit = self.get_sibling_workunit( - dataset_name, inspector, schema, view - ) - if sibling_workunit: - yield sibling_workunit + if self.config.ingest_lineage_to_connectors: + dataset_urn = make_dataset_urn_with_platform_instance( + self.platform, + dataset_name, + self.config.platform_instance, + self.config.env, + ) + source_dataset_urn = self._get_source_dataset_urn( + dataset_name, inspector, schema, view + ) + if source_dataset_urn: + yield from self.gen_siblings_workunit(dataset_urn, source_dataset_urn) @classmethod def create(cls, config_dict, ctx): diff --git a/metadata-ingestion/tests/integration/trino/test_trino.py b/metadata-ingestion/tests/integration/trino/test_trino.py index 0b50bca79d89e..e0d775f27b4ed 100644 --- a/metadata-ingestion/tests/integration/trino/test_trino.py +++ b/metadata-ingestion/tests/integration/trino/test_trino.py @@ -1,4 +1,3 @@ -import re import subprocess import pytest @@ -132,11 +131,6 @@ def test_trino_hive_ingest( database="hivedb", username="foo", schema_pattern=AllowDenyPattern(allow=["^db1"]), - catalog_to_connector_details={ - "hivedb": ConnectorDetail( - platform_instance="local_server", - ) - }, ).dict(), }, "sink": { @@ -179,8 +173,6 @@ def test_trino_hive_ingest( def test_trino_instance_ingest( loaded_trino, test_resources_dir, pytestconfig, tmp_path, mock_time ): - instance = "production_warehouse" - platform = "trino" mce_out_file = "trino_instance_mces.json" events_file = tmp_path / mce_out_file pipeline_config = { @@ -193,6 +185,11 @@ def test_trino_instance_ingest( username="foo", platform_instance="production_warehouse", schema_pattern=AllowDenyPattern(allow=["^db1"]), + catalog_to_connector_details={ + "hivedb": ConnectorDetail( + platform_instance="local_server", + ) + }, ).dict(), }, "sink": { @@ -207,50 +204,9 @@ def test_trino_instance_ingest( pipeline.pretty_print_summary() pipeline.raise_from_status(raise_warnings=True) - # Assert that all events generated have instance specific urns - urn_pattern = "^" + re.escape( - f"urn:li:dataset:(urn:li:dataPlatform:{platform},{instance}." - ) - assert ( - mce_helpers.assert_mce_entity_urn( - "ALL", - entity_type="dataset", - regex_pattern=urn_pattern, - file=events_file, - ) - >= 0 - ), "There should be at least one match" - - assert ( - mce_helpers.assert_mcp_entity_urn( - "ALL", - entity_type="dataset", - regex_pattern=urn_pattern, - file=events_file, - ) - >= 0 - ), "There should be at least one MCP" - - # all dataset entities emitted must have a dataPlatformInstance aspect emitted - # there must be at least one entity emitted - assert ( - mce_helpers.assert_for_each_entity( - entity_type="dataset", - aspect_name="dataPlatformInstance", - aspect_field_matcher={ - "instance": f"urn:li:dataPlatformInstance:(urn:li:dataPlatform:{platform},{instance})" - }, - file=events_file, - ) - >= 1 - ) - # all dataset entities emitted must have a sibling aspect emitted - assert ( - mce_helpers.assert_for_each_entity( - entity_type="dataset", - aspect_name="siblings", - aspect_field_matcher={}, - file=events_file, - ) - >= 1 + # Verify the output. + mce_helpers.check_golden_file( + pytestconfig, + output_path=events_file, + golden_path=test_resources_dir / "trino_hive_instance_mces_golden.json", ) diff --git a/metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json b/metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json new file mode 100644 index 0000000000000..32fe7efcbf918 --- /dev/null +++ b/metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json @@ -0,0 +1,2990 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "trino", + "instance": "production_warehouse", + "env": "PROD", + "database": "hivedb" + }, + "name": "hivedb" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Database" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "trino", + "instance": "production_warehouse", + "env": "PROD", + "database": "hivedb", + "schema": "db1" + }, + "name": "db1" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}", + "another.comment": "This table has no partitions", + "comment": "This table has array of structs", + "numfiles": "1", + "numrows": "1", + "rawdatasize": "32", + "totalsize": "33", + "transient_lastddltime": "1708001962" + }, + "name": "array_struct_test", + "description": "This table has array of structs", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "hivedb.db1.array_struct_test", + "platform": "urn:li:dataPlatform:trino", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "nullable": true, + "description": "id of property", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service", + "nullable": true, + "description": "service types and providers", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "record" + ] + } + } + }, + "nativeDataType": "ARRAY(ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))]))", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ARRAY(ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))]))\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR()", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR()\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=array].[type=int].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "int" + ] + } + } + }, + "nativeDataType": "ARRAY(INTEGER())", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ARRAY(INTEGER())\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.array_struct_test,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.array_struct_test,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.map_test,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}", + "numfiles": "0", + "numrows": "0", + "rawdatasize": "0", + "totalsize": "0", + "transient_lastddltime": "1708001965" + }, + "name": "map_test", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "hivedb.db1.map_test", + "platform": "urn:li:dataPlatform:trino", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "keyvalue", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=map].[type=string].recordid", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.MapType": { + "keyType": "string", + "valueType": "string" + } + } + }, + "nativeDataType": "MAP(INTEGER(), VARCHAR())", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"MAP(INTEGER(), VARCHAR())\", \"key_type\": {\"type\": \"int\", \"native_data_type\": \"INTEGER()\", \"_nullable\": true}, \"key_native_data_type\": \"INTEGER()\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.map_test,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.map_test,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.map_test,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.nested_struct_test,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}", + "numfiles": "0", + "numrows": "0", + "rawdatasize": "0", + "totalsize": "0", + "transient_lastddltime": "1708001965" + }, + "name": "nested_struct_test", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "hivedb.db1.nested_struct_test", + "platform": "urn:li:dataPlatform:trino", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.RecordType": {} + } + }, + "nativeDataType": "ROW([('type', VARCHAR()), ('provider', ROW([('name', VARCHAR(length=50)), ('id', SMALLINT())]))])", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ROW([('type', VARCHAR()), ('provider', ROW([('name', VARCHAR(length=50)), ('id', SMALLINT())]))])\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR()", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR()\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.RecordType": {} + } + }, + "nativeDataType": "ROW([('name', VARCHAR(length=50)), ('id', SMALLINT())])", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ROW([('name', VARCHAR(length=50)), ('id', SMALLINT())])\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=string].name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=50)", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR(length=50)\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "SMALLINT()", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"SMALLINT()\", \"_nullable\": true}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.nested_struct_test,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.nested_struct_test,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.nested_struct_test,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.pokes,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "transient_lastddltime": "1708001956" + }, + "name": "pokes", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "hivedb.db1.pokes", + "platform": "urn:li:dataPlatform:trino", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "foo", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "bar", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "baz", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR()", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.pokes,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.pokes,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.pokes,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}", + "numfiles": "0", + "numrows": "0", + "rawdatasize": "0", + "totalsize": "0", + "transient_lastddltime": "1708001958" + }, + "name": "struct_test", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "hivedb.db1.struct_test", + "platform": "urn:li:dataPlatform:trino", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.RecordType": {} + } + }, + "nativeDataType": "ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))])", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))])\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR()", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR()\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=array].[type=int].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "int" + ] + } + } + }, + "nativeDataType": "ARRAY(INTEGER())", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ARRAY(INTEGER())\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.struct_test,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.struct_test,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test_view_materialized,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "numfiles": "0", + "totalsize": "0", + "transient_lastddltime": "1708001964" + }, + "name": "struct_test_view_materialized", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "hivedb.db1.struct_test_view_materialized", + "platform": "urn:li:dataPlatform:trino", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.RecordType": {} + } + }, + "nativeDataType": "ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))])", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))])\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR()", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR()\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=array].[type=int].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "int" + ] + } + } + }, + "nativeDataType": "ARRAY(INTEGER())", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ARRAY(INTEGER())\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.struct_test_view_materialized,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test_view_materialized,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.struct_test_view_materialized,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1._test_table_underscore,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}", + "numfiles": "0", + "numrows": "0", + "rawdatasize": "0", + "totalsize": "0", + "transient_lastddltime": "1708001958" + }, + "name": "_test_table_underscore", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "hivedb.db1._test_table_underscore", + "platform": "urn:li:dataPlatform:trino", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "foo", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "bar", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR()", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1._test_table_underscore,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1._test_table_underscore,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1._test_table_underscore,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.union_test,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "column_stats_accurate": "{\"BASIC_STATS\":\"true\"}", + "numfiles": "0", + "numrows": "0", + "rawdatasize": "0", + "totalsize": "0", + "transient_lastddltime": "1708001965" + }, + "name": "union_test", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "hivedb.db1.union_test", + "platform": "urn:li:dataPlatform:trino", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].foo", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.RecordType": {} + } + }, + "nativeDataType": "ROW([('tag', SMALLINT()), ('field0', INTEGER()), ('field1', DOUBLE()), ('field2', ARRAY(VARCHAR())), ('field3', ROW([('a', INTEGER()), ('b', VARCHAR())]))])", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ROW([('tag', SMALLINT()), ('field0', INTEGER()), ('field1', DOUBLE()), ('field2', ARRAY(VARCHAR())), ('field3', ROW([('a', INTEGER()), ('b', VARCHAR())]))])\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=int].tag", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "SMALLINT()", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"SMALLINT()\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=int].field0", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"INTEGER()\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=double].field1", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "DOUBLE()", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"DOUBLE()\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=array].[type=string].field2", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "string" + ] + } + } + }, + "nativeDataType": "ARRAY(VARCHAR())", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ARRAY(VARCHAR())\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=struct].field3", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.RecordType": {} + } + }, + "nativeDataType": "ROW([('a', INTEGER()), ('b', VARCHAR())])", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ROW([('a', INTEGER()), ('b', VARCHAR())])\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=struct].field3.[type=int].a", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"INTEGER()\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].foo.[type=struct].field3.[type=string].b", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR()", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR()\", \"_nullable\": true}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.union_test,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.union_test,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.union_test,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "transient_lastddltime": "1708001965", + "view_definition": "SELECT \"property_id\", \"service\"\nFROM \"db1\".\"array_struct_test\"", + "is_view": "True" + }, + "name": "array_struct_test_view", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "hivedb.db1.array_struct_test_view", + "platform": "urn:li:dataPlatform:trino", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "record" + ] + } + } + }, + "nativeDataType": "ARRAY(ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))]))", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ARRAY(ROW([('type', VARCHAR()), ('provider', ARRAY(INTEGER()))]))\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR()", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"VARCHAR()\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=array].[type=int].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "int" + ] + } + } + }, + "nativeDataType": "ARRAY(INTEGER())", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"ARRAY(INTEGER())\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:trino", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "SELECT \"property_id\", \"service\"\nFROM \"db1\".\"array_struct_test\"", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.array_struct_test_view,PROD)" + ], + "primary": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD),property_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD),property_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD),service)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD),service)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:trino,production_warehouse)" + }, + { + "id": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b", + "urn": "urn:li:container:f311add3fdc7c16e8a50a63fe1dcce8b" + }, + { + "id": "urn:li:container:46baa6eebd802861e5ee3d043456e171", + "urn": "urn:li:container:46baa6eebd802861e5ee3d043456e171" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-instance-test", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json b/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json index c82e403bb9c27..caf0c08c0343f 100644 --- a/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json +++ b/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json @@ -231,7 +231,7 @@ "numrows": "1", "rawdatasize": "32", "totalsize": "33", - "transient_lastddltime": "1707983411" + "transient_lastddltime": "1708001962" }, "name": "array_struct_test", "description": "This table has array of structs", @@ -357,7 +357,7 @@ "aspect": { "json": { "siblings": [ - "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.array_struct_test,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)" ], "primary": false } @@ -393,6 +393,50 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.map_test,PROD)", @@ -427,7 +471,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1707983413" + "transient_lastddltime": "1708001965" }, "name": "map_test", "tags": [] @@ -519,7 +563,7 @@ "aspect": { "json": { "siblings": [ - "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.map_test,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:hive,db1.map_test,PROD)" ], "primary": false } @@ -555,6 +599,50 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.map_test,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.map_test,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.nested_struct_test,PROD)", @@ -589,7 +677,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1707983413" + "transient_lastddltime": "1708001965" }, "name": "nested_struct_test", "tags": [] @@ -730,7 +818,7 @@ "aspect": { "json": { "siblings": [ - "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.nested_struct_test,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:hive,db1.nested_struct_test,PROD)" ], "primary": false } @@ -766,6 +854,50 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.nested_struct_test,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.nested_struct_test,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.pokes,PROD)", @@ -795,7 +927,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastddltime": "1707983406" + "transient_lastddltime": "1708001956" }, "name": "pokes", "tags": [] @@ -895,7 +1027,7 @@ "aspect": { "json": { "siblings": [ - "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.pokes,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:hive,db1.pokes,PROD)" ], "primary": false } @@ -931,6 +1063,50 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.pokes,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.pokes,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test,PROD)", @@ -965,7 +1141,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1707983408" + "transient_lastddltime": "1708001958" }, "name": "struct_test", "tags": [] @@ -1084,7 +1260,7 @@ "aspect": { "json": { "siblings": [ - "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.struct_test,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test,PROD)" ], "primary": false } @@ -1120,6 +1296,50 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test_view_materialized,PROD)", @@ -1151,7 +1371,7 @@ "customProperties": { "numfiles": "0", "totalsize": "0", - "transient_lastddltime": "1707983413" + "transient_lastddltime": "1708001964" }, "name": "struct_test_view_materialized", "tags": [] @@ -1270,7 +1490,7 @@ "aspect": { "json": { "siblings": [ - "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.struct_test_view_materialized,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)" ], "primary": false } @@ -1306,6 +1526,50 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test_view_materialized,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1._test_table_underscore,PROD)", @@ -1340,7 +1604,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1707983408" + "transient_lastddltime": "1708001958" }, "name": "_test_table_underscore", "tags": [] @@ -1428,7 +1692,7 @@ "aspect": { "json": { "siblings": [ - "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1._test_table_underscore,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:hive,db1._test_table_underscore,PROD)" ], "primary": false } @@ -1464,6 +1728,50 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1._test_table_underscore,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1._test_table_underscore,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,db1._test_table_underscore,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.union_test,PROD)", @@ -1498,7 +1806,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1707983413" + "transient_lastddltime": "1708001965" }, "name": "union_test", "tags": [] @@ -1670,7 +1978,7 @@ "aspect": { "json": { "siblings": [ - "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.union_test,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:hive,db1.union_test,PROD)" ], "primary": false } @@ -1706,6 +2014,50 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.union_test,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.union_test,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD)", @@ -1735,7 +2087,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastddltime": "1707983413", + "transient_lastddltime": "1708001965", "view_definition": "SELECT \"property_id\", \"service\"\nFROM \"db1\".\"array_struct_test\"", "is_view": "True" }, @@ -1878,7 +2230,7 @@ "aspect": { "json": { "siblings": [ - "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.array_struct_test_view,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)" ], "primary": false } @@ -1889,6 +2241,50 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", + "urn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" + }, + { + "id": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", + "urn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD)", @@ -1940,21 +2336,140 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1._test_table_underscore,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7", - "urn": "urn:li:container:c7a81f6ed9a7cdd0c74436ac2dc4d1f7" - }, - { - "id": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84", - "urn": "urn:li:container:304fd7ad57dc0ab32fb2cb778cbccd84" - } - ] + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.map_test,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.nested_struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.pokes,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.union_test,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false } }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/trino/trino_mces_golden.json b/metadata-ingestion/tests/integration/trino/trino_mces_golden.json index 0a3fdee342cad..5dcb28415606e 100644 --- a/metadata-ingestion/tests/integration/trino/trino_mces_golden.json +++ b/metadata-ingestion/tests/integration/trino/trino_mces_golden.json @@ -399,6 +399,50 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.book,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.book,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.issue_history,PROD)", @@ -574,6 +618,50 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.issue_history,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.issue_history,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.issue_history,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.issue_history,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.member,PROD)", @@ -725,6 +813,50 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.member,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.member,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.member,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.member,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book_in_circulation,PROD)", @@ -924,6 +1056,50 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.book_in_circulation,PROD)", + "changeType": "UPSERT", + "aspectName": "siblings", + "aspect": { + "json": { + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book_in_circulation,PROD)" + ], + "primary": true + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book_in_circulation,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.book_in_circulation,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book,PROD)", @@ -1240,6 +1416,70 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.book,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.book_in_circulation,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.issue_history,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.member,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:ad9f7c5e0d4bf83d6278f62271c28761", From 26145d2f8ecab76424596e55873df16c99a8a746 Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Fri, 16 Feb 2024 12:56:21 +0530 Subject: [PATCH 4/7] Address review comments --- .../src/datahub/ingestion/source/sql/trino.py | 48 +++++++++---------- .../tests/integration/trino/test_trino.py | 3 ++ 2 files changed, 25 insertions(+), 26 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py index d0c35ff25842c..ee1dda9aeb346 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py @@ -190,7 +190,7 @@ class TrinoConfig(BasicSQLAlchemyConfig): catalog_to_connector_details: Dict[str, ConnectorDetail] = Field( default={}, description="A mapping of trino catalog to its connector details like connector database, env and platform instance." - "This configuration is used to ingest lineage of datasets to connectors. Use catalog name as key.", + "This configuration is used to build lineage to the underlying connector. Use catalog name as key.", ) ingest_lineage_to_connectors: bool = Field( @@ -243,32 +243,28 @@ def _get_source_dataset_urn( ) -> Optional[str]: catalog_name = dataset_name.split(".")[0] connector_name = get_catalog_connector_name(catalog_name, inspector) - if connector_name: - connector_platform_name = KNOWN_CONNECTOR_PLATFORM_MAPPING.get( - connector_name + if not connector_name: + return None + connector_platform_name = KNOWN_CONNECTOR_PLATFORM_MAPPING.get(connector_name) + if not connector_platform_name: + return None + connector_details = self.config.catalog_to_connector_details.get( + catalog_name, ConnectorDetail() + ) + if connector_platform_name in TWO_TIER_CONNECTORS: # connector is two tier + return make_dataset_urn_with_platform_instance( + platform=connector_platform_name, + name=f"{schema}.{table}", + platform_instance=connector_details.platform_instance, + env=connector_details.env, + ) + elif connector_details.connector_database: # else connector is three tier + return make_dataset_urn_with_platform_instance( + platform=connector_platform_name, + name=f"{connector_details.connector_database}.{schema}.{table}", + platform_instance=connector_details.platform_instance, + env=connector_details.env, ) - if connector_platform_name: - connector_details = self.config.catalog_to_connector_details.get( - catalog_name, ConnectorDetail() - ) - if ( - connector_platform_name in TWO_TIER_CONNECTORS - ): # connector is two tier - return make_dataset_urn_with_platform_instance( - platform=connector_platform_name, - name=f"{schema}.{table}", - platform_instance=connector_details.platform_instance, - env=connector_details.env, - ) - elif ( - connector_details.connector_database - ): # else connector is three tier - return make_dataset_urn_with_platform_instance( - platform=connector_platform_name, - name=f"{connector_details.connector_database}.{schema}.{table}", - platform_instance=connector_details.platform_instance, - env=connector_details.env, - ) return None def gen_siblings_workunit( diff --git a/metadata-ingestion/tests/integration/trino/test_trino.py b/metadata-ingestion/tests/integration/trino/test_trino.py index e0d775f27b4ed..31d739127b03e 100644 --- a/metadata-ingestion/tests/integration/trino/test_trino.py +++ b/metadata-ingestion/tests/integration/trino/test_trino.py @@ -209,4 +209,7 @@ def test_trino_instance_ingest( pytestconfig, output_path=events_file, golden_path=test_resources_dir / "trino_hive_instance_mces_golden.json", + ignore_paths=[ + r"root\[\d+\]\['proposedSnapshot'\]\['com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot'\]\['aspects'\]\[\d+\]\['com.linkedin.pegasus2avro.dataset.DatasetProperties'\]\['customProperties'\]\['transient_lastddltime'\]", + ], ) From 84dd2af35252404f459f6e9c913b6f357803cfbf Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Mon, 19 Feb 2024 19:26:50 +0530 Subject: [PATCH 5/7] Issue query failure with underlying snowflake connector resolved --- .../docs/sources/trino/trino_recipe.yml | 1 + .../src/datahub/ingestion/source/sql/trino.py | 111 ++++++++++-------- .../tests/integration/trino/test_trino.py | 1 + .../trino_hive_instance_mces_golden.json | 88 +++++++------- 4 files changed, 109 insertions(+), 92 deletions(-) diff --git a/metadata-ingestion/docs/sources/trino/trino_recipe.yml b/metadata-ingestion/docs/sources/trino/trino_recipe.yml index 2b874801b7c32..06158669143a9 100644 --- a/metadata-ingestion/docs/sources/trino/trino_recipe.yml +++ b/metadata-ingestion/docs/sources/trino/trino_recipe.yml @@ -19,6 +19,7 @@ source: # catalog_to_connector_details: # catalog_name: # connector_database: db_name + # connector_platform: connector_platform_name # platform_instance: cloud_instance # env: DEV diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py index ee1dda9aeb346..f1f827f296102 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py @@ -1,5 +1,6 @@ import functools import json +import logging import uuid from textwrap import dedent from typing import Any, Dict, Iterable, List, Optional, Union @@ -10,10 +11,10 @@ from pydantic.fields import Field from sqlalchemy import exc, sql from sqlalchemy.engine import reflection +from sqlalchemy.engine.base import Connection from sqlalchemy.engine.reflection import Inspector from sqlalchemy.sql import sqltypes from sqlalchemy.types import TypeEngine -from trino.exceptions import TrinoQueryError from trino.sqlalchemy import datatype from trino.sqlalchemy.dialect import TrinoDialect @@ -64,20 +65,41 @@ KNOWN_CONNECTOR_PLATFORM_MAPPING = { "clickhouse": "clickhouse", "hive": "hive", - "postgresql": "postgres", - "mysql": "mysql", + "glue": "glue", "iceberg": "iceberg", + "mysql": "mysql", + "postgresql": "postgres", "redshift": "redshift", "bigquery": "bigquery", + "snowflake_distributed": "snowflake", + "snowflake_parallel": "snowflake", + "snowflake_jdbc": "snowflake", } -TWO_TIER_CONNECTORS = ["clickhouse", "hive", "mysql", "iceberg"] +TWO_TIER_CONNECTORS = ["clickhouse", "hive", "glue", "mysql", "iceberg"] + +PROPERTIES_TABLE_SUPPORTED_CONNECTORS = ["hive", "iceberg"] # Type JSON was introduced in trino sqlalchemy dialect in version 0.317.0 if version.parse(trino.__version__) >= version.parse("0.317.0"): register_custom_type(datatype.JSON, RecordTypeClass) +@functools.lru_cache() +def get_catalog_connector_name( + connection: Connection, catalog_name: str +) -> Optional[str]: + query = dedent( + """ + SELECT * + FROM "system"."metadata"."catalogs" + """ + ).strip() + res = connection.execute(sql.text(query)) + catalog_connector_dict = {row.catalog_name: row.connector_name for row in res} + return catalog_connector_dict.get(catalog_name) + + # Read only table names and skip view names, as view names will also be returned # from get_view_names @reflection.cache # type: ignore @@ -100,26 +122,27 @@ def get_table_names(self, connection, schema: str = None, **kw): # type: ignore @reflection.cache # type: ignore def get_table_comment(self, connection, table_name: str, schema: str = None, **kw): # type: ignore try: - properties_table = self._get_full_table(f"{table_name}$properties", schema) - query = f"SELECT * FROM {properties_table}" - row = connection.execute(sql.text(query)).fetchone() - - # Generate properties dictionary. - properties = {} - if row: - for col_name, col_value in row.items(): - if col_value is not None: - properties[col_name] = col_value - - return {"text": properties.get("comment", None), "properties": properties} - # Fallback to default trino-sqlalchemy behaviour if `$properties` table doesn't exist - except TrinoQueryError: - return self.get_table_comment_default(connection, table_name, schema) - # Exception raised when using Starburst Delta Connector that falls back to a Hive Catalog - except exc.ProgrammingError as e: - if isinstance(e.orig, TrinoQueryError): + catalog_name = self._get_default_catalog_name(connection) + if catalog_name is None: + raise exc.NoSuchTableError("catalog is required in connection") + connector_name = get_catalog_connector_name(connection, catalog_name) + if connector_name is None: + return {} + if connector_name in PROPERTIES_TABLE_SUPPORTED_CONNECTORS: + properties_table = self._get_full_table(f"{table_name}$properties", schema) + query = f"SELECT * FROM {properties_table}" + row = connection.execute(sql.text(query)).fetchone() + + # Generate properties dictionary. + properties = {} + if row: + for col_name, col_value in row.items(): + if col_value is not None: + properties[col_name] = col_value + + return {"text": properties.get("comment", None), "properties": properties} + else: return self.get_table_comment_default(connection, table_name, schema) - raise except Exception: return {} @@ -162,30 +185,19 @@ def _get_columns(self, connection, table_name, schema: str = None, **kw): # typ TrinoDialect._get_columns = _get_columns -@functools.lru_cache() -def get_catalog_connector_name( - catalog_name: str, inspector: Inspector -) -> Optional[str]: - if inspector.engine: - query = dedent( - """ - SELECT * - FROM "system"."metadata"."catalogs" - """ - ).strip() - res = inspector.engine.execute(sql.text(query)) - catalog_connector_dict = {row.catalog_name: row.connector_name for row in res} - return catalog_connector_dict.get(catalog_name) - return None - - class ConnectorDetail(PlatformInstanceConfigMixin, EnvConfigMixin): connector_database: Optional[str] = Field(default=None, description="") + connector_platform: Optional[str] = Field( + default=None, + description="A connector's actual platform name. If not provided, will take from metadata tables" + "Eg: hive catalog can have a connector platform as 'hive' or 'glue' or some other metastore.", + ) class TrinoConfig(BasicSQLAlchemyConfig): # defaults scheme: str = Field(default="trino", description="", hidden_from_docs=True) + database: str = Field(description="database (catalog)") catalog_to_connector_details: Dict[str, ConnectorDetail] = Field( default={}, @@ -199,10 +211,7 @@ class TrinoConfig(BasicSQLAlchemyConfig): ) def get_identifier(self: BasicSQLAlchemyConfig, schema: str, table: str) -> str: - identifier = f"{schema}.{table}" - if self.database: # TODO: this should be required field - identifier = f"{self.database}.{identifier}" - return identifier + return f"{self.database}.{schema}.{table}" @platform_name("Trino", doc_order=1) @@ -242,15 +251,21 @@ def _get_source_dataset_urn( table: str, ) -> Optional[str]: catalog_name = dataset_name.split(".")[0] - connector_name = get_catalog_connector_name(catalog_name, inspector) + connector_name = get_catalog_connector_name(inspector.bind, catalog_name) if not connector_name: return None - connector_platform_name = KNOWN_CONNECTOR_PLATFORM_MAPPING.get(connector_name) - if not connector_platform_name: - return None connector_details = self.config.catalog_to_connector_details.get( catalog_name, ConnectorDetail() ) + connector_platform_name = KNOWN_CONNECTOR_PLATFORM_MAPPING.get( + connector_details.connector_platform + if connector_details.connector_platform + else connector_name + ) + if not connector_platform_name: + logging.debug(f"Platform '{connector_platform_name}' is not yet supported.") + return None + if connector_platform_name in TWO_TIER_CONNECTORS: # connector is two tier return make_dataset_urn_with_platform_instance( platform=connector_platform_name, diff --git a/metadata-ingestion/tests/integration/trino/test_trino.py b/metadata-ingestion/tests/integration/trino/test_trino.py index 31d739127b03e..4e24b5c887194 100644 --- a/metadata-ingestion/tests/integration/trino/test_trino.py +++ b/metadata-ingestion/tests/integration/trino/test_trino.py @@ -187,6 +187,7 @@ def test_trino_instance_ingest( schema_pattern=AllowDenyPattern(allow=["^db1"]), catalog_to_connector_details={ "hivedb": ConnectorDetail( + connector_platform="glue", platform_instance="local_server", ) }, diff --git a/metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json b/metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json index 32fe7efcbf918..9ea806d518caa 100644 --- a/metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json +++ b/metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json @@ -244,7 +244,7 @@ "numrows": "1", "rawdatasize": "32", "totalsize": "33", - "transient_lastddltime": "1708001962" + "transient_lastddltime": "1708350703" }, "name": "array_struct_test", "description": "This table has array of structs", @@ -387,7 +387,7 @@ "aspect": { "json": { "siblings": [ - "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.array_struct_test,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.array_struct_test,PROD)" ], "primary": false } @@ -429,7 +429,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.array_struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.array_struct_test,PROD)", "changeType": "UPSERT", "aspectName": "siblings", "aspect": { @@ -459,7 +459,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.array_struct_test,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.array_struct_test,PROD)", "type": "VIEW" } ] @@ -505,7 +505,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1708001965" + "transient_lastddltime": "1708350705" }, "name": "map_test", "tags": [] @@ -614,7 +614,7 @@ "aspect": { "json": { "siblings": [ - "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.map_test,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.map_test,PROD)" ], "primary": false } @@ -656,7 +656,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.map_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.map_test,PROD)", "changeType": "UPSERT", "aspectName": "siblings", "aspect": { @@ -686,7 +686,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.map_test,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.map_test,PROD)", "type": "VIEW" } ] @@ -732,7 +732,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1708001965" + "transient_lastddltime": "1708350705" }, "name": "nested_struct_test", "tags": [] @@ -890,7 +890,7 @@ "aspect": { "json": { "siblings": [ - "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.nested_struct_test,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.nested_struct_test,PROD)" ], "primary": false } @@ -932,7 +932,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.nested_struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.nested_struct_test,PROD)", "changeType": "UPSERT", "aspectName": "siblings", "aspect": { @@ -962,7 +962,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.nested_struct_test,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.nested_struct_test,PROD)", "type": "VIEW" } ] @@ -1003,7 +1003,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastddltime": "1708001956" + "transient_lastddltime": "1708350698" }, "name": "pokes", "tags": [] @@ -1120,7 +1120,7 @@ "aspect": { "json": { "siblings": [ - "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.pokes,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.pokes,PROD)" ], "primary": false } @@ -1162,7 +1162,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.pokes,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.pokes,PROD)", "changeType": "UPSERT", "aspectName": "siblings", "aspect": { @@ -1192,7 +1192,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.pokes,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.pokes,PROD)", "type": "VIEW" } ] @@ -1238,7 +1238,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1708001958" + "transient_lastddltime": "1708350700" }, "name": "struct_test", "tags": [] @@ -1374,7 +1374,7 @@ "aspect": { "json": { "siblings": [ - "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.struct_test,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.struct_test,PROD)" ], "primary": false } @@ -1416,7 +1416,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.struct_test,PROD)", "changeType": "UPSERT", "aspectName": "siblings", "aspect": { @@ -1446,7 +1446,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.struct_test,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.struct_test,PROD)", "type": "VIEW" } ] @@ -1489,7 +1489,7 @@ "customProperties": { "numfiles": "0", "totalsize": "0", - "transient_lastddltime": "1708001964" + "transient_lastddltime": "1708350705" }, "name": "struct_test_view_materialized", "tags": [] @@ -1625,7 +1625,7 @@ "aspect": { "json": { "siblings": [ - "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.struct_test_view_materialized,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.struct_test_view_materialized,PROD)" ], "primary": false } @@ -1667,7 +1667,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.struct_test_view_materialized,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.struct_test_view_materialized,PROD)", "changeType": "UPSERT", "aspectName": "siblings", "aspect": { @@ -1697,7 +1697,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.struct_test_view_materialized,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.struct_test_view_materialized,PROD)", "type": "VIEW" } ] @@ -1743,7 +1743,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1708001958" + "transient_lastddltime": "1708350700" }, "name": "_test_table_underscore", "tags": [] @@ -1848,7 +1848,7 @@ "aspect": { "json": { "siblings": [ - "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1._test_table_underscore,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1._test_table_underscore,PROD)" ], "primary": false } @@ -1890,7 +1890,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1._test_table_underscore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1._test_table_underscore,PROD)", "changeType": "UPSERT", "aspectName": "siblings", "aspect": { @@ -1920,7 +1920,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1._test_table_underscore,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1._test_table_underscore,PROD)", "type": "VIEW" } ] @@ -1966,7 +1966,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1708001965" + "transient_lastddltime": "1708350705" }, "name": "union_test", "tags": [] @@ -2155,7 +2155,7 @@ "aspect": { "json": { "siblings": [ - "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.union_test,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.union_test,PROD)" ], "primary": false } @@ -2197,7 +2197,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.union_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.union_test,PROD)", "changeType": "UPSERT", "aspectName": "siblings", "aspect": { @@ -2227,7 +2227,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.union_test,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.union_test,PROD)", "type": "VIEW" } ] @@ -2268,7 +2268,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastddltime": "1708001965", + "transient_lastddltime": "1708350705", "view_definition": "SELECT \"property_id\", \"service\"\nFROM \"db1\".\"array_struct_test\"", "is_view": "True" }, @@ -2428,7 +2428,7 @@ "aspect": { "json": { "siblings": [ - "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.array_struct_test_view,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.array_struct_test_view,PROD)" ], "primary": false } @@ -2470,7 +2470,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.array_struct_test_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.array_struct_test_view,PROD)", "changeType": "UPSERT", "aspectName": "siblings", "aspect": { @@ -2538,7 +2538,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1._test_table_underscore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1._test_table_underscore,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2554,7 +2554,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.array_struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.array_struct_test,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2570,7 +2570,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.array_struct_test_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.array_struct_test_view,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2586,7 +2586,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.map_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.map_test,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2602,7 +2602,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.nested_struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.nested_struct_test,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2618,7 +2618,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.pokes,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.pokes,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2634,7 +2634,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.struct_test,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2650,7 +2650,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.struct_test_view_materialized,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.struct_test_view_materialized,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2666,7 +2666,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,local_server.db1.union_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.union_test,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { From 3e22624a6eafdd0f20adc330565c4f75b03e095a Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Wed, 21 Feb 2024 12:30:55 +0530 Subject: [PATCH 6/7] Address review comments --- .../src/datahub/ingestion/source/sql/trino.py | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py index f1f827f296102..e0adc9740665e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py @@ -11,7 +11,7 @@ from pydantic.fields import Field from sqlalchemy import exc, sql from sqlalchemy.engine import reflection -from sqlalchemy.engine.base import Connection +from sqlalchemy.engine.base import Engine from sqlalchemy.engine.reflection import Inspector from sqlalchemy.sql import sqltypes from sqlalchemy.types import TypeEngine @@ -86,18 +86,19 @@ @functools.lru_cache() -def get_catalog_connector_name( - connection: Connection, catalog_name: str -) -> Optional[str]: +def gen_catalog_connector_dict(engine: Engine) -> Dict[str, str]: query = dedent( """ SELECT * FROM "system"."metadata"."catalogs" """ ).strip() - res = connection.execute(sql.text(query)) - catalog_connector_dict = {row.catalog_name: row.connector_name for row in res} - return catalog_connector_dict.get(catalog_name) + res = engine.execute(sql.text(query)) + return {row.catalog_name: row.connector_name for row in res} + + +def get_catalog_connector_name(engine: Engine, catalog_name: str) -> Optional[str]: + return gen_catalog_connector_dict(engine).get(catalog_name) # Read only table names and skip view names, as view names will also be returned @@ -125,7 +126,7 @@ def get_table_comment(self, connection, table_name: str, schema: str = None, **k catalog_name = self._get_default_catalog_name(connection) if catalog_name is None: raise exc.NoSuchTableError("catalog is required in connection") - connector_name = get_catalog_connector_name(connection, catalog_name) + connector_name = get_catalog_connector_name(connection.engine, catalog_name) if connector_name is None: return {} if connector_name in PROPERTIES_TABLE_SUPPORTED_CONNECTORS: @@ -251,16 +252,14 @@ def _get_source_dataset_urn( table: str, ) -> Optional[str]: catalog_name = dataset_name.split(".")[0] - connector_name = get_catalog_connector_name(inspector.bind, catalog_name) + connector_name = get_catalog_connector_name(inspector.engine, catalog_name) if not connector_name: return None connector_details = self.config.catalog_to_connector_details.get( catalog_name, ConnectorDetail() ) connector_platform_name = KNOWN_CONNECTOR_PLATFORM_MAPPING.get( - connector_details.connector_platform - if connector_details.connector_platform - else connector_name + connector_details.connector_platform or connector_name ) if not connector_platform_name: logging.debug(f"Platform '{connector_platform_name}' is not yet supported.") @@ -280,6 +279,8 @@ def _get_source_dataset_urn( platform_instance=connector_details.platform_instance, env=connector_details.env, ) + else: + logging.warning(f"Connector database missing for Catalog '{catalog_name}'.") return None def gen_siblings_workunit( From 3dc2962aaf868f0de1a0bc19ad4f45fd3493dfcd Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Mon, 26 Feb 2024 11:24:25 +0530 Subject: [PATCH 7/7] Add trino_as_primary config --- .../src/datahub/ingestion/source/sql/trino.py | 13 ++++- .../trino_hive_instance_mces_golden.json | 54 +++++++++---------- .../trino/trino_hive_mces_golden.json | 54 +++++++++---------- .../integration/trino/trino_mces_golden.json | 16 +++--- 4 files changed, 73 insertions(+), 64 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py index e0adc9740665e..cf199237e3041 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py @@ -211,6 +211,11 @@ class TrinoConfig(BasicSQLAlchemyConfig): description="Whether lineage of datasets to connectors should be ingested", ) + trino_as_primary: bool = Field( + default=True, + description="Experimental feature. Whether trino dataset should be primary entity of the set of siblings", + ) + def get_identifier(self: BasicSQLAlchemyConfig, schema: str, table: str) -> str: return f"{self.database}.{schema}.{table}" @@ -293,12 +298,16 @@ def gen_siblings_workunit( """ yield MetadataChangeProposalWrapper( entityUrn=dataset_urn, - aspect=Siblings(primary=False, siblings=[source_dataset_urn]), + aspect=Siblings( + primary=self.config.trino_as_primary, siblings=[source_dataset_urn] + ), ).as_workunit() yield MetadataChangeProposalWrapper( entityUrn=source_dataset_urn, - aspect=Siblings(primary=True, siblings=[dataset_urn]), + aspect=Siblings( + primary=not self.config.trino_as_primary, siblings=[dataset_urn] + ), ).as_workunit() def gen_lineage_workunit( diff --git a/metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json b/metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json index 9ea806d518caa..d63995506cb9c 100644 --- a/metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json +++ b/metadata-ingestion/tests/integration/trino/trino_hive_instance_mces_golden.json @@ -244,7 +244,7 @@ "numrows": "1", "rawdatasize": "32", "totalsize": "33", - "transient_lastddltime": "1708350703" + "transient_lastddltime": "1708925463" }, "name": "array_struct_test", "description": "This table has array of structs", @@ -389,7 +389,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.array_struct_test,PROD)" ], - "primary": false + "primary": true } }, "systemMetadata": { @@ -437,7 +437,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test,PROD)" ], - "primary": true + "primary": false } }, "systemMetadata": { @@ -505,7 +505,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1708350705" + "transient_lastddltime": "1708925466" }, "name": "map_test", "tags": [] @@ -616,7 +616,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.map_test,PROD)" ], - "primary": false + "primary": true } }, "systemMetadata": { @@ -664,7 +664,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.map_test,PROD)" ], - "primary": true + "primary": false } }, "systemMetadata": { @@ -732,7 +732,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1708350705" + "transient_lastddltime": "1708925466" }, "name": "nested_struct_test", "tags": [] @@ -892,7 +892,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.nested_struct_test,PROD)" ], - "primary": false + "primary": true } }, "systemMetadata": { @@ -940,7 +940,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.nested_struct_test,PROD)" ], - "primary": true + "primary": false } }, "systemMetadata": { @@ -1003,7 +1003,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastddltime": "1708350698" + "transient_lastddltime": "1708925457" }, "name": "pokes", "tags": [] @@ -1122,7 +1122,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.pokes,PROD)" ], - "primary": false + "primary": true } }, "systemMetadata": { @@ -1170,7 +1170,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.pokes,PROD)" ], - "primary": true + "primary": false } }, "systemMetadata": { @@ -1238,7 +1238,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1708350700" + "transient_lastddltime": "1708925459" }, "name": "struct_test", "tags": [] @@ -1376,7 +1376,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.struct_test,PROD)" ], - "primary": false + "primary": true } }, "systemMetadata": { @@ -1424,7 +1424,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test,PROD)" ], - "primary": true + "primary": false } }, "systemMetadata": { @@ -1489,7 +1489,7 @@ "customProperties": { "numfiles": "0", "totalsize": "0", - "transient_lastddltime": "1708350705" + "transient_lastddltime": "1708925466" }, "name": "struct_test_view_materialized", "tags": [] @@ -1627,7 +1627,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.struct_test_view_materialized,PROD)" ], - "primary": false + "primary": true } }, "systemMetadata": { @@ -1675,7 +1675,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.struct_test_view_materialized,PROD)" ], - "primary": true + "primary": false } }, "systemMetadata": { @@ -1743,7 +1743,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1708350700" + "transient_lastddltime": "1708925459" }, "name": "_test_table_underscore", "tags": [] @@ -1850,7 +1850,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1._test_table_underscore,PROD)" ], - "primary": false + "primary": true } }, "systemMetadata": { @@ -1898,7 +1898,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1._test_table_underscore,PROD)" ], - "primary": true + "primary": false } }, "systemMetadata": { @@ -1966,7 +1966,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1708350705" + "transient_lastddltime": "1708925466" }, "name": "union_test", "tags": [] @@ -2157,7 +2157,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.union_test,PROD)" ], - "primary": false + "primary": true } }, "systemMetadata": { @@ -2205,7 +2205,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.union_test,PROD)" ], - "primary": true + "primary": false } }, "systemMetadata": { @@ -2268,7 +2268,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastddltime": "1708350705", + "transient_lastddltime": "1708925466", "view_definition": "SELECT \"property_id\", \"service\"\nFROM \"db1\".\"array_struct_test\"", "is_view": "True" }, @@ -2430,7 +2430,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:glue,local_server.db1.array_struct_test_view,PROD)" ], - "primary": false + "primary": true } }, "systemMetadata": { @@ -2478,7 +2478,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:trino,production_warehouse.hivedb.db1.array_struct_test_view,PROD)" ], - "primary": true + "primary": false } }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json b/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json index caf0c08c0343f..3e79c8721486e 100644 --- a/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json +++ b/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json @@ -231,7 +231,7 @@ "numrows": "1", "rawdatasize": "32", "totalsize": "33", - "transient_lastddltime": "1708001962" + "transient_lastddltime": "1708925463" }, "name": "array_struct_test", "description": "This table has array of structs", @@ -359,7 +359,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)" ], - "primary": false + "primary": true } }, "systemMetadata": { @@ -403,7 +403,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD)" ], - "primary": true + "primary": false } }, "systemMetadata": { @@ -471,7 +471,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1708001965" + "transient_lastddltime": "1708925466" }, "name": "map_test", "tags": [] @@ -565,7 +565,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:hive,db1.map_test,PROD)" ], - "primary": false + "primary": true } }, "systemMetadata": { @@ -609,7 +609,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.map_test,PROD)" ], - "primary": true + "primary": false } }, "systemMetadata": { @@ -677,7 +677,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1708001965" + "transient_lastddltime": "1708925466" }, "name": "nested_struct_test", "tags": [] @@ -820,7 +820,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:hive,db1.nested_struct_test,PROD)" ], - "primary": false + "primary": true } }, "systemMetadata": { @@ -864,7 +864,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.nested_struct_test,PROD)" ], - "primary": true + "primary": false } }, "systemMetadata": { @@ -927,7 +927,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastddltime": "1708001956" + "transient_lastddltime": "1708925457" }, "name": "pokes", "tags": [] @@ -1029,7 +1029,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:hive,db1.pokes,PROD)" ], - "primary": false + "primary": true } }, "systemMetadata": { @@ -1073,7 +1073,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.pokes,PROD)" ], - "primary": true + "primary": false } }, "systemMetadata": { @@ -1141,7 +1141,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1708001958" + "transient_lastddltime": "1708925459" }, "name": "struct_test", "tags": [] @@ -1262,7 +1262,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test,PROD)" ], - "primary": false + "primary": true } }, "systemMetadata": { @@ -1306,7 +1306,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test,PROD)" ], - "primary": true + "primary": false } }, "systemMetadata": { @@ -1371,7 +1371,7 @@ "customProperties": { "numfiles": "0", "totalsize": "0", - "transient_lastddltime": "1708001964" + "transient_lastddltime": "1708925466" }, "name": "struct_test_view_materialized", "tags": [] @@ -1492,7 +1492,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)" ], - "primary": false + "primary": true } }, "systemMetadata": { @@ -1536,7 +1536,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.struct_test_view_materialized,PROD)" ], - "primary": true + "primary": false } }, "systemMetadata": { @@ -1604,7 +1604,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1708001958" + "transient_lastddltime": "1708925459" }, "name": "_test_table_underscore", "tags": [] @@ -1694,7 +1694,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:hive,db1._test_table_underscore,PROD)" ], - "primary": false + "primary": true } }, "systemMetadata": { @@ -1738,7 +1738,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1._test_table_underscore,PROD)" ], - "primary": true + "primary": false } }, "systemMetadata": { @@ -1806,7 +1806,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1708001965" + "transient_lastddltime": "1708925466" }, "name": "union_test", "tags": [] @@ -1980,7 +1980,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:hive,db1.union_test,PROD)" ], - "primary": false + "primary": true } }, "systemMetadata": { @@ -2024,7 +2024,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.union_test,PROD)" ], - "primary": true + "primary": false } }, "systemMetadata": { @@ -2087,7 +2087,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastddltime": "1708001965", + "transient_lastddltime": "1708925466", "view_definition": "SELECT \"property_id\", \"service\"\nFROM \"db1\".\"array_struct_test\"", "is_view": "True" }, @@ -2232,7 +2232,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)" ], - "primary": false + "primary": true } }, "systemMetadata": { @@ -2276,7 +2276,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD)" ], - "primary": true + "primary": false } }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/trino/trino_mces_golden.json b/metadata-ingestion/tests/integration/trino/trino_mces_golden.json index 5dcb28415606e..1f03f02fa9408 100644 --- a/metadata-ingestion/tests/integration/trino/trino_mces_golden.json +++ b/metadata-ingestion/tests/integration/trino/trino_mces_golden.json @@ -365,7 +365,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.book,PROD)" ], - "primary": false + "primary": true } }, "systemMetadata": { @@ -409,7 +409,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book,PROD)" ], - "primary": true + "primary": false } }, "systemMetadata": { @@ -584,7 +584,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.issue_history,PROD)" ], - "primary": false + "primary": true } }, "systemMetadata": { @@ -628,7 +628,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.issue_history,PROD)" ], - "primary": true + "primary": false } }, "systemMetadata": { @@ -779,7 +779,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.member,PROD)" ], - "primary": false + "primary": true } }, "systemMetadata": { @@ -823,7 +823,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.member,PROD)" ], - "primary": true + "primary": false } }, "systemMetadata": { @@ -1022,7 +1022,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:postgres,local_server.postgres.librarydb.book_in_circulation,PROD)" ], - "primary": false + "primary": true } }, "systemMetadata": { @@ -1066,7 +1066,7 @@ "siblings": [ "urn:li:dataset:(urn:li:dataPlatform:trino,postgresqldb.librarydb.book_in_circulation,PROD)" ], - "primary": true + "primary": false } }, "systemMetadata": {