From 9d08c38a07d161979ef78b499aea4e04af40d8ec Mon Sep 17 00:00:00 2001 From: John Joyce Date: Thu, 17 Aug 2023 08:44:45 -0700 Subject: [PATCH 01/10] feat(graphql) Support exists operator in GraphQL Search API (#8652) --- datahub-graphql-core/src/main/resources/search.graphql | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index f15535bfb4eb8..fbea66f738955 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -448,6 +448,11 @@ enum FilterOperator { * Represent the relation: String field is one of the array values to, e.g. name in ["Profile", "Event"] """ IN + + """ + Represents the relation: The field exists. If the field is an array, the field is either not present or empty. + """ + EXISTS } """ From 84f0a8f12747ae8c056d3fc757e6b7b2ac380afd Mon Sep 17 00:00:00 2001 From: John Joyce Date: Thu, 17 Aug 2023 08:44:53 -0700 Subject: [PATCH 02/10] [fix] [health ui] Removing ghost 0 for health signals on search cards (#8587) --- datahub-web-react/src/app/preview/DefaultPreviewCard.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx b/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx index f776082e3f905..36713cfb7ffcf 100644 --- a/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx +++ b/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx @@ -296,7 +296,7 @@ export default function DefaultPreviewCard({ {deprecation?.deprecated && ( )} - {health && health.length > 0 && } + {health && health.length > 0 ? : null} {externalUrl && ( Date: Thu, 17 Aug 2023 08:45:29 -0700 Subject: [PATCH 03/10] fix(data products): removing data products filter in search as its not indexed on entity documents (#8650) --- .../search/AdvancedFilterSelectValueModal.tsx | 19 ------------------- .../src/app/search/utils/constants.ts | 3 --- 2 files changed, 22 deletions(-) diff --git a/datahub-web-react/src/app/search/AdvancedFilterSelectValueModal.tsx b/datahub-web-react/src/app/search/AdvancedFilterSelectValueModal.tsx index e5f58a8662acc..c562fc6e8349a 100644 --- a/datahub-web-react/src/app/search/AdvancedFilterSelectValueModal.tsx +++ b/datahub-web-react/src/app/search/AdvancedFilterSelectValueModal.tsx @@ -23,9 +23,7 @@ import { REMOVED_FILTER_NAME, TAGS_FILTER_NAME, TYPE_NAMES_FILTER_NAME, - DATA_PRODUCTS_FILTER_NAME, } from './utils/constants'; -import SetDataProductModal from '../entity/shared/containers/profile/sidebar/DataProduct/SetDataProductModal'; type Props = { facet?: FacetMetadata | null; @@ -80,23 +78,6 @@ export const AdvancedFilterSelectValueModal = ({ ); } - if (filterField === DATA_PRODUCTS_FILTER_NAME) { - return ( - initialValues?.includes(agg?.entity?.urn || ''))?.entity || null - } - onModalClose={onCloseModal} - onOkOverride={(dataProductUrn) => { - onSelect([dataProductUrn]); - onCloseModal(); - }} - /> - ); - } - if (filterField === CONTAINER_FILTER_NAME) { return ( Date: Thu, 17 Aug 2023 12:44:15 -0400 Subject: [PATCH 04/10] feat(ingest/bigquery): add tag to BigQuery clustering columns (#8495) Co-authored-by: Andrew Sikowitz --- .../ingestion/source/bigquery_v2/bigquery.py | 26 ++++++++++++------- .../source/bigquery_v2/bigquery_schema.py | 8 +++++- .../tests/unit/test_bigquery_profiler.py | 3 +++ 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index d1f39a3ba1ba6..7725d63ce0e1e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -129,6 +129,7 @@ # Handle table snapshots # See https://cloud.google.com/bigquery/docs/table-snapshots-intro. SNAPSHOT_TABLE_REGEX = re.compile(r"^(.+)@(\d{13})$") +CLUSTERING_COLUMN_TAG = "CLUSTERING_COLUMN" # We can't use close as it is not called if the ingestion is not successful @@ -1151,6 +1152,21 @@ def gen_schema_fields(self, columns: List[BigqueryColumn]) -> List[SchemaField]: field.description = col.comment schema_fields[idx] = field else: + tags = [] + if col.is_partition_column: + tags.append( + TagAssociationClass(make_tag_urn(Constants.TAG_PARTITION_KEY)) + ) + + if col.cluster_column_position is not None: + tags.append( + TagAssociationClass( + make_tag_urn( + f"{CLUSTERING_COLUMN_TAG}_{col.cluster_column_position}" + ) + ) + ) + field = SchemaField( fieldPath=col.name, type=SchemaFieldDataType( @@ -1160,15 +1176,7 @@ def gen_schema_fields(self, columns: List[BigqueryColumn]) -> List[SchemaField]: nativeDataType=col.data_type, description=col.comment, nullable=col.is_nullable, - globalTags=GlobalTagsClass( - tags=[ - TagAssociationClass( - make_tag_urn(Constants.TAG_PARTITION_KEY) - ) - ] - ) - if col.is_partition_column - else GlobalTagsClass(tags=[]), + globalTags=GlobalTagsClass(tags=tags), ) schema_fields.append(field) last_id = col.ordinal_position diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py index 2450dbd0e2391..f8256f8e6fed6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py @@ -33,6 +33,7 @@ class BigqueryTableType: class BigqueryColumn(BaseColumn): field_path: str is_partition_column: bool + cluster_column_position: Optional[int] RANGE_PARTITION_NAME: str = "RANGE" @@ -285,7 +286,8 @@ class BigqueryQuery: CASE WHEN CONTAINS_SUBSTR(field_path, ".") THEN NULL ELSE c.data_type END as data_type, description as comment, c.is_hidden as is_hidden, - c.is_partitioning_column as is_partitioning_column + c.is_partitioning_column as is_partitioning_column, + c.clustering_ordinal_position as clustering_ordinal_position, from `{project_id}`.`{dataset_name}`.INFORMATION_SCHEMA.COLUMNS c join `{project_id}`.`{dataset_name}`.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS as cfp on cfp.table_name = c.table_name @@ -307,6 +309,7 @@ class BigqueryQuery: description as comment, c.is_hidden as is_hidden, c.is_partitioning_column as is_partitioning_column, + c.clustering_ordinal_position as clustering_ordinal_position, -- We count the columns to be able limit it later row_number() over (partition by c.table_catalog, c.table_schema, c.table_name order by c.ordinal_position asc, c.data_type DESC) as column_num, -- Getting the maximum shard for each table @@ -333,6 +336,7 @@ class BigqueryQuery: CASE WHEN CONTAINS_SUBSTR(field_path, ".") THEN NULL ELSE c.data_type END as data_type, c.is_hidden as is_hidden, c.is_partitioning_column as is_partitioning_column, + c.clustering_ordinal_position as clustering_ordinal_position, description as comment from `{table_identifier.project_id}`.`{table_identifier.dataset}`.INFORMATION_SCHEMA.COLUMNS as c @@ -583,6 +587,7 @@ def get_columns_for_dataset( data_type=column.data_type, comment=column.comment, is_partition_column=column.is_partitioning_column == "YES", + cluster_column_position=column.clustering_ordinal_position, ) ) @@ -621,6 +626,7 @@ def get_columns_for_table( data_type=column.data_type, comment=column.comment, is_partition_column=column.is_partitioning_column == "YES", + cluster_column_position=column.clustering_ordinal_position, ) ) last_seen_table = column.table_name diff --git a/metadata-ingestion/tests/unit/test_bigquery_profiler.py b/metadata-ingestion/tests/unit/test_bigquery_profiler.py index a2aec8df93d09..44ce5f0a02e37 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_profiler.py +++ b/metadata-ingestion/tests/unit/test_bigquery_profiler.py @@ -37,6 +37,7 @@ def test_generate_day_partitioned_partition_profiler_query(): ordinal_position=1, data_type="TIMESTAMP", is_partition_column=True, + cluster_column_position=None, comment=None, is_nullable=False, ) @@ -79,6 +80,7 @@ def test_generate_day_partitioned_partition_profiler_query_with_set_partition_ti ordinal_position=1, data_type="TIMESTAMP", is_partition_column=True, + cluster_column_position=None, comment=None, is_nullable=False, ) @@ -120,6 +122,7 @@ def test_generate_hour_partitioned_partition_profiler_query(): ordinal_position=1, data_type="TIMESTAMP", is_partition_column=True, + cluster_column_position=None, comment=None, is_nullable=False, ) From 130f908a492ace1072c6e41979737237c6859f0e Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 17 Aug 2023 10:27:19 -0700 Subject: [PATCH 05/10] fix(ingest/snowflake): fix usage enum bug (#8649) --- .../source/snowflake/snowflake_query.py | 16 ++++++++++------ .../source/snowflake/snowflake_usage_v2.py | 1 - 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py index 039eac1e93819..587c71a98be67 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py @@ -1,5 +1,6 @@ from typing import List, Optional +from datahub.configuration.time_window_config import BucketDuration from datahub.ingestion.source.snowflake.constants import SnowflakeObjectDomain from datahub.ingestion.source.snowflake.snowflake_config import DEFAULT_TABLES_DENY_LIST @@ -575,14 +576,17 @@ def get_access_history_date_range() -> str: def usage_per_object_per_time_bucket_for_time_window( start_time_millis: int, end_time_millis: int, - time_bucket_size: str, + time_bucket_size: BucketDuration, use_base_objects: bool, top_n_queries: int, include_top_n_queries: bool, ) -> str: if not include_top_n_queries: top_n_queries = 0 - assert time_bucket_size == "DAY" or time_bucket_size == "HOUR" + assert ( + time_bucket_size == BucketDuration.DAY + or time_bucket_size == BucketDuration.HOUR + ) objects_column = ( "BASE_OBJECTS_ACCESSED" if use_base_objects else "DIRECT_OBJECTS_ACCESSED" ) @@ -629,7 +633,7 @@ def usage_per_object_per_time_bucket_for_time_window( SELECT object_name, ANY_VALUE(object_domain) AS object_domain, - DATE_TRUNC('{time_bucket_size}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time, + DATE_TRUNC('{time_bucket_size.value}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time, count(distinct(query_id)) AS total_queries, count( distinct(user_name) ) AS total_users FROM @@ -644,7 +648,7 @@ def usage_per_object_per_time_bucket_for_time_window( SELECT object_name, column_name, - DATE_TRUNC('{time_bucket_size}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time, + DATE_TRUNC('{time_bucket_size.value}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time, count(distinct(query_id)) AS total_queries FROM field_access_history @@ -658,7 +662,7 @@ def usage_per_object_per_time_bucket_for_time_window( ( SELECT object_name, - DATE_TRUNC('{time_bucket_size}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time, + DATE_TRUNC('{time_bucket_size.value}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time, count(distinct(query_id)) AS total_queries, user_name, ANY_VALUE(users.email) AS user_email @@ -677,7 +681,7 @@ def usage_per_object_per_time_bucket_for_time_window( ( SELECT object_name, - DATE_TRUNC('{time_bucket_size}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time, + DATE_TRUNC('{time_bucket_size.value}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time, query_history.query_text AS query_text, count(distinct(access_history.query_id)) AS total_queries FROM diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py index 3605205b6055c..f8dfa612952d8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py @@ -356,7 +356,6 @@ def _check_usage_date_ranges(self) -> Any: def _get_operation_aspect_work_unit( self, event: SnowflakeJoinedAccessEvent, discovered_datasets: List[str] ) -> Iterable[MetadataWorkUnit]: - if event.query_start_time and event.query_type: start_time = event.query_start_time query_type = event.query_type From c5a2e8923ed676106badbff3b21da2abf5276c93 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 17 Aug 2023 10:29:10 -0700 Subject: [PATCH 06/10] feat(ingest/dbt-cloud): use job-based graphql queries (#8647) --- .../src/datahub/ingestion/source/dbt/dbt_cloud.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py index 1cd5ed8164854..af9769bc9d94c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py @@ -162,9 +162,11 @@ class DBTCloudConfig(DBTCommonConfig): } _DBT_GRAPHQL_QUERY = """ -query DatahubMetadataQuery_{type}($jobId: Int!, $runId: Int) {{ - {type}(jobId: $jobId, runId: $runId) {{ +query DatahubMetadataQuery_{type}($jobId: BigInt!, $runId: BigInt) {{ + job(id: $jobId, runId: $runId) {{ + {type} {{ {fields} + }} }} }} """ @@ -218,7 +220,7 @@ def load_nodes(self) -> Tuple[List[DBTNode], Dict[str, Optional[str]]]: }, ) - raw_nodes.extend(data[node_type]) + raw_nodes.extend(data["job"][node_type]) nodes = [self._parse_into_dbt_node(node) for node in raw_nodes] From 272dcfe7366a91e6ba6a8cc6435aba45bcdf9323 Mon Sep 17 00:00:00 2001 From: Kos Korchak <97058061+kkorchak@users.noreply.github.com> Date: Thu, 17 Aug 2023 15:21:36 -0400 Subject: [PATCH 07/10] Add and remove documentation and link for dataset (#8604) --- .../e2e/mutations/edit_documentation.js | 71 +++++++++++++++++++ smoke-test/tests/cypress/data.json | 2 +- 2 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js b/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js new file mode 100644 index 0000000000000..1f40cdf602062 --- /dev/null +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js @@ -0,0 +1,71 @@ +const test_id = Math.floor(Math.random() * 100000); +const documentation_edited = `This is test${test_id} documentation EDITED`; +const wrong_url = "https://www.linkedincom"; +const correct_url = "https://www.linkedin.com"; + +describe("edit documentation and link to dataset", () => { + + it("open test dataset page, edit documentation", () => { + //edit documentation and verify changes saved + cy.loginWithCredentials(); + cy.visit("/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema"); + cy.get("[role='tab']").contains("Documentation").click(); + cy.waitTextVisible("my hive dataset"); + cy.waitTextVisible("Sample doc"); + cy.clickOptionWithText("Edit"); + cy.focused().clear(); + cy.focused().type(documentation_edited); + cy.get("button").contains("Save").click(); + cy.waitTextVisible("Description Updated"); + cy.waitTextVisible(documentation_edited); + //return documentation to original state + cy.clickOptionWithText("Edit"); + cy.focused().clear().wait(1000); + cy.focused().type("my hive dataset"); + cy.get("button").contains("Save").click(); + cy.waitTextVisible("Description Updated"); + cy.waitTextVisible("my hive dataset"); + }); + + it("open test dataset page, remove and add dataset link", () => { + cy.loginWithCredentials(); + cy.visit("/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema"); + cy.get("[role='tab']").contains("Documentation").click(); + cy.contains("Sample doc").trigger("mouseover", { force: true }); + cy.get('[data-icon="delete"]').click(); + cy.waitTextVisible("Link Removed"); + cy.get("button").contains("Add Link").click(); + cy.get("#addLinkForm_url").type(wrong_url); + cy.waitTextVisible("This field must be a valid url."); + cy.focused().clear(); + cy.waitTextVisible("A URL is required."); + cy.focused().type(correct_url); + cy.ensureTextNotPresent("This field must be a valid url."); + cy.get("#addLinkForm_label").type("Sample doc"); + cy.get('[role="dialog"] button').contains("Add").click(); + cy.waitTextVisible("Link Added"); + cy.get("[role='tab']").contains("Documentation").click(); + cy.get(`[href='${correct_url}']`).should("be.visible"); + }); + + it("edit field documentation", () => { + cy.loginWithCredentials(); + cy.visit("/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema"); + cy.get("tbody [data-icon='edit']").first().click({ force: true }); + cy.waitTextVisible("Update description"); + cy.waitTextVisible("Foo field description has changed"); + cy.focused().clear().wait(1000); + cy.focused().type(documentation_edited); + cy.get("button").contains("Update").click(); + cy.waitTextVisible("Updated!"); + cy.waitTextVisible(documentation_edited); + cy.waitTextVisible("(edited)"); + cy.get("tbody [data-icon='edit']").first().click({ force: true }); + cy.focused().clear().wait(1000); + cy.focused().type("Foo field description has changed"); + cy.get("button").contains("Update").click(); + cy.waitTextVisible("Updated!"); + cy.waitTextVisible("Foo field description has changed"); + cy.waitTextVisible("(edited)"); + }); +}); \ No newline at end of file diff --git a/smoke-test/tests/cypress/data.json b/smoke-test/tests/cypress/data.json index c6606519e8d73..3b2ee1afaba58 100644 --- a/smoke-test/tests/cypress/data.json +++ b/smoke-test/tests/cypress/data.json @@ -2012,4 +2012,4 @@ }, "systemMetadata": null } -] +] \ No newline at end of file From 7b779982518935cddcd2ff76eaabae0cfa378161 Mon Sep 17 00:00:00 2001 From: Kos Korchak <97058061+kkorchak@users.noreply.github.com> Date: Thu, 17 Aug 2023 15:22:28 -0400 Subject: [PATCH 08/10] test(): Lineage column level test (#8641) --- .../e2e/lineage/lineage_column_level.js | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_level.js diff --git a/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_level.js b/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_level.js new file mode 100644 index 0000000000000..2a8fe045f154e --- /dev/null +++ b/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_level.js @@ -0,0 +1,51 @@ +const DATASET_ENTITY_TYPE = 'dataset'; +const DATASET_URN = 'urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)'; + +describe("column-level lineage graph test", () => { + + it("navigate to lineage graph view and verify that column-level lineage is showing correctly", () => { + cy.login(); + cy.goToEntityLineageGraph(DATASET_ENTITY_TYPE, DATASET_URN); + //verify columns not shown by default + cy.waitTextVisible("SampleCypressHdfs"); + cy.waitTextVisible("SampleCypressHive"); + cy.waitTextVisible("cypress_logging"); + cy.ensureTextNotPresent("shipment_info"); + cy.ensureTextNotPresent("field_foo"); + cy.ensureTextNotPresent("field_baz"); + cy.ensureTextNotPresent("event_name"); + cy.ensureTextNotPresent("event_data"); + cy.ensureTextNotPresent("timestamp"); + cy.ensureTextNotPresent("browser"); + cy.clickOptionWithTestId("column-toggle") + //verify columns appear and belong co correct dataset + cy.waitTextVisible("shipment_info"); + cy.waitTextVisible("shipment_info.date"); + cy.waitTextVisible("shipment_info.target"); + cy.waitTextVisible("shipment_info.destination"); + cy.waitTextVisible("shipment_info.geo_info"); + cy.waitTextVisible("field_foo"); + cy.waitTextVisible("field_baz"); + cy.waitTextVisible("event_name"); + cy.waitTextVisible("event_data"); + cy.waitTextVisible("timestamp"); + cy.waitTextVisible("browser"); + //verify columns can be hidden and shown again + cy.contains("Hide").click({ force:true }); + cy.ensureTextNotPresent("field_foo"); + cy.ensureTextNotPresent("field_baz"); + cy.get("[aria-label='down']").eq(1).click({ force:true }); + cy.waitTextVisible("field_foo"); + cy.waitTextVisible("field_baz"); + //verify columns can be disabled successfully + cy.clickOptionWithTestId("column-toggle") + cy.ensureTextNotPresent("shipment_info"); + cy.ensureTextNotPresent("field_foo"); + cy.ensureTextNotPresent("field_baz"); + cy.ensureTextNotPresent("event_name"); + cy.ensureTextNotPresent("event_data"); + cy.ensureTextNotPresent("timestamp"); + cy.ensureTextNotPresent("browser"); + }); + +}); \ No newline at end of file From 5f5f51f5151b9dbe105bd92e940c8c612b353f26 Mon Sep 17 00:00:00 2001 From: Ellie O'Neil <110510035+eboneil@users.noreply.github.com> Date: Thu, 17 Aug 2023 12:41:57 -0700 Subject: [PATCH 09/10] tests(search): search golden tests (#8605) --- .../metadata/ESSampleDataFixture.java | 106 ++++++++++++- .../metadata/ESTestConfiguration.java | 6 + .../com/linkedin/metadata/ESTestUtils.java | 5 + .../fixtures/ElasticSearchGoldenTest.java | 143 ++++++++++++++++++ .../fixtures/SampleDataFixtureTests.java | 1 + 5 files changed, 256 insertions(+), 5 deletions(-) create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/ElasticSearchGoldenTest.java diff --git a/metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java b/metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java index 847029bc180eb..20501225ef787 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java @@ -54,6 +54,13 @@ @TestConfiguration @Import(ESTestConfiguration.class) public class ESSampleDataFixture { + /** + * Interested in adding more fixtures? Here's what you will need to update? + * 1. Create a new indexPrefix and FixtureName. Both are needed or else all fixtures will load on top of each other, + * overwriting each other + * 2. Create a new IndexConvention, IndexBuilder, and EntityClient. These are needed + * to index a different set of entities. + */ @Autowired private ESBulkProcessor _bulkProcessor; @@ -61,6 +68,9 @@ public class ESSampleDataFixture { @Autowired private RestHighLevelClient _searchClient; + @Autowired + private RestHighLevelClient _longTailSearchClient; + @Autowired private SearchConfiguration _searchConfiguration; @@ -68,24 +78,54 @@ public class ESSampleDataFixture { private CustomSearchConfiguration _customSearchConfiguration; @Bean(name = "sampleDataPrefix") - protected String indexPrefix() { + protected String sampleDataPrefix() { return "smpldat"; } + @Bean(name = "longTailPrefix") + protected String longTailIndexPrefix() { + return "lngtl"; + } + @Bean(name = "sampleDataIndexConvention") protected IndexConvention indexConvention(@Qualifier("sampleDataPrefix") String prefix) { return new IndexConventionImpl(prefix); } + @Bean(name = "longTailIndexConvention") + protected IndexConvention longTailIndexConvention(@Qualifier("longTailPrefix") String prefix) { + return new IndexConventionImpl(prefix); + } + @Bean(name = "sampleDataFixtureName") - protected String fixtureName() { + protected String sampleDataFixtureName() { return "sample_data"; } + @Bean(name = "longTailFixtureName") + protected String longTailFixtureName() { + return "long_tail"; + } + @Bean(name = "sampleDataEntityIndexBuilders") protected EntityIndexBuilders entityIndexBuilders( @Qualifier("entityRegistry") EntityRegistry entityRegistry, @Qualifier("sampleDataIndexConvention") IndexConvention indexConvention + ) { + return entityIndexBuildersHelper(entityRegistry, indexConvention); + } + + @Bean(name = "longTailEntityIndexBuilders") + protected EntityIndexBuilders longTailEntityIndexBuilders( + @Qualifier("longTailEntityRegistry") EntityRegistry longTailEntityRegistry, + @Qualifier("longTailIndexConvention") IndexConvention indexConvention + ) { + return entityIndexBuildersHelper(longTailEntityRegistry, indexConvention); + } + + protected EntityIndexBuilders entityIndexBuildersHelper( + EntityRegistry entityRegistry, + IndexConvention indexConvention ) { GitVersion gitVersion = new GitVersion("0.0.0-test", "123456", Optional.empty()); ESIndexBuilder indexBuilder = new ESIndexBuilder(_searchClient, 1, 0, 1, @@ -100,6 +140,23 @@ protected ElasticSearchService entitySearchService( @Qualifier("entityRegistry") EntityRegistry entityRegistry, @Qualifier("sampleDataEntityIndexBuilders") EntityIndexBuilders indexBuilders, @Qualifier("sampleDataIndexConvention") IndexConvention indexConvention + ) throws IOException { + return entitySearchServiceHelper(entityRegistry, indexBuilders, indexConvention); + } + + @Bean(name = "longTailEntitySearchService") + protected ElasticSearchService longTailEntitySearchService( + @Qualifier("longTailEntityRegistry") EntityRegistry longTailEntityRegistry, + @Qualifier("longTailEntityIndexBuilders") EntityIndexBuilders longTailEndexBuilders, + @Qualifier("longTailIndexConvention") IndexConvention longTailIndexConvention + ) throws IOException { + return entitySearchServiceHelper(longTailEntityRegistry, longTailEndexBuilders, longTailIndexConvention); + } + + protected ElasticSearchService entitySearchServiceHelper( + EntityRegistry entityRegistry, + EntityIndexBuilders indexBuilders, + IndexConvention indexConvention ) throws IOException { CustomConfiguration customConfiguration = new CustomConfiguration(); customConfiguration.setEnabled(true); @@ -107,7 +164,7 @@ protected ElasticSearchService entitySearchService( CustomSearchConfiguration customSearchConfiguration = customConfiguration.resolve(new YAMLMapper()); ESSearchDAO searchDAO = new ESSearchDAO(entityRegistry, _searchClient, indexConvention, false, - ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration, customSearchConfiguration); + ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration, customSearchConfiguration); ESBrowseDAO browseDAO = new ESBrowseDAO(entityRegistry, _searchClient, indexConvention, _searchConfiguration, _customSearchConfiguration); ESWriteDAO writeDAO = new ESWriteDAO(entityRegistry, _searchClient, indexConvention, _bulkProcessor, 1); return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); @@ -120,9 +177,30 @@ protected SearchService searchService( @Qualifier("sampleDataEntitySearchService") ElasticSearchService entitySearchService, @Qualifier("sampleDataEntityIndexBuilders") EntityIndexBuilders indexBuilders, @Qualifier("sampleDataPrefix") String prefix, - @Qualifier("sampleDataFixtureName") String fixtureName + @Qualifier("sampleDataFixtureName") String sampleDataFixtureName ) throws IOException { + return searchServiceHelper(entityRegistry, entitySearchService, indexBuilders, prefix, sampleDataFixtureName); + } + @Bean(name = "longTailSearchService") + @Nonnull + protected SearchService longTailSearchService( + @Qualifier("longTailEntityRegistry") EntityRegistry longTailEntityRegistry, + @Qualifier("longTailEntitySearchService") ElasticSearchService longTailEntitySearchService, + @Qualifier("longTailEntityIndexBuilders") EntityIndexBuilders longTailIndexBuilders, + @Qualifier("longTailPrefix") String longTailPrefix, + @Qualifier("longTailFixtureName") String longTailFixtureName + ) throws IOException { + return searchServiceHelper(longTailEntityRegistry, longTailEntitySearchService, longTailIndexBuilders, longTailPrefix, longTailFixtureName); + } + + public SearchService searchServiceHelper( + EntityRegistry entityRegistry, + ElasticSearchService entitySearchService, + EntityIndexBuilders indexBuilders, + String prefix, + String fixtureName + ) throws IOException { int batchSize = 100; SearchRanker ranker = new SimpleRanker(); CacheManager cacheManager = new ConcurrentMapCacheManager(); @@ -159,6 +237,24 @@ protected EntityClient entityClient( @Qualifier("sampleDataSearchService") SearchService searchService, @Qualifier("sampleDataEntitySearchService") ElasticSearchService entitySearchService, @Qualifier("entityRegistry") EntityRegistry entityRegistry + ) { + return entityClientHelper(searchService, entitySearchService, entityRegistry); + } + + @Bean(name = "longTailEntityClient") + @Nonnull + protected EntityClient longTailEntityClient( + @Qualifier("sampleDataSearchService") SearchService searchService, + @Qualifier("sampleDataEntitySearchService") ElasticSearchService entitySearchService, + @Qualifier("longTailEntityRegistry") EntityRegistry longTailEntityRegistry + ) { + return entityClientHelper(searchService, entitySearchService, longTailEntityRegistry); + } + + private EntityClient entityClientHelper( + SearchService searchService, + ElasticSearchService entitySearchService, + EntityRegistry entityRegistry ) { CachingEntitySearchService cachingEntitySearchService = new CachingEntitySearchService( new ConcurrentMapCacheManager(), @@ -173,7 +269,7 @@ protected EntityClient entityClient( preProcessHooks.setUiEnabled(true); return new JavaEntityClient( new EntityServiceImpl(mockAspectDao, null, entityRegistry, true, null, - preProcessHooks), + preProcessHooks), null, entitySearchService, cachingEntitySearchService, diff --git a/metadata-io/src/test/java/com/linkedin/metadata/ESTestConfiguration.java b/metadata-io/src/test/java/com/linkedin/metadata/ESTestConfiguration.java index 0d7ac506599af..1e5b860b581fc 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/ESTestConfiguration.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/ESTestConfiguration.java @@ -137,4 +137,10 @@ public EntityRegistry entityRegistry() throws EntityRegistryException { return new ConfigEntityRegistry( ESTestConfiguration.class.getClassLoader().getResourceAsStream("entity-registry.yml")); } + + @Bean(name = "longTailEntityRegistry") + public EntityRegistry longTailEntityRegistry() throws EntityRegistryException { + return new ConfigEntityRegistry( + ESTestConfiguration.class.getClassLoader().getResourceAsStream("entity-registry.yml")); + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/ESTestUtils.java b/metadata-io/src/test/java/com/linkedin/metadata/ESTestUtils.java index 79496888650e1..45c4c16864b07 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/ESTestUtils.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/ESTestUtils.java @@ -77,6 +77,11 @@ public static SearchResult searchAcrossEntities(SearchService searchService, Str 100, new SearchFlags().setFulltext(true).setSkipCache(true), facets); } + public static SearchResult searchAcrossCustomEntities(SearchService searchService, String query, List searchableEntities) { + return searchService.searchAcrossEntities(searchableEntities, query, null, null, 0, + 100, new SearchFlags().setFulltext(true).setSkipCache(true)); + } + public static SearchResult search(SearchService searchService, String query) { return search(searchService, SEARCHABLE_ENTITIES, query); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/ElasticSearchGoldenTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/ElasticSearchGoldenTest.java new file mode 100644 index 0000000000000..cc0d9dca6ae5f --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/ElasticSearchGoldenTest.java @@ -0,0 +1,143 @@ +package com.linkedin.metadata.search.elasticsearch.fixtures; + +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.resolvers.EntityTypeMapper; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.ESSampleDataFixture; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.search.MatchedFieldArray; +import com.linkedin.metadata.search.SearchEntityArray; +import com.linkedin.metadata.search.SearchResult; +import com.linkedin.metadata.search.SearchService; +import org.elasticsearch.client.RestHighLevelClient; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Import; +import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; +import org.testng.annotations.Test; + +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static com.linkedin.metadata.ESTestUtils.*; +import static org.testng.Assert.assertTrue; +import static org.testng.AssertJUnit.*; + +@Import(ESSampleDataFixture.class) +public class ElasticSearchGoldenTest extends AbstractTestNGSpringContextTests { + + private static final List SEARCHABLE_LONGTAIL_ENTITIES = Stream.of(EntityType.CHART, EntityType.CONTAINER, + EntityType.DASHBOARD, EntityType.DATASET, EntityType.DOMAIN, EntityType.TAG + ).map(EntityTypeMapper::getName) + .collect(Collectors.toList()); + @Autowired + private RestHighLevelClient _searchClient; + + @Autowired + @Qualifier("longTailSearchService") + protected SearchService searchService; + + @Autowired + @Qualifier("longTailEntityClient") + protected EntityClient entityClient; + + @Autowired + @Qualifier("longTailEntityRegistry") + private EntityRegistry entityRegistry; + + @Test + public void testNameMatchPetProfiles() { + /* + Searching for "pet profiles" should return "pet_profiles" as the first 2 search results + */ + assertNotNull(searchService); + assertNotNull(entityRegistry); + SearchResult searchResult = searchAcrossCustomEntities(searchService, "pet profiles", SEARCHABLE_LONGTAIL_ENTITIES); + assertTrue(searchResult.getEntities().size() >= 2); + Urn firstResultUrn = searchResult.getEntities().get(0).getEntity(); + Urn secondResultUrn = searchResult.getEntities().get(1).getEntity(); + + assertTrue(firstResultUrn.toString().contains("pet_profiles")); + assertTrue(secondResultUrn.toString().contains("pet_profiles")); + } + + @Test + public void testNameMatchPetProfile() { + /* + Searching for "pet profile" should return "pet_profiles" as the first 2 search results + */ + assertNotNull(searchService); + SearchResult searchResult = searchAcrossEntities(searchService, "pet profile", SEARCHABLE_LONGTAIL_ENTITIES); + assertTrue(searchResult.getEntities().size() >= 2); + Urn firstResultUrn = searchResult.getEntities().get(0).getEntity(); + Urn secondResultUrn = searchResult.getEntities().get(1).getEntity(); + + assertTrue(firstResultUrn.toString().contains("pet_profiles")); + assertTrue(secondResultUrn.toString().contains("pet_profiles")); + } + + @Test + public void testNameMatchMemberInWorkspace() { + /* + Searching for "collaborative actionitems" should return "collaborative_actionitems" as the first search + result, followed by "collaborative_actionitems_old" + */ + assertNotNull(searchService); + SearchResult searchResult = searchAcrossEntities(searchService, "collaborative actionitems", SEARCHABLE_LONGTAIL_ENTITIES); + assertTrue(searchResult.getEntities().size() >= 2); + Urn firstResultUrn = searchResult.getEntities().get(0).getEntity(); + Urn secondResultUrn = searchResult.getEntities().get(1).getEntity(); + + // Checks that the table name is not suffixed with anything + assertTrue(firstResultUrn.toString().contains("collaborative_actionitems,")); + assertTrue(secondResultUrn.toString().contains("collaborative_actionitems_old")); + } + + @Test + public void testGlossaryTerms() { + /* + Searching for "ReturnRate" should return all tables that have the glossary term applied before + anything else + */ + assertNotNull(searchService); + SearchResult searchResult = searchAcrossEntities(searchService, "ReturnRate", SEARCHABLE_LONGTAIL_ENTITIES); + SearchEntityArray entities = searchResult.getEntities(); + assertTrue(searchResult.getEntities().size() >= 4); + MatchedFieldArray firstResultMatchedFields = entities.get(0).getMatchedFields(); + MatchedFieldArray secondResultMatchedFields = entities.get(1).getMatchedFields(); + MatchedFieldArray thirdResultMatchedFields = entities.get(2).getMatchedFields(); + MatchedFieldArray fourthResultMatchedFields = entities.get(3).getMatchedFields(); + + assertTrue(firstResultMatchedFields.toString().contains("ReturnRate")); + assertTrue(secondResultMatchedFields.toString().contains("ReturnRate")); + assertTrue(thirdResultMatchedFields.toString().contains("ReturnRate")); + assertTrue(fourthResultMatchedFields.toString().contains("ReturnRate")); + } + + /** + * + * The test below should be added back in as improvements are made to search, + * via the linked tickets. + * + **/ + + // TODO: enable once PFP-481 is complete + @Test(enabled = false) + public void testNameMatchPartiallyQualified() { + /* + Searching for "analytics.pet_details" (partially qualified) should return the fully qualified table + name as the first search results before any others + */ + assertNotNull(searchService); + SearchResult searchResult = searchAcrossEntities(searchService, "analytics.pet_details", SEARCHABLE_LONGTAIL_ENTITIES); + assertTrue(searchResult.getEntities().size() >= 2); + Urn firstResultUrn = searchResult.getEntities().get(0).getEntity(); + Urn secondResultUrn = searchResult.getEntities().get(1).getEntity(); + + assertTrue(firstResultUrn.toString().contains("snowflake,long_tail_companions.analytics.pet_details")); + assertTrue(secondResultUrn.toString().contains("dbt,long_tail_companions.analytics.pet_details")); + } + +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SampleDataFixtureTests.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SampleDataFixtureTests.java index dada13bd6f479..2f1e48c18450d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SampleDataFixtureTests.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SampleDataFixtureTests.java @@ -82,6 +82,7 @@ public class SampleDataFixtureTests extends AbstractTestNGSpringContextTests { protected EntityClient entityClient; @Autowired + @Qualifier("entityRegistry") private EntityRegistry entityRegistry; @Test From 10d3edc6eb72dd74b3bcff93b5ccf16b956c866d Mon Sep 17 00:00:00 2001 From: Kos Korchak <97058061+kkorchak@users.noreply.github.com> Date: Thu, 17 Aug 2023 16:24:03 -0400 Subject: [PATCH 10/10] test(): Add test case for dataset deprecation test (#8646) Co-authored-by: John Joyce --- .../cypress/e2e/mutations/deprecations.js | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/deprecations.js b/smoke-test/tests/cypress/cypress/e2e/mutations/deprecations.js index 1d41d155440e8..2fa11654a3c3e 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/deprecations.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/deprecations.js @@ -1,19 +1,29 @@ -describe("deprecation", () => { +describe("dataset deprecation", () => { it("go to dataset and check deprecation works", () => { const urn = "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)"; const datasetName = "cypress_logging_events"; cy.login(); - cy.goToDataset(urn, datasetName); cy.openThreeDotDropdown(); cy.clickOptionWithText("Mark as deprecated"); cy.addViaFormModal("test deprecation", "Add Deprecation Details"); - - cy.goToDataset(urn, datasetName); - cy.contains("DEPRECATED"); - + cy.waitTextVisible("Deprecation Updated"); + cy.waitTextVisible("DEPRECATED") cy.openThreeDotDropdown(); cy.clickOptionWithText("Mark as un-deprecated"); + cy.waitTextVisible("Deprecation Updated"); + cy.ensureTextNotPresent("DEPRECATED"); + cy.openThreeDotDropdown(); + cy.clickOptionWithText("Mark as deprecated"); + cy.addViaFormModal("test deprecation", "Add Deprecation Details"); + cy.waitTextVisible("Deprecation Updated"); + cy.waitTextVisible("DEPRECATED"); + cy.contains("DEPRECATED").trigger("mouseover", { force: true }); + cy.waitTextVisible("Deprecation note"); + cy.get("[role='tooltip']").contains("Mark as un-deprecated").click(); + cy.waitTextVisible("Confirm Mark as un-deprecated"); + cy.get("button").contains("Yes").click(); + cy.waitTextVisible("Marked assets as un-deprecated!"); cy.ensureTextNotPresent("DEPRECATED"); - }); + }); });