diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql
index f15535bfb4eb8..fbea66f738955 100644
--- a/datahub-graphql-core/src/main/resources/search.graphql
+++ b/datahub-graphql-core/src/main/resources/search.graphql
@@ -448,6 +448,11 @@ enum FilterOperator {
   Represent the relation: String field is one of the array values to, e.g. name in ["Profile", "Event"]
   """
   IN
+
+  """
+  Represents the relation: The field exists. If the field is an array, the field must be present and non-empty.
+  """
+  EXISTS
 }
 
 """
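
A hedged sketch of how a client might exercise the new EXISTS operator end to end. The orFilters/and/FacetFilterInput shape follows DataHub's GraphQL search API, but the host, token, and field name below are placeholders, not part of this change:

    import requests

    # Placeholder endpoint and token; adjust for your deployment.
    GRAPHQL_URL = "http://localhost:8080/api/graphql"
    HEADERS = {"Authorization": "Bearer <personal-access-token>"}

    # Find datasets whose description field is set, using the new EXISTS operator.
    QUERY = """
    query {
      searchAcrossEntities(input: {
        types: [DATASET],
        query: "*",
        start: 0,
        count: 10,
        orFilters: [{ and: [{ field: "description", condition: EXISTS, negated: false }] }]
      }) {
        total
      }
    }
    """

    resp = requests.post(GRAPHQL_URL, json={"query": QUERY}, headers=HEADERS)
    print(resp.json()["data"]["searchAcrossEntities"]["total"])
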
diff --git a/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx b/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx
index f776082e3f905..36713cfb7ffcf 100644
--- a/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx
+++ b/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx
@@ -296,7 +296,7 @@ export default function DefaultPreviewCard({
                     {deprecation?.deprecated && (
                     )}
-                    {health && health.length > 0 && }
+                    {health && health.length > 0 ? : null}
                     {externalUrl && (
-                        initialValues?.includes(agg?.entity?.urn || ''))?.entity || null
-                }
-                onModalClose={onCloseModal}
-                onOkOverride={(dataProductUrn) => {
-                    onSelect([dataProductUrn]);
-                    onCloseModal();
-                }}
-            />
-        );
-    }
-
     if (filterField === CONTAINER_FILTER_NAME) {
         return (
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py
@@ ... @@ def gen_schema_fields(self, columns: List[BigqueryColumn]) -> List[SchemaField]:
             field.description = col.comment
             schema_fields[idx] = field
         else:
+            tags = []
+            if col.is_partition_column:
+                tags.append(
+                    TagAssociationClass(make_tag_urn(Constants.TAG_PARTITION_KEY))
+                )
+
+            if col.cluster_column_position is not None:
+                tags.append(
+                    TagAssociationClass(
+                        make_tag_urn(
+                            f"{CLUSTERING_COLUMN_TAG}_{col.cluster_column_position}"
+                        )
+                    )
+                )
+
             field = SchemaField(
                 fieldPath=col.name,
                 type=SchemaFieldDataType(
@@ -1160,15 +1176,7 @@ def gen_schema_fields(self, columns: List[BigqueryColumn]) -> List[SchemaField]:
                 nativeDataType=col.data_type,
                 description=col.comment,
                 nullable=col.is_nullable,
-                globalTags=GlobalTagsClass(
-                    tags=[
-                        TagAssociationClass(
-                            make_tag_urn(Constants.TAG_PARTITION_KEY)
-                        )
-                    ]
-                )
-                if col.is_partition_column
-                else GlobalTagsClass(tags=[]),
+                globalTags=GlobalTagsClass(tags=tags),
             )
             schema_fields.append(field)
             last_id = col.ordinal_position
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py
index 2450dbd0e2391..f8256f8e6fed6 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py
@@ -33,6 +33,7 @@ class BigqueryTableType:
 class BigqueryColumn(BaseColumn):
     field_path: str
     is_partition_column: bool
+    cluster_column_position: Optional[int]
 
 
 RANGE_PARTITION_NAME: str = "RANGE"
 
@@ -285,7 +286,8 @@ class BigqueryQuery:
       CASE WHEN CONTAINS_SUBSTR(field_path, ".") THEN NULL ELSE c.data_type END as data_type,
       description as comment,
       c.is_hidden as is_hidden,
-      c.is_partitioning_column as is_partitioning_column
+      c.is_partitioning_column as is_partitioning_column,
+      c.clustering_ordinal_position as clustering_ordinal_position,
     from
       `{project_id}`.`{dataset_name}`.INFORMATION_SCHEMA.COLUMNS c
       join `{project_id}`.`{dataset_name}`.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS as cfp
         on cfp.table_name = c.table_name
@@ -307,6 +309,7 @@ class BigqueryQuery:
       description as comment,
       c.is_hidden as is_hidden,
       c.is_partitioning_column as is_partitioning_column,
+      c.clustering_ordinal_position as clustering_ordinal_position,
      -- We count the columns to be able limit it later
      row_number() over (partition by c.table_catalog, c.table_schema, c.table_name
                         order by c.ordinal_position asc, c.data_type DESC) as column_num,
      -- Getting the maximum shard for each table
@@ -333,6 +336,7 @@ class BigqueryQuery:
       CASE WHEN CONTAINS_SUBSTR(field_path, ".") THEN NULL ELSE c.data_type END as data_type,
       c.is_hidden as is_hidden,
       c.is_partitioning_column as is_partitioning_column,
+      c.clustering_ordinal_position as clustering_ordinal_position,
       description as comment
     from
       `{table_identifier.project_id}`.`{table_identifier.dataset}`.INFORMATION_SCHEMA.COLUMNS as c
@@ -583,6 +587,7 @@ def get_columns_for_dataset(
                     data_type=column.data_type,
                     comment=column.comment,
                     is_partition_column=column.is_partitioning_column == "YES",
+                    cluster_column_position=column.clustering_ordinal_position,
                 )
             )
 
@@ -621,6 +626,7 @@ def get_columns_for_table(
                 data_type=column.data_type,
                 comment=column.comment,
                 is_partition_column=column.is_partitioning_column == "YES",
+                cluster_column_position=column.clustering_ordinal_position,
             )
         )
         last_seen_table = column.table_name
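
A minimal sketch of what the reworked gen_schema_fields now emits for a column, assuming Constants.TAG_PARTITION_KEY is "partition_key" and CLUSTERING_COLUMN_TAG is "CLUSTERING_COLUMN" (the column values are made up):

    from datahub.emitter.mce_builder import make_tag_urn

    # A column that is the table's partition column and its 2nd clustering column.
    is_partition_column = True
    cluster_column_position = 2

    tags = []
    if is_partition_column:
        tags.append(make_tag_urn("partition_key"))
    if cluster_column_position is not None:
        tags.append(make_tag_urn(f"CLUSTERING_COLUMN_{cluster_column_position}"))

    print(tags)  # ['urn:li:tag:partition_key', 'urn:li:tag:CLUSTERING_COLUMN_2']
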
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py
index 1cd5ed8164854..af9769bc9d94c 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py
@@ -162,9 +162,11 @@ class DBTCloudConfig(DBTCommonConfig):
 }
 
 _DBT_GRAPHQL_QUERY = """
-query DatahubMetadataQuery_{type}($jobId: Int!, $runId: Int) {{
-  {type}(jobId: $jobId, runId: $runId) {{
+query DatahubMetadataQuery_{type}($jobId: BigInt!, $runId: BigInt) {{
+  job(id: $jobId, runId: $runId) {{
+    {type} {{
     {fields}
+    }}
   }}
 }}
 """
@@ -218,7 +220,7 @@ def load_nodes(self) -> Tuple[List[DBTNode], Dict[str, Optional[str]]]:
             },
         )
 
-        raw_nodes.extend(data[node_type])
+        raw_nodes.extend(data["job"][node_type])
 
     nodes = [self._parse_into_dbt_node(node) for node in raw_nodes]
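
A small sketch of why load_nodes switched to data["job"][node_type]: the job-scoped form of the dbt Cloud metadata query nests each node list under a job object. The response is abbreviated and the uniqueId is a made-up example:

    # Shape of the GraphQL response for the rewritten query, e.g. with {type} = "models":
    data = {
        "job": {
            "models": [
                {"uniqueId": "model.jaffle_shop.customers"},  # hypothetical node
            ],
        },
    }

    node_type = "models"
    raw_nodes = []
    raw_nodes.extend(data["job"][node_type])  # previously: data[node_type]
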
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py
index 039eac1e93819..587c71a98be67 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py
@@ -1,5 +1,6 @@
 from typing import List, Optional
 
+from datahub.configuration.time_window_config import BucketDuration
 from datahub.ingestion.source.snowflake.constants import SnowflakeObjectDomain
 from datahub.ingestion.source.snowflake.snowflake_config import DEFAULT_TABLES_DENY_LIST
 
@@ -575,14 +576,17 @@ def get_access_history_date_range() -> str:
     def usage_per_object_per_time_bucket_for_time_window(
         start_time_millis: int,
         end_time_millis: int,
-        time_bucket_size: str,
+        time_bucket_size: BucketDuration,
         use_base_objects: bool,
         top_n_queries: int,
         include_top_n_queries: bool,
     ) -> str:
         if not include_top_n_queries:
             top_n_queries = 0
-        assert time_bucket_size == "DAY" or time_bucket_size == "HOUR"
+        assert (
+            time_bucket_size == BucketDuration.DAY
+            or time_bucket_size == BucketDuration.HOUR
+        )
         objects_column = (
             "BASE_OBJECTS_ACCESSED" if use_base_objects else "DIRECT_OBJECTS_ACCESSED"
         )
@@ -629,7 +633,7 @@ def usage_per_object_per_time_bucket_for_time_window(
         SELECT
             object_name,
             ANY_VALUE(object_domain) AS object_domain,
-            DATE_TRUNC('{time_bucket_size}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time,
+            DATE_TRUNC('{time_bucket_size.value}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time,
            count(distinct(query_id)) AS total_queries,
            count( distinct(user_name) ) AS total_users
        FROM
@@ -644,7 +648,7 @@ def usage_per_object_per_time_bucket_for_time_window(
         SELECT
             object_name,
             column_name,
-            DATE_TRUNC('{time_bucket_size}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time,
+            DATE_TRUNC('{time_bucket_size.value}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time,
             count(distinct(query_id)) AS total_queries
         FROM
             field_access_history
@@ -658,7 +662,7 @@ def usage_per_object_per_time_bucket_for_time_window(
         (
         SELECT
             object_name,
-            DATE_TRUNC('{time_bucket_size}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time,
+            DATE_TRUNC('{time_bucket_size.value}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time,
             count(distinct(query_id)) AS total_queries,
             user_name,
             ANY_VALUE(users.email) AS user_email
@@ -677,7 +681,7 @@ def usage_per_object_per_time_bucket_for_time_window(
         (
         SELECT
             object_name,
-            DATE_TRUNC('{time_bucket_size}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time,
+            DATE_TRUNC('{time_bucket_size.value}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time,
             query_history.query_text AS query_text,
             count(distinct(access_history.query_id)) AS total_queries
         FROM
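
A quick sketch of the typed bucket size above: BucketDuration (imported at the top of the file) is an enum whose DAY/HOUR members carry the literal strings the query needs, so `.value` interpolates into the Snowflake SQL exactly as the old raw string did:

    from datahub.configuration.time_window_config import BucketDuration

    time_bucket_size = BucketDuration.DAY
    assert time_bucket_size in (BucketDuration.DAY, BucketDuration.HOUR)

    sql = (
        f"DATE_TRUNC('{time_bucket_size.value}', "
        "CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time"
    )
    print(sql)  # DATE_TRUNC('DAY', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time
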
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py
index 3605205b6055c..f8dfa612952d8 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py
@@ -356,7 +356,6 @@ def _check_usage_date_ranges(self) -> Any:
     def _get_operation_aspect_work_unit(
         self, event: SnowflakeJoinedAccessEvent, discovered_datasets: List[str]
     ) -> Iterable[MetadataWorkUnit]:
-
         if event.query_start_time and event.query_type:
             start_time = event.query_start_time
             query_type = event.query_type
diff --git a/metadata-ingestion/tests/unit/test_bigquery_profiler.py b/metadata-ingestion/tests/unit/test_bigquery_profiler.py
index a2aec8df93d09..44ce5f0a02e37 100644
--- a/metadata-ingestion/tests/unit/test_bigquery_profiler.py
+++ b/metadata-ingestion/tests/unit/test_bigquery_profiler.py
@@ -37,6 +37,7 @@ def test_generate_day_partitioned_partition_profiler_query():
         ordinal_position=1,
         data_type="TIMESTAMP",
         is_partition_column=True,
+        cluster_column_position=None,
         comment=None,
         is_nullable=False,
     )
@@ -79,6 +80,7 @@ def test_generate_day_partitioned_partition_profiler_query_with_set_partition_ti
         ordinal_position=1,
         data_type="TIMESTAMP",
         is_partition_column=True,
+        cluster_column_position=None,
         comment=None,
         is_nullable=False,
     )
@@ -120,6 +122,7 @@ def test_generate_hour_partitioned_partition_profiler_query():
         ordinal_position=1,
         data_type="TIMESTAMP",
         is_partition_column=True,
+        cluster_column_position=None,
         comment=None,
         is_nullable=False,
     )
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java b/metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java
index 847029bc180eb..20501225ef787 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java
@@ -54,6 +54,13 @@
 @TestConfiguration
 @Import(ESTestConfiguration.class)
 public class ESSampleDataFixture {
+  /**
+   * Interested in adding more fixtures? Here's what you will need to update:
+   * 1. Create a new indexPrefix and fixtureName. Both are needed, or else all fixtures will load on top of each other,
+   *    overwriting one another.
+   * 2. Create a new IndexConvention, IndexBuilder, and EntityClient. These are needed
+   *    to index a different set of entities.
+   */
 
   @Autowired
   private ESBulkProcessor _bulkProcessor;
@@ -61,6 +68,9 @@ public class ESSampleDataFixture {
   @Autowired
   private RestHighLevelClient _searchClient;
 
+  @Autowired
+  private RestHighLevelClient _longTailSearchClient;
+
   @Autowired
   private SearchConfiguration _searchConfiguration;
 
@@ -68,24 +78,54 @@ public class ESSampleDataFixture {
   private CustomSearchConfiguration _customSearchConfiguration;
 
   @Bean(name = "sampleDataPrefix")
-  protected String indexPrefix() {
+  protected String sampleDataPrefix() {
     return "smpldat";
   }
 
+  @Bean(name = "longTailPrefix")
+  protected String longTailIndexPrefix() {
+    return "lngtl";
+  }
+
   @Bean(name = "sampleDataIndexConvention")
   protected IndexConvention indexConvention(@Qualifier("sampleDataPrefix") String prefix) {
     return new IndexConventionImpl(prefix);
   }
 
+  @Bean(name = "longTailIndexConvention")
+  protected IndexConvention longTailIndexConvention(@Qualifier("longTailPrefix") String prefix) {
+    return new IndexConventionImpl(prefix);
+  }
+
   @Bean(name = "sampleDataFixtureName")
-  protected String fixtureName() {
+  protected String sampleDataFixtureName() {
     return "sample_data";
   }
 
+  @Bean(name = "longTailFixtureName")
+  protected String longTailFixtureName() {
+    return "long_tail";
+  }
+
   @Bean(name = "sampleDataEntityIndexBuilders")
   protected EntityIndexBuilders entityIndexBuilders(
       @Qualifier("entityRegistry") EntityRegistry entityRegistry,
       @Qualifier("sampleDataIndexConvention") IndexConvention indexConvention
+  ) {
+    return entityIndexBuildersHelper(entityRegistry, indexConvention);
+  }
+
+  @Bean(name = "longTailEntityIndexBuilders")
+  protected EntityIndexBuilders longTailEntityIndexBuilders(
+      @Qualifier("longTailEntityRegistry") EntityRegistry longTailEntityRegistry,
+      @Qualifier("longTailIndexConvention") IndexConvention indexConvention
+  ) {
+    return entityIndexBuildersHelper(longTailEntityRegistry, indexConvention);
+  }
+
+  protected EntityIndexBuilders entityIndexBuildersHelper(
+      EntityRegistry entityRegistry,
+      IndexConvention indexConvention
   ) {
     GitVersion gitVersion = new GitVersion("0.0.0-test", "123456", Optional.empty());
     ESIndexBuilder indexBuilder = new ESIndexBuilder(_searchClient, 1, 0, 1,
@@ -100,6 +140,23 @@ protected ElasticSearchService entitySearchService(
       @Qualifier("entityRegistry") EntityRegistry entityRegistry,
       @Qualifier("sampleDataEntityIndexBuilders") EntityIndexBuilders indexBuilders,
       @Qualifier("sampleDataIndexConvention") IndexConvention indexConvention
+  ) throws IOException {
+    return entitySearchServiceHelper(entityRegistry, indexBuilders, indexConvention);
+  }
+
+  @Bean(name = "longTailEntitySearchService")
+  protected ElasticSearchService longTailEntitySearchService(
+      @Qualifier("longTailEntityRegistry") EntityRegistry longTailEntityRegistry,
+      @Qualifier("longTailEntityIndexBuilders") EntityIndexBuilders longTailIndexBuilders,
+      @Qualifier("longTailIndexConvention") IndexConvention longTailIndexConvention
+  ) throws IOException {
+    return entitySearchServiceHelper(longTailEntityRegistry, longTailIndexBuilders, longTailIndexConvention);
+  }
+
+  protected ElasticSearchService entitySearchServiceHelper(
+      EntityRegistry entityRegistry,
+      EntityIndexBuilders indexBuilders,
+      IndexConvention indexConvention
   ) throws IOException {
     CustomConfiguration customConfiguration = new CustomConfiguration();
     customConfiguration.setEnabled(true);
@@ -107,7 +164,7 @@ protected ElasticSearchService entitySearchService(
     CustomSearchConfiguration customSearchConfiguration = customConfiguration.resolve(new YAMLMapper());
 
     ESSearchDAO searchDAO = new ESSearchDAO(entityRegistry, _searchClient, indexConvention, false,
-            ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration, customSearchConfiguration);
+        ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration, customSearchConfiguration);
     ESBrowseDAO browseDAO = new ESBrowseDAO(entityRegistry, _searchClient, indexConvention, _searchConfiguration, _customSearchConfiguration);
     ESWriteDAO writeDAO = new ESWriteDAO(entityRegistry, _searchClient, indexConvention, _bulkProcessor, 1);
     return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO);
@@ -120,9 +177,30 @@ protected SearchService searchService(
       @Qualifier("sampleDataEntitySearchService") ElasticSearchService entitySearchService,
       @Qualifier("sampleDataEntityIndexBuilders") EntityIndexBuilders indexBuilders,
       @Qualifier("sampleDataPrefix") String prefix,
-      @Qualifier("sampleDataFixtureName") String fixtureName
+      @Qualifier("sampleDataFixtureName") String sampleDataFixtureName
   ) throws IOException {
+    return searchServiceHelper(entityRegistry, entitySearchService, indexBuilders, prefix, sampleDataFixtureName);
+  }
 
+  @Bean(name = "longTailSearchService")
+  @Nonnull
+  protected SearchService longTailSearchService(
+      @Qualifier("longTailEntityRegistry") EntityRegistry longTailEntityRegistry,
+      @Qualifier("longTailEntitySearchService") ElasticSearchService longTailEntitySearchService,
+      @Qualifier("longTailEntityIndexBuilders") EntityIndexBuilders longTailIndexBuilders,
+      @Qualifier("longTailPrefix") String longTailPrefix,
+      @Qualifier("longTailFixtureName") String longTailFixtureName
+  ) throws IOException {
+    return searchServiceHelper(longTailEntityRegistry, longTailEntitySearchService, longTailIndexBuilders, longTailPrefix, longTailFixtureName);
+  }
+
+  public SearchService searchServiceHelper(
+      EntityRegistry entityRegistry,
+      ElasticSearchService entitySearchService,
+      EntityIndexBuilders indexBuilders,
+      String prefix,
+      String fixtureName
+  ) throws IOException {
     int batchSize = 100;
     SearchRanker ranker = new SimpleRanker();
     CacheManager cacheManager = new ConcurrentMapCacheManager();
@@ -159,6 +237,24 @@ protected EntityClient entityClient(
       @Qualifier("sampleDataSearchService") SearchService searchService,
       @Qualifier("sampleDataEntitySearchService") ElasticSearchService entitySearchService,
       @Qualifier("entityRegistry") EntityRegistry entityRegistry
+  ) {
+    return entityClientHelper(searchService, entitySearchService, entityRegistry);
+  }
+
+  @Bean(name = "longTailEntityClient")
+  @Nonnull
+  protected EntityClient longTailEntityClient(
+      @Qualifier("sampleDataSearchService") SearchService searchService,
+      @Qualifier("sampleDataEntitySearchService") ElasticSearchService entitySearchService,
+      @Qualifier("longTailEntityRegistry") EntityRegistry longTailEntityRegistry
+  ) {
+    return entityClientHelper(searchService, entitySearchService, longTailEntityRegistry);
+  }
+
+  private EntityClient entityClientHelper(
+      SearchService searchService,
+      ElasticSearchService entitySearchService,
+      EntityRegistry entityRegistry
   ) {
     CachingEntitySearchService cachingEntitySearchService = new CachingEntitySearchService(
         new ConcurrentMapCacheManager(),
@@ -173,7 +269,7 @@ protected EntityClient entityClient(
     preProcessHooks.setUiEnabled(true);
     return new JavaEntityClient(
         new EntityServiceImpl(mockAspectDao, null, entityRegistry, true, null,
-            preProcessHooks),
+        preProcessHooks),
         null,
         entitySearchService,
         cachingEntitySearchService,
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/ESTestConfiguration.java b/metadata-io/src/test/java/com/linkedin/metadata/ESTestConfiguration.java
index 0d7ac506599af..1e5b860b581fc 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/ESTestConfiguration.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/ESTestConfiguration.java
@@ -137,4 +137,10 @@ public EntityRegistry entityRegistry() throws EntityRegistryException {
     return new ConfigEntityRegistry(
         ESTestConfiguration.class.getClassLoader().getResourceAsStream("entity-registry.yml"));
   }
+
+  @Bean(name = "longTailEntityRegistry")
+  public EntityRegistry longTailEntityRegistry() throws EntityRegistryException {
+    return new ConfigEntityRegistry(
+        ESTestConfiguration.class.getClassLoader().getResourceAsStream("entity-registry.yml"));
+  }
 }
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/ESTestUtils.java b/metadata-io/src/test/java/com/linkedin/metadata/ESTestUtils.java
index 79496888650e1..45c4c16864b07 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/ESTestUtils.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/ESTestUtils.java
@@ -77,6 +77,11 @@ public static SearchResult searchAcrossEntities(SearchService searchService, String query, List<String> facets) {
         100, new SearchFlags().setFulltext(true).setSkipCache(true), facets);
   }
 
+  public static SearchResult searchAcrossCustomEntities(SearchService searchService, String query, List<String> searchableEntities) {
+    return searchService.searchAcrossEntities(searchableEntities, query, null, null, 0,
+        100, new SearchFlags().setFulltext(true).setSkipCache(true));
+  }
+
   public static SearchResult search(SearchService searchService, String query) {
     return search(searchService, SEARCHABLE_ENTITIES, query);
   }
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/ElasticSearchGoldenTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/ElasticSearchGoldenTest.java
new file mode 100644
index 0000000000000..cc0d9dca6ae5f
--- /dev/null
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/ElasticSearchGoldenTest.java
@@ -0,0 +1,143 @@
+package com.linkedin.metadata.search.elasticsearch.fixtures;
+
+import com.linkedin.common.urn.Urn;
+import com.linkedin.datahub.graphql.generated.EntityType;
+import com.linkedin.datahub.graphql.resolvers.EntityTypeMapper;
+import com.linkedin.entity.client.EntityClient;
+import com.linkedin.metadata.ESSampleDataFixture;
+import com.linkedin.metadata.models.registry.EntityRegistry;
+import com.linkedin.metadata.search.MatchedFieldArray;
+import com.linkedin.metadata.search.SearchEntityArray;
+import com.linkedin.metadata.search.SearchResult;
+import com.linkedin.metadata.search.SearchService;
+import org.elasticsearch.client.RestHighLevelClient;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Qualifier;
+import org.springframework.context.annotation.Import;
+import org.springframework.test.context.testng.AbstractTestNGSpringContextTests;
+import org.testng.annotations.Test;
+
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static com.linkedin.metadata.ESTestUtils.*;
+import static org.testng.Assert.assertTrue;
+import static org.testng.AssertJUnit.*;
+
+@Import(ESSampleDataFixture.class)
+public class ElasticSearchGoldenTest extends AbstractTestNGSpringContextTests {
+
+    private static final List<String> SEARCHABLE_LONGTAIL_ENTITIES = Stream.of(EntityType.CHART, EntityType.CONTAINER,
+            EntityType.DASHBOARD, EntityType.DATASET, EntityType.DOMAIN, EntityType.TAG
+    ).map(EntityTypeMapper::getName)
+            .collect(Collectors.toList());
+    @Autowired
+    private RestHighLevelClient _searchClient;
+
+    @Autowired
+    @Qualifier("longTailSearchService")
+    protected SearchService searchService;
+
+    @Autowired
+    @Qualifier("longTailEntityClient")
+    protected EntityClient entityClient;
+
+    @Autowired
+    @Qualifier("longTailEntityRegistry")
+    private EntityRegistry entityRegistry;
+
+    @Test
+    public void testNameMatchPetProfiles() {
+        /*
+          Searching for "pet profiles" should return "pet_profiles" as the first 2 search results
+        */
+        assertNotNull(searchService);
+        assertNotNull(entityRegistry);
+        SearchResult searchResult = searchAcrossCustomEntities(searchService, "pet profiles", SEARCHABLE_LONGTAIL_ENTITIES);
+        assertTrue(searchResult.getEntities().size() >= 2);
+        Urn firstResultUrn = searchResult.getEntities().get(0).getEntity();
+        Urn secondResultUrn = searchResult.getEntities().get(1).getEntity();
+
+        assertTrue(firstResultUrn.toString().contains("pet_profiles"));
+        assertTrue(secondResultUrn.toString().contains("pet_profiles"));
+    }
+
+    @Test
+    public void testNameMatchPetProfile() {
+        /*
+          Searching for "pet profile" should return "pet_profiles" as the first 2 search results
+        */
+        assertNotNull(searchService);
+        SearchResult searchResult = searchAcrossCustomEntities(searchService, "pet profile", SEARCHABLE_LONGTAIL_ENTITIES);
+        assertTrue(searchResult.getEntities().size() >= 2);
+        Urn firstResultUrn = searchResult.getEntities().get(0).getEntity();
+        Urn secondResultUrn = searchResult.getEntities().get(1).getEntity();
+
+        assertTrue(firstResultUrn.toString().contains("pet_profiles"));
+        assertTrue(secondResultUrn.toString().contains("pet_profiles"));
+    }
+
+    @Test
+    public void testNameMatchMemberInWorkspace() {
+        /*
+          Searching for "collaborative actionitems" should return "collaborative_actionitems" as the first search
+          result, followed by "collaborative_actionitems_old"
+        */
+        assertNotNull(searchService);
+        SearchResult searchResult = searchAcrossCustomEntities(searchService, "collaborative actionitems", SEARCHABLE_LONGTAIL_ENTITIES);
+        assertTrue(searchResult.getEntities().size() >= 2);
+        Urn firstResultUrn = searchResult.getEntities().get(0).getEntity();
+        Urn secondResultUrn = searchResult.getEntities().get(1).getEntity();
+
+        // Checks that the table name is not suffixed with anything
+        assertTrue(firstResultUrn.toString().contains("collaborative_actionitems,"));
+        assertTrue(secondResultUrn.toString().contains("collaborative_actionitems_old"));
+    }
+
+    @Test
+    public void testGlossaryTerms() {
+        /*
+          Searching for "ReturnRate" should return all tables that have the glossary term applied before
+          anything else
+        */
+        assertNotNull(searchService);
+        SearchResult searchResult = searchAcrossCustomEntities(searchService, "ReturnRate", SEARCHABLE_LONGTAIL_ENTITIES);
+        SearchEntityArray entities = searchResult.getEntities();
+        assertTrue(searchResult.getEntities().size() >= 4);
+        MatchedFieldArray firstResultMatchedFields = entities.get(0).getMatchedFields();
+        MatchedFieldArray secondResultMatchedFields = entities.get(1).getMatchedFields();
+        MatchedFieldArray thirdResultMatchedFields = entities.get(2).getMatchedFields();
+        MatchedFieldArray fourthResultMatchedFields = entities.get(3).getMatchedFields();
+
+        assertTrue(firstResultMatchedFields.toString().contains("ReturnRate"));
+        assertTrue(secondResultMatchedFields.toString().contains("ReturnRate"));
+        assertTrue(thirdResultMatchedFields.toString().contains("ReturnRate"));
+        assertTrue(fourthResultMatchedFields.toString().contains("ReturnRate"));
+    }
+
+    /**
+     *
+     * The test below should be added back in as improvements are made to search,
+     * via the linked tickets.
+     *
+     **/
+
+    // TODO: enable once PFP-481 is complete
+    @Test(enabled = false)
+    public void testNameMatchPartiallyQualified() {
+        /*
+          Searching for "analytics.pet_details" (partially qualified) should return the fully qualified table
+          name as the first search results before any others
+        */
+        assertNotNull(searchService);
+        SearchResult searchResult = searchAcrossCustomEntities(searchService, "analytics.pet_details", SEARCHABLE_LONGTAIL_ENTITIES);
+        assertTrue(searchResult.getEntities().size() >= 2);
+        Urn firstResultUrn = searchResult.getEntities().get(0).getEntity();
+        Urn secondResultUrn = searchResult.getEntities().get(1).getEntity();
+
+        assertTrue(firstResultUrn.toString().contains("snowflake,long_tail_companions.analytics.pet_details"));
+        assertTrue(secondResultUrn.toString().contains("dbt,long_tail_companions.analytics.pet_details"));
+    }
+
+}
cy.waitTextVisible("shipment_info.target"); + cy.waitTextVisible("shipment_info.destination"); + cy.waitTextVisible("shipment_info.geo_info"); + cy.waitTextVisible("field_foo"); + cy.waitTextVisible("field_baz"); + cy.waitTextVisible("event_name"); + cy.waitTextVisible("event_data"); + cy.waitTextVisible("timestamp"); + cy.waitTextVisible("browser"); + //verify columns can be hidden and shown again + cy.contains("Hide").click({ force:true }); + cy.ensureTextNotPresent("field_foo"); + cy.ensureTextNotPresent("field_baz"); + cy.get("[aria-label='down']").eq(1).click({ force:true }); + cy.waitTextVisible("field_foo"); + cy.waitTextVisible("field_baz"); + //verify columns can be disabled successfully + cy.clickOptionWithTestId("column-toggle") + cy.ensureTextNotPresent("shipment_info"); + cy.ensureTextNotPresent("field_foo"); + cy.ensureTextNotPresent("field_baz"); + cy.ensureTextNotPresent("event_name"); + cy.ensureTextNotPresent("event_data"); + cy.ensureTextNotPresent("timestamp"); + cy.ensureTextNotPresent("browser"); + }); + +}); \ No newline at end of file diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/deprecations.js b/smoke-test/tests/cypress/cypress/e2e/mutations/deprecations.js index 1d41d155440e8..2fa11654a3c3e 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/deprecations.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/deprecations.js @@ -1,19 +1,29 @@ -describe("deprecation", () => { +describe("dataset deprecation", () => { it("go to dataset and check deprecation works", () => { const urn = "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)"; const datasetName = "cypress_logging_events"; cy.login(); - cy.goToDataset(urn, datasetName); cy.openThreeDotDropdown(); cy.clickOptionWithText("Mark as deprecated"); cy.addViaFormModal("test deprecation", "Add Deprecation Details"); - - cy.goToDataset(urn, datasetName); - cy.contains("DEPRECATED"); - + cy.waitTextVisible("Deprecation Updated"); + cy.waitTextVisible("DEPRECATED") cy.openThreeDotDropdown(); cy.clickOptionWithText("Mark as un-deprecated"); + cy.waitTextVisible("Deprecation Updated"); + cy.ensureTextNotPresent("DEPRECATED"); + cy.openThreeDotDropdown(); + cy.clickOptionWithText("Mark as deprecated"); + cy.addViaFormModal("test deprecation", "Add Deprecation Details"); + cy.waitTextVisible("Deprecation Updated"); + cy.waitTextVisible("DEPRECATED"); + cy.contains("DEPRECATED").trigger("mouseover", { force: true }); + cy.waitTextVisible("Deprecation note"); + cy.get("[role='tooltip']").contains("Mark as un-deprecated").click(); + cy.waitTextVisible("Confirm Mark as un-deprecated"); + cy.get("button").contains("Yes").click(); + cy.waitTextVisible("Marked assets as un-deprecated!"); cy.ensureTextNotPresent("DEPRECATED"); - }); + }); }); diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js b/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js new file mode 100644 index 0000000000000..1f40cdf602062 --- /dev/null +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js @@ -0,0 +1,71 @@ +const test_id = Math.floor(Math.random() * 100000); +const documentation_edited = `This is test${test_id} documentation EDITED`; +const wrong_url = "https://www.linkedincom"; +const correct_url = "https://www.linkedin.com"; + +describe("edit documentation and link to dataset", () => { + + it("open test dataset page, edit documentation", () => { + //edit documentation and verify changes saved + cy.loginWithCredentials(); + 
cy.visit("/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema"); + cy.get("[role='tab']").contains("Documentation").click(); + cy.waitTextVisible("my hive dataset"); + cy.waitTextVisible("Sample doc"); + cy.clickOptionWithText("Edit"); + cy.focused().clear(); + cy.focused().type(documentation_edited); + cy.get("button").contains("Save").click(); + cy.waitTextVisible("Description Updated"); + cy.waitTextVisible(documentation_edited); + //return documentation to original state + cy.clickOptionWithText("Edit"); + cy.focused().clear().wait(1000); + cy.focused().type("my hive dataset"); + cy.get("button").contains("Save").click(); + cy.waitTextVisible("Description Updated"); + cy.waitTextVisible("my hive dataset"); + }); + + it("open test dataset page, remove and add dataset link", () => { + cy.loginWithCredentials(); + cy.visit("/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema"); + cy.get("[role='tab']").contains("Documentation").click(); + cy.contains("Sample doc").trigger("mouseover", { force: true }); + cy.get('[data-icon="delete"]').click(); + cy.waitTextVisible("Link Removed"); + cy.get("button").contains("Add Link").click(); + cy.get("#addLinkForm_url").type(wrong_url); + cy.waitTextVisible("This field must be a valid url."); + cy.focused().clear(); + cy.waitTextVisible("A URL is required."); + cy.focused().type(correct_url); + cy.ensureTextNotPresent("This field must be a valid url."); + cy.get("#addLinkForm_label").type("Sample doc"); + cy.get('[role="dialog"] button').contains("Add").click(); + cy.waitTextVisible("Link Added"); + cy.get("[role='tab']").contains("Documentation").click(); + cy.get(`[href='${correct_url}']`).should("be.visible"); + }); + + it("edit field documentation", () => { + cy.loginWithCredentials(); + cy.visit("/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema"); + cy.get("tbody [data-icon='edit']").first().click({ force: true }); + cy.waitTextVisible("Update description"); + cy.waitTextVisible("Foo field description has changed"); + cy.focused().clear().wait(1000); + cy.focused().type(documentation_edited); + cy.get("button").contains("Update").click(); + cy.waitTextVisible("Updated!"); + cy.waitTextVisible(documentation_edited); + cy.waitTextVisible("(edited)"); + cy.get("tbody [data-icon='edit']").first().click({ force: true }); + cy.focused().clear().wait(1000); + cy.focused().type("Foo field description has changed"); + cy.get("button").contains("Update").click(); + cy.waitTextVisible("Updated!"); + cy.waitTextVisible("Foo field description has changed"); + cy.waitTextVisible("(edited)"); + }); +}); \ No newline at end of file diff --git a/smoke-test/tests/cypress/data.json b/smoke-test/tests/cypress/data.json index c6606519e8d73..3b2ee1afaba58 100644 --- a/smoke-test/tests/cypress/data.json +++ b/smoke-test/tests/cypress/data.json @@ -2012,4 +2012,4 @@ }, "systemMetadata": null } -] +] \ No newline at end of file