diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql
index f15535bfb4eb8..fbea66f738955 100644
--- a/datahub-graphql-core/src/main/resources/search.graphql
+++ b/datahub-graphql-core/src/main/resources/search.graphql
@@ -448,6 +448,11 @@ enum FilterOperator {
* Represent the relation: String field is one of the array values to, e.g. name in ["Profile", "Event"]
"""
IN
+
+ """
+ Represents the relation: The field exists. If the field is an array, the field is either not present or empty.
+ """
+ EXISTS
}
"""
diff --git a/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx b/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx
index f776082e3f905..36713cfb7ffcf 100644
--- a/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx
+++ b/datahub-web-react/src/app/preview/DefaultPreviewCard.tsx
@@ -296,7 +296,7 @@ export default function DefaultPreviewCard({
{deprecation?.deprecated && (
)}
- {health && health.length > 0 && }
+ {health && health.length > 0 ? : null}
{externalUrl && (
initialValues?.includes(agg?.entity?.urn || ''))?.entity || null
- }
- onModalClose={onCloseModal}
- onOkOverride={(dataProductUrn) => {
- onSelect([dataProductUrn]);
- onCloseModal();
- }}
- />
- );
- }
-
if (filterField === CONTAINER_FILTER_NAME) {
return (
List[SchemaField]:
field.description = col.comment
schema_fields[idx] = field
else:
+ tags = []
+ if col.is_partition_column:
+ tags.append(
+ TagAssociationClass(make_tag_urn(Constants.TAG_PARTITION_KEY))
+ )
+
+ if col.cluster_column_position is not None:
+ tags.append(
+ TagAssociationClass(
+ make_tag_urn(
+ f"{CLUSTERING_COLUMN_TAG}_{col.cluster_column_position}"
+ )
+ )
+ )
+
field = SchemaField(
fieldPath=col.name,
type=SchemaFieldDataType(
@@ -1160,15 +1176,7 @@ def gen_schema_fields(self, columns: List[BigqueryColumn]) -> List[SchemaField]:
nativeDataType=col.data_type,
description=col.comment,
nullable=col.is_nullable,
- globalTags=GlobalTagsClass(
- tags=[
- TagAssociationClass(
- make_tag_urn(Constants.TAG_PARTITION_KEY)
- )
- ]
- )
- if col.is_partition_column
- else GlobalTagsClass(tags=[]),
+ globalTags=GlobalTagsClass(tags=tags),
)
schema_fields.append(field)
last_id = col.ordinal_position
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py
index 2450dbd0e2391..f8256f8e6fed6 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py
@@ -33,6 +33,7 @@ class BigqueryTableType:
class BigqueryColumn(BaseColumn):
field_path: str
is_partition_column: bool
+ cluster_column_position: Optional[int]
RANGE_PARTITION_NAME: str = "RANGE"
@@ -285,7 +286,8 @@ class BigqueryQuery:
CASE WHEN CONTAINS_SUBSTR(field_path, ".") THEN NULL ELSE c.data_type END as data_type,
description as comment,
c.is_hidden as is_hidden,
- c.is_partitioning_column as is_partitioning_column
+ c.is_partitioning_column as is_partitioning_column,
+ c.clustering_ordinal_position as clustering_ordinal_position,
from
`{project_id}`.`{dataset_name}`.INFORMATION_SCHEMA.COLUMNS c
join `{project_id}`.`{dataset_name}`.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS as cfp on cfp.table_name = c.table_name
@@ -307,6 +309,7 @@ class BigqueryQuery:
description as comment,
c.is_hidden as is_hidden,
c.is_partitioning_column as is_partitioning_column,
+ c.clustering_ordinal_position as clustering_ordinal_position,
-- We count the columns to be able limit it later
row_number() over (partition by c.table_catalog, c.table_schema, c.table_name order by c.ordinal_position asc, c.data_type DESC) as column_num,
-- Getting the maximum shard for each table
@@ -333,6 +336,7 @@ class BigqueryQuery:
CASE WHEN CONTAINS_SUBSTR(field_path, ".") THEN NULL ELSE c.data_type END as data_type,
c.is_hidden as is_hidden,
c.is_partitioning_column as is_partitioning_column,
+ c.clustering_ordinal_position as clustering_ordinal_position,
description as comment
from
`{table_identifier.project_id}`.`{table_identifier.dataset}`.INFORMATION_SCHEMA.COLUMNS as c
@@ -583,6 +587,7 @@ def get_columns_for_dataset(
data_type=column.data_type,
comment=column.comment,
is_partition_column=column.is_partitioning_column == "YES",
+ cluster_column_position=column.clustering_ordinal_position,
)
)
@@ -621,6 +626,7 @@ def get_columns_for_table(
data_type=column.data_type,
comment=column.comment,
is_partition_column=column.is_partitioning_column == "YES",
+ cluster_column_position=column.clustering_ordinal_position,
)
)
last_seen_table = column.table_name
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py
index 1cd5ed8164854..af9769bc9d94c 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py
@@ -162,9 +162,11 @@ class DBTCloudConfig(DBTCommonConfig):
}
_DBT_GRAPHQL_QUERY = """
-query DatahubMetadataQuery_{type}($jobId: Int!, $runId: Int) {{
- {type}(jobId: $jobId, runId: $runId) {{
+query DatahubMetadataQuery_{type}($jobId: BigInt!, $runId: BigInt) {{
+ job(id: $jobId, runId: $runId) {{
+ {type} {{
{fields}
+ }}
}}
}}
"""
@@ -218,7 +220,7 @@ def load_nodes(self) -> Tuple[List[DBTNode], Dict[str, Optional[str]]]:
},
)
- raw_nodes.extend(data[node_type])
+ raw_nodes.extend(data["job"][node_type])
nodes = [self._parse_into_dbt_node(node) for node in raw_nodes]
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py
index 039eac1e93819..587c71a98be67 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py
@@ -1,5 +1,6 @@
from typing import List, Optional
+from datahub.configuration.time_window_config import BucketDuration
from datahub.ingestion.source.snowflake.constants import SnowflakeObjectDomain
from datahub.ingestion.source.snowflake.snowflake_config import DEFAULT_TABLES_DENY_LIST
@@ -575,14 +576,17 @@ def get_access_history_date_range() -> str:
def usage_per_object_per_time_bucket_for_time_window(
start_time_millis: int,
end_time_millis: int,
- time_bucket_size: str,
+ time_bucket_size: BucketDuration,
use_base_objects: bool,
top_n_queries: int,
include_top_n_queries: bool,
) -> str:
if not include_top_n_queries:
top_n_queries = 0
- assert time_bucket_size == "DAY" or time_bucket_size == "HOUR"
+ assert (
+ time_bucket_size == BucketDuration.DAY
+ or time_bucket_size == BucketDuration.HOUR
+ )
objects_column = (
"BASE_OBJECTS_ACCESSED" if use_base_objects else "DIRECT_OBJECTS_ACCESSED"
)
@@ -629,7 +633,7 @@ def usage_per_object_per_time_bucket_for_time_window(
SELECT
object_name,
ANY_VALUE(object_domain) AS object_domain,
- DATE_TRUNC('{time_bucket_size}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time,
+ DATE_TRUNC('{time_bucket_size.value}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time,
count(distinct(query_id)) AS total_queries,
count( distinct(user_name) ) AS total_users
FROM
@@ -644,7 +648,7 @@ def usage_per_object_per_time_bucket_for_time_window(
SELECT
object_name,
column_name,
- DATE_TRUNC('{time_bucket_size}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time,
+ DATE_TRUNC('{time_bucket_size.value}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time,
count(distinct(query_id)) AS total_queries
FROM
field_access_history
@@ -658,7 +662,7 @@ def usage_per_object_per_time_bucket_for_time_window(
(
SELECT
object_name,
- DATE_TRUNC('{time_bucket_size}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time,
+ DATE_TRUNC('{time_bucket_size.value}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time,
count(distinct(query_id)) AS total_queries,
user_name,
ANY_VALUE(users.email) AS user_email
@@ -677,7 +681,7 @@ def usage_per_object_per_time_bucket_for_time_window(
(
SELECT
object_name,
- DATE_TRUNC('{time_bucket_size}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time,
+ DATE_TRUNC('{time_bucket_size.value}', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time,
query_history.query_text AS query_text,
count(distinct(access_history.query_id)) AS total_queries
FROM
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py
index 3605205b6055c..f8dfa612952d8 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py
@@ -356,7 +356,6 @@ def _check_usage_date_ranges(self) -> Any:
def _get_operation_aspect_work_unit(
self, event: SnowflakeJoinedAccessEvent, discovered_datasets: List[str]
) -> Iterable[MetadataWorkUnit]:
-
if event.query_start_time and event.query_type:
start_time = event.query_start_time
query_type = event.query_type
diff --git a/metadata-ingestion/tests/unit/test_bigquery_profiler.py b/metadata-ingestion/tests/unit/test_bigquery_profiler.py
index a2aec8df93d09..44ce5f0a02e37 100644
--- a/metadata-ingestion/tests/unit/test_bigquery_profiler.py
+++ b/metadata-ingestion/tests/unit/test_bigquery_profiler.py
@@ -37,6 +37,7 @@ def test_generate_day_partitioned_partition_profiler_query():
ordinal_position=1,
data_type="TIMESTAMP",
is_partition_column=True,
+ cluster_column_position=None,
comment=None,
is_nullable=False,
)
@@ -79,6 +80,7 @@ def test_generate_day_partitioned_partition_profiler_query_with_set_partition_ti
ordinal_position=1,
data_type="TIMESTAMP",
is_partition_column=True,
+ cluster_column_position=None,
comment=None,
is_nullable=False,
)
@@ -120,6 +122,7 @@ def test_generate_hour_partitioned_partition_profiler_query():
ordinal_position=1,
data_type="TIMESTAMP",
is_partition_column=True,
+ cluster_column_position=None,
comment=None,
is_nullable=False,
)
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java b/metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java
index 847029bc180eb..20501225ef787 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java
@@ -54,6 +54,13 @@
@TestConfiguration
@Import(ESTestConfiguration.class)
public class ESSampleDataFixture {
+ /**
+     * Interested in adding more fixtures? Here's what you will need to update:
+ * 1. Create a new indexPrefix and FixtureName. Both are needed or else all fixtures will load on top of each other,
+ * overwriting each other
+ * 2. Create a new IndexConvention, IndexBuilder, and EntityClient. These are needed
+ * to index a different set of entities.
+ */
@Autowired
private ESBulkProcessor _bulkProcessor;
@@ -61,6 +68,9 @@ public class ESSampleDataFixture {
@Autowired
private RestHighLevelClient _searchClient;
+ @Autowired
+ private RestHighLevelClient _longTailSearchClient;
+
@Autowired
private SearchConfiguration _searchConfiguration;
@@ -68,24 +78,54 @@ public class ESSampleDataFixture {
private CustomSearchConfiguration _customSearchConfiguration;
@Bean(name = "sampleDataPrefix")
- protected String indexPrefix() {
+ protected String sampleDataPrefix() {
return "smpldat";
}
+ @Bean(name = "longTailPrefix")
+ protected String longTailIndexPrefix() {
+ return "lngtl";
+ }
+
@Bean(name = "sampleDataIndexConvention")
protected IndexConvention indexConvention(@Qualifier("sampleDataPrefix") String prefix) {
return new IndexConventionImpl(prefix);
}
+ @Bean(name = "longTailIndexConvention")
+ protected IndexConvention longTailIndexConvention(@Qualifier("longTailPrefix") String prefix) {
+ return new IndexConventionImpl(prefix);
+ }
+
@Bean(name = "sampleDataFixtureName")
- protected String fixtureName() {
+ protected String sampleDataFixtureName() {
return "sample_data";
}
+ @Bean(name = "longTailFixtureName")
+ protected String longTailFixtureName() {
+ return "long_tail";
+ }
+
@Bean(name = "sampleDataEntityIndexBuilders")
protected EntityIndexBuilders entityIndexBuilders(
@Qualifier("entityRegistry") EntityRegistry entityRegistry,
@Qualifier("sampleDataIndexConvention") IndexConvention indexConvention
+ ) {
+ return entityIndexBuildersHelper(entityRegistry, indexConvention);
+ }
+
+ @Bean(name = "longTailEntityIndexBuilders")
+ protected EntityIndexBuilders longTailEntityIndexBuilders(
+ @Qualifier("longTailEntityRegistry") EntityRegistry longTailEntityRegistry,
+ @Qualifier("longTailIndexConvention") IndexConvention indexConvention
+ ) {
+ return entityIndexBuildersHelper(longTailEntityRegistry, indexConvention);
+ }
+
+ protected EntityIndexBuilders entityIndexBuildersHelper(
+ EntityRegistry entityRegistry,
+ IndexConvention indexConvention
) {
GitVersion gitVersion = new GitVersion("0.0.0-test", "123456", Optional.empty());
ESIndexBuilder indexBuilder = new ESIndexBuilder(_searchClient, 1, 0, 1,
@@ -100,6 +140,23 @@ protected ElasticSearchService entitySearchService(
@Qualifier("entityRegistry") EntityRegistry entityRegistry,
@Qualifier("sampleDataEntityIndexBuilders") EntityIndexBuilders indexBuilders,
@Qualifier("sampleDataIndexConvention") IndexConvention indexConvention
+ ) throws IOException {
+ return entitySearchServiceHelper(entityRegistry, indexBuilders, indexConvention);
+ }
+
+ @Bean(name = "longTailEntitySearchService")
+ protected ElasticSearchService longTailEntitySearchService(
+ @Qualifier("longTailEntityRegistry") EntityRegistry longTailEntityRegistry,
+ @Qualifier("longTailEntityIndexBuilders") EntityIndexBuilders longTailEndexBuilders,
+ @Qualifier("longTailIndexConvention") IndexConvention longTailIndexConvention
+ ) throws IOException {
+ return entitySearchServiceHelper(longTailEntityRegistry, longTailEndexBuilders, longTailIndexConvention);
+ }
+
+ protected ElasticSearchService entitySearchServiceHelper(
+ EntityRegistry entityRegistry,
+ EntityIndexBuilders indexBuilders,
+ IndexConvention indexConvention
) throws IOException {
CustomConfiguration customConfiguration = new CustomConfiguration();
customConfiguration.setEnabled(true);
@@ -107,7 +164,7 @@ protected ElasticSearchService entitySearchService(
CustomSearchConfiguration customSearchConfiguration = customConfiguration.resolve(new YAMLMapper());
ESSearchDAO searchDAO = new ESSearchDAO(entityRegistry, _searchClient, indexConvention, false,
- ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration, customSearchConfiguration);
+ ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration, customSearchConfiguration);
ESBrowseDAO browseDAO = new ESBrowseDAO(entityRegistry, _searchClient, indexConvention, _searchConfiguration, _customSearchConfiguration);
ESWriteDAO writeDAO = new ESWriteDAO(entityRegistry, _searchClient, indexConvention, _bulkProcessor, 1);
return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO);
@@ -120,9 +177,30 @@ protected SearchService searchService(
@Qualifier("sampleDataEntitySearchService") ElasticSearchService entitySearchService,
@Qualifier("sampleDataEntityIndexBuilders") EntityIndexBuilders indexBuilders,
@Qualifier("sampleDataPrefix") String prefix,
- @Qualifier("sampleDataFixtureName") String fixtureName
+ @Qualifier("sampleDataFixtureName") String sampleDataFixtureName
) throws IOException {
+ return searchServiceHelper(entityRegistry, entitySearchService, indexBuilders, prefix, sampleDataFixtureName);
+ }
+ @Bean(name = "longTailSearchService")
+ @Nonnull
+ protected SearchService longTailSearchService(
+ @Qualifier("longTailEntityRegistry") EntityRegistry longTailEntityRegistry,
+ @Qualifier("longTailEntitySearchService") ElasticSearchService longTailEntitySearchService,
+ @Qualifier("longTailEntityIndexBuilders") EntityIndexBuilders longTailIndexBuilders,
+ @Qualifier("longTailPrefix") String longTailPrefix,
+ @Qualifier("longTailFixtureName") String longTailFixtureName
+ ) throws IOException {
+ return searchServiceHelper(longTailEntityRegistry, longTailEntitySearchService, longTailIndexBuilders, longTailPrefix, longTailFixtureName);
+ }
+
+ public SearchService searchServiceHelper(
+ EntityRegistry entityRegistry,
+ ElasticSearchService entitySearchService,
+ EntityIndexBuilders indexBuilders,
+ String prefix,
+ String fixtureName
+ ) throws IOException {
int batchSize = 100;
SearchRanker ranker = new SimpleRanker();
CacheManager cacheManager = new ConcurrentMapCacheManager();
@@ -159,6 +237,24 @@ protected EntityClient entityClient(
@Qualifier("sampleDataSearchService") SearchService searchService,
@Qualifier("sampleDataEntitySearchService") ElasticSearchService entitySearchService,
@Qualifier("entityRegistry") EntityRegistry entityRegistry
+ ) {
+ return entityClientHelper(searchService, entitySearchService, entityRegistry);
+ }
+
+ @Bean(name = "longTailEntityClient")
+ @Nonnull
+ protected EntityClient longTailEntityClient(
+ @Qualifier("sampleDataSearchService") SearchService searchService,
+ @Qualifier("sampleDataEntitySearchService") ElasticSearchService entitySearchService,
+ @Qualifier("longTailEntityRegistry") EntityRegistry longTailEntityRegistry
+ ) {
+ return entityClientHelper(searchService, entitySearchService, longTailEntityRegistry);
+ }
+
+ private EntityClient entityClientHelper(
+ SearchService searchService,
+ ElasticSearchService entitySearchService,
+ EntityRegistry entityRegistry
) {
CachingEntitySearchService cachingEntitySearchService = new CachingEntitySearchService(
new ConcurrentMapCacheManager(),
@@ -173,7 +269,7 @@ protected EntityClient entityClient(
preProcessHooks.setUiEnabled(true);
return new JavaEntityClient(
new EntityServiceImpl(mockAspectDao, null, entityRegistry, true, null,
- preProcessHooks),
+ preProcessHooks),
null,
entitySearchService,
cachingEntitySearchService,
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/ESTestConfiguration.java b/metadata-io/src/test/java/com/linkedin/metadata/ESTestConfiguration.java
index 0d7ac506599af..1e5b860b581fc 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/ESTestConfiguration.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/ESTestConfiguration.java
@@ -137,4 +137,10 @@ public EntityRegistry entityRegistry() throws EntityRegistryException {
return new ConfigEntityRegistry(
ESTestConfiguration.class.getClassLoader().getResourceAsStream("entity-registry.yml"));
}
+
+ @Bean(name = "longTailEntityRegistry")
+ public EntityRegistry longTailEntityRegistry() throws EntityRegistryException {
+ return new ConfigEntityRegistry(
+ ESTestConfiguration.class.getClassLoader().getResourceAsStream("entity-registry.yml"));
+ }
}
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/ESTestUtils.java b/metadata-io/src/test/java/com/linkedin/metadata/ESTestUtils.java
index 79496888650e1..45c4c16864b07 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/ESTestUtils.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/ESTestUtils.java
@@ -77,6 +77,11 @@ public static SearchResult searchAcrossEntities(SearchService searchService, Str
100, new SearchFlags().setFulltext(true).setSkipCache(true), facets);
}
+ public static SearchResult searchAcrossCustomEntities(SearchService searchService, String query, List searchableEntities) {
+ return searchService.searchAcrossEntities(searchableEntities, query, null, null, 0,
+ 100, new SearchFlags().setFulltext(true).setSkipCache(true));
+ }
+
public static SearchResult search(SearchService searchService, String query) {
return search(searchService, SEARCHABLE_ENTITIES, query);
}
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/ElasticSearchGoldenTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/ElasticSearchGoldenTest.java
new file mode 100644
index 0000000000000..cc0d9dca6ae5f
--- /dev/null
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/ElasticSearchGoldenTest.java
@@ -0,0 +1,143 @@
+package com.linkedin.metadata.search.elasticsearch.fixtures;
+
+import com.linkedin.common.urn.Urn;
+import com.linkedin.datahub.graphql.generated.EntityType;
+import com.linkedin.datahub.graphql.resolvers.EntityTypeMapper;
+import com.linkedin.entity.client.EntityClient;
+import com.linkedin.metadata.ESSampleDataFixture;
+import com.linkedin.metadata.models.registry.EntityRegistry;
+import com.linkedin.metadata.search.MatchedFieldArray;
+import com.linkedin.metadata.search.SearchEntityArray;
+import com.linkedin.metadata.search.SearchResult;
+import com.linkedin.metadata.search.SearchService;
+import org.elasticsearch.client.RestHighLevelClient;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Qualifier;
+import org.springframework.context.annotation.Import;
+import org.springframework.test.context.testng.AbstractTestNGSpringContextTests;
+import org.testng.annotations.Test;
+
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static com.linkedin.metadata.ESTestUtils.*;
+import static org.testng.Assert.assertTrue;
+import static org.testng.AssertJUnit.*;
+
+@Import(ESSampleDataFixture.class)
+public class ElasticSearchGoldenTest extends AbstractTestNGSpringContextTests {
+
+ private static final List SEARCHABLE_LONGTAIL_ENTITIES = Stream.of(EntityType.CHART, EntityType.CONTAINER,
+ EntityType.DASHBOARD, EntityType.DATASET, EntityType.DOMAIN, EntityType.TAG
+ ).map(EntityTypeMapper::getName)
+ .collect(Collectors.toList());
+ @Autowired
+ private RestHighLevelClient _searchClient;
+
+ @Autowired
+ @Qualifier("longTailSearchService")
+ protected SearchService searchService;
+
+ @Autowired
+ @Qualifier("longTailEntityClient")
+ protected EntityClient entityClient;
+
+ @Autowired
+ @Qualifier("longTailEntityRegistry")
+ private EntityRegistry entityRegistry;
+
+ @Test
+ public void testNameMatchPetProfiles() {
+ /*
+ Searching for "pet profiles" should return "pet_profiles" as the first 2 search results
+ */
+ assertNotNull(searchService);
+ assertNotNull(entityRegistry);
+ SearchResult searchResult = searchAcrossCustomEntities(searchService, "pet profiles", SEARCHABLE_LONGTAIL_ENTITIES);
+ assertTrue(searchResult.getEntities().size() >= 2);
+ Urn firstResultUrn = searchResult.getEntities().get(0).getEntity();
+ Urn secondResultUrn = searchResult.getEntities().get(1).getEntity();
+
+ assertTrue(firstResultUrn.toString().contains("pet_profiles"));
+ assertTrue(secondResultUrn.toString().contains("pet_profiles"));
+ }
+
+ @Test
+ public void testNameMatchPetProfile() {
+ /*
+ Searching for "pet profile" should return "pet_profiles" as the first 2 search results
+ */
+ assertNotNull(searchService);
+ SearchResult searchResult = searchAcrossEntities(searchService, "pet profile", SEARCHABLE_LONGTAIL_ENTITIES);
+ assertTrue(searchResult.getEntities().size() >= 2);
+ Urn firstResultUrn = searchResult.getEntities().get(0).getEntity();
+ Urn secondResultUrn = searchResult.getEntities().get(1).getEntity();
+
+ assertTrue(firstResultUrn.toString().contains("pet_profiles"));
+ assertTrue(secondResultUrn.toString().contains("pet_profiles"));
+ }
+
+ @Test
+ public void testNameMatchMemberInWorkspace() {
+ /*
+ Searching for "collaborative actionitems" should return "collaborative_actionitems" as the first search
+ result, followed by "collaborative_actionitems_old"
+ */
+ assertNotNull(searchService);
+ SearchResult searchResult = searchAcrossEntities(searchService, "collaborative actionitems", SEARCHABLE_LONGTAIL_ENTITIES);
+ assertTrue(searchResult.getEntities().size() >= 2);
+ Urn firstResultUrn = searchResult.getEntities().get(0).getEntity();
+ Urn secondResultUrn = searchResult.getEntities().get(1).getEntity();
+
+ // Checks that the table name is not suffixed with anything
+ assertTrue(firstResultUrn.toString().contains("collaborative_actionitems,"));
+ assertTrue(secondResultUrn.toString().contains("collaborative_actionitems_old"));
+ }
+
+ @Test
+ public void testGlossaryTerms() {
+ /*
+ Searching for "ReturnRate" should return all tables that have the glossary term applied before
+ anything else
+ */
+ assertNotNull(searchService);
+ SearchResult searchResult = searchAcrossEntities(searchService, "ReturnRate", SEARCHABLE_LONGTAIL_ENTITIES);
+ SearchEntityArray entities = searchResult.getEntities();
+ assertTrue(searchResult.getEntities().size() >= 4);
+ MatchedFieldArray firstResultMatchedFields = entities.get(0).getMatchedFields();
+ MatchedFieldArray secondResultMatchedFields = entities.get(1).getMatchedFields();
+ MatchedFieldArray thirdResultMatchedFields = entities.get(2).getMatchedFields();
+ MatchedFieldArray fourthResultMatchedFields = entities.get(3).getMatchedFields();
+
+ assertTrue(firstResultMatchedFields.toString().contains("ReturnRate"));
+ assertTrue(secondResultMatchedFields.toString().contains("ReturnRate"));
+ assertTrue(thirdResultMatchedFields.toString().contains("ReturnRate"));
+ assertTrue(fourthResultMatchedFields.toString().contains("ReturnRate"));
+ }
+
+ /**
+ *
+ * The test below should be added back in as improvements are made to search,
+ * via the linked tickets.
+ *
+ **/
+
+ // TODO: enable once PFP-481 is complete
+ @Test(enabled = false)
+ public void testNameMatchPartiallyQualified() {
+ /*
+ Searching for "analytics.pet_details" (partially qualified) should return the fully qualified table
+ name as the first search results before any others
+ */
+ assertNotNull(searchService);
+ SearchResult searchResult = searchAcrossEntities(searchService, "analytics.pet_details", SEARCHABLE_LONGTAIL_ENTITIES);
+ assertTrue(searchResult.getEntities().size() >= 2);
+ Urn firstResultUrn = searchResult.getEntities().get(0).getEntity();
+ Urn secondResultUrn = searchResult.getEntities().get(1).getEntity();
+
+ assertTrue(firstResultUrn.toString().contains("snowflake,long_tail_companions.analytics.pet_details"));
+ assertTrue(secondResultUrn.toString().contains("dbt,long_tail_companions.analytics.pet_details"));
+ }
+
+}
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SampleDataFixtureTests.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SampleDataFixtureTests.java
index dada13bd6f479..2f1e48c18450d 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SampleDataFixtureTests.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/fixtures/SampleDataFixtureTests.java
@@ -82,6 +82,7 @@ public class SampleDataFixtureTests extends AbstractTestNGSpringContextTests {
protected EntityClient entityClient;
@Autowired
+ @Qualifier("entityRegistry")
private EntityRegistry entityRegistry;
@Test
diff --git a/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_level.js b/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_level.js
new file mode 100644
index 0000000000000..2a8fe045f154e
--- /dev/null
+++ b/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_level.js
@@ -0,0 +1,51 @@
+const DATASET_ENTITY_TYPE = 'dataset';
+const DATASET_URN = 'urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)';
+
+describe("column-level lineage graph test", () => {
+
+ it("navigate to lineage graph view and verify that column-level lineage is showing correctly", () => {
+ cy.login();
+ cy.goToEntityLineageGraph(DATASET_ENTITY_TYPE, DATASET_URN);
+ //verify columns not shown by default
+ cy.waitTextVisible("SampleCypressHdfs");
+ cy.waitTextVisible("SampleCypressHive");
+ cy.waitTextVisible("cypress_logging");
+ cy.ensureTextNotPresent("shipment_info");
+ cy.ensureTextNotPresent("field_foo");
+ cy.ensureTextNotPresent("field_baz");
+ cy.ensureTextNotPresent("event_name");
+ cy.ensureTextNotPresent("event_data");
+ cy.ensureTextNotPresent("timestamp");
+ cy.ensureTextNotPresent("browser");
+ cy.clickOptionWithTestId("column-toggle")
+        //verify columns appear and belong to correct dataset
+ cy.waitTextVisible("shipment_info");
+ cy.waitTextVisible("shipment_info.date");
+ cy.waitTextVisible("shipment_info.target");
+ cy.waitTextVisible("shipment_info.destination");
+ cy.waitTextVisible("shipment_info.geo_info");
+ cy.waitTextVisible("field_foo");
+ cy.waitTextVisible("field_baz");
+ cy.waitTextVisible("event_name");
+ cy.waitTextVisible("event_data");
+ cy.waitTextVisible("timestamp");
+ cy.waitTextVisible("browser");
+ //verify columns can be hidden and shown again
+ cy.contains("Hide").click({ force:true });
+ cy.ensureTextNotPresent("field_foo");
+ cy.ensureTextNotPresent("field_baz");
+ cy.get("[aria-label='down']").eq(1).click({ force:true });
+ cy.waitTextVisible("field_foo");
+ cy.waitTextVisible("field_baz");
+ //verify columns can be disabled successfully
+ cy.clickOptionWithTestId("column-toggle")
+ cy.ensureTextNotPresent("shipment_info");
+ cy.ensureTextNotPresent("field_foo");
+ cy.ensureTextNotPresent("field_baz");
+ cy.ensureTextNotPresent("event_name");
+ cy.ensureTextNotPresent("event_data");
+ cy.ensureTextNotPresent("timestamp");
+ cy.ensureTextNotPresent("browser");
+ });
+
+});
\ No newline at end of file
diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/deprecations.js b/smoke-test/tests/cypress/cypress/e2e/mutations/deprecations.js
index 1d41d155440e8..2fa11654a3c3e 100644
--- a/smoke-test/tests/cypress/cypress/e2e/mutations/deprecations.js
+++ b/smoke-test/tests/cypress/cypress/e2e/mutations/deprecations.js
@@ -1,19 +1,29 @@
-describe("deprecation", () => {
+describe("dataset deprecation", () => {
it("go to dataset and check deprecation works", () => {
const urn = "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)";
const datasetName = "cypress_logging_events";
cy.login();
-
cy.goToDataset(urn, datasetName);
cy.openThreeDotDropdown();
cy.clickOptionWithText("Mark as deprecated");
cy.addViaFormModal("test deprecation", "Add Deprecation Details");
-
- cy.goToDataset(urn, datasetName);
- cy.contains("DEPRECATED");
-
+ cy.waitTextVisible("Deprecation Updated");
+ cy.waitTextVisible("DEPRECATED")
cy.openThreeDotDropdown();
cy.clickOptionWithText("Mark as un-deprecated");
+ cy.waitTextVisible("Deprecation Updated");
+ cy.ensureTextNotPresent("DEPRECATED");
+ cy.openThreeDotDropdown();
+ cy.clickOptionWithText("Mark as deprecated");
+ cy.addViaFormModal("test deprecation", "Add Deprecation Details");
+ cy.waitTextVisible("Deprecation Updated");
+ cy.waitTextVisible("DEPRECATED");
+ cy.contains("DEPRECATED").trigger("mouseover", { force: true });
+ cy.waitTextVisible("Deprecation note");
+ cy.get("[role='tooltip']").contains("Mark as un-deprecated").click();
+ cy.waitTextVisible("Confirm Mark as un-deprecated");
+ cy.get("button").contains("Yes").click();
+ cy.waitTextVisible("Marked assets as un-deprecated!");
cy.ensureTextNotPresent("DEPRECATED");
- });
+ });
});
diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js b/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js
new file mode 100644
index 0000000000000..1f40cdf602062
--- /dev/null
+++ b/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js
@@ -0,0 +1,71 @@
+const test_id = Math.floor(Math.random() * 100000);
+const documentation_edited = `This is test${test_id} documentation EDITED`;
+const wrong_url = "https://www.linkedincom";
+const correct_url = "https://www.linkedin.com";
+
+describe("edit documentation and link to dataset", () => {
+
+ it("open test dataset page, edit documentation", () => {
+ //edit documentation and verify changes saved
+ cy.loginWithCredentials();
+ cy.visit("/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema");
+ cy.get("[role='tab']").contains("Documentation").click();
+ cy.waitTextVisible("my hive dataset");
+ cy.waitTextVisible("Sample doc");
+ cy.clickOptionWithText("Edit");
+ cy.focused().clear();
+ cy.focused().type(documentation_edited);
+ cy.get("button").contains("Save").click();
+ cy.waitTextVisible("Description Updated");
+ cy.waitTextVisible(documentation_edited);
+ //return documentation to original state
+ cy.clickOptionWithText("Edit");
+ cy.focused().clear().wait(1000);
+ cy.focused().type("my hive dataset");
+ cy.get("button").contains("Save").click();
+ cy.waitTextVisible("Description Updated");
+ cy.waitTextVisible("my hive dataset");
+ });
+
+ it("open test dataset page, remove and add dataset link", () => {
+ cy.loginWithCredentials();
+ cy.visit("/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema");
+ cy.get("[role='tab']").contains("Documentation").click();
+ cy.contains("Sample doc").trigger("mouseover", { force: true });
+ cy.get('[data-icon="delete"]').click();
+ cy.waitTextVisible("Link Removed");
+ cy.get("button").contains("Add Link").click();
+ cy.get("#addLinkForm_url").type(wrong_url);
+ cy.waitTextVisible("This field must be a valid url.");
+ cy.focused().clear();
+ cy.waitTextVisible("A URL is required.");
+ cy.focused().type(correct_url);
+ cy.ensureTextNotPresent("This field must be a valid url.");
+ cy.get("#addLinkForm_label").type("Sample doc");
+ cy.get('[role="dialog"] button').contains("Add").click();
+ cy.waitTextVisible("Link Added");
+ cy.get("[role='tab']").contains("Documentation").click();
+ cy.get(`[href='${correct_url}']`).should("be.visible");
+ });
+
+ it("edit field documentation", () => {
+ cy.loginWithCredentials();
+ cy.visit("/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema");
+ cy.get("tbody [data-icon='edit']").first().click({ force: true });
+ cy.waitTextVisible("Update description");
+ cy.waitTextVisible("Foo field description has changed");
+ cy.focused().clear().wait(1000);
+ cy.focused().type(documentation_edited);
+ cy.get("button").contains("Update").click();
+ cy.waitTextVisible("Updated!");
+ cy.waitTextVisible(documentation_edited);
+ cy.waitTextVisible("(edited)");
+ cy.get("tbody [data-icon='edit']").first().click({ force: true });
+ cy.focused().clear().wait(1000);
+ cy.focused().type("Foo field description has changed");
+ cy.get("button").contains("Update").click();
+ cy.waitTextVisible("Updated!");
+ cy.waitTextVisible("Foo field description has changed");
+ cy.waitTextVisible("(edited)");
+ });
+});
\ No newline at end of file
diff --git a/smoke-test/tests/cypress/data.json b/smoke-test/tests/cypress/data.json
index c6606519e8d73..3b2ee1afaba58 100644
--- a/smoke-test/tests/cypress/data.json
+++ b/smoke-test/tests/cypress/data.json
@@ -2012,4 +2012,4 @@
},
"systemMetadata": null
}
-]
+]
\ No newline at end of file