Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upstream #4

Merged
merged 5 commits into from
Mar 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ Here are the companies that have officially adopted DataHub. Please feel free to

- [Adevinta](https://www.adevinta.com/)
- [Banksalad](https://www.banksalad.com)
- [Cabify](https://www.cabify.com)
- [Cabify](https://cabify.tech/)
- [DefinedCrowd](http://www.definedcrowd.com)
- [DFDS](https://www.dfds.com/)
- [Expedia Group](http://expedia.com)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ private List<AnalyticsChart> getProductAnalyticsCharts(Authentication authentica

final List<NamedLine> wauTimeseries =
_analyticsService.getTimeseriesChart(_analyticsService.getUsageIndexName(), twoMonthsDateRange, weeklyInterval,
Optional.empty(), ImmutableMap.of(), Optional.of("browserId"));
Optional.empty(), ImmutableMap.of(), Collections.emptyMap(), Optional.of("browserId"));
charts.add(TimeSeriesChart.builder()
.setTitle(wauTitle)
.setDateRange(twoMonthsDateRange)
Expand All @@ -90,7 +90,8 @@ private List<AnalyticsChart> getProductAnalyticsCharts(Authentication authentica

final List<NamedLine> searchesTimeseries =
_analyticsService.getTimeseriesChart(_analyticsService.getUsageIndexName(), lastWeekDateRange, dailyInterval,
Optional.empty(), ImmutableMap.of("type", ImmutableList.of(searchEventType)), Optional.empty());
Optional.empty(), ImmutableMap.of("type", ImmutableList.of(searchEventType)), Collections.emptyMap(),
Optional.empty());
charts.add(TimeSeriesChart.builder()
.setTitle(searchesTitle)
.setDateRange(lastWeekDateRange)
Expand All @@ -104,24 +105,26 @@ private List<AnalyticsChart> getProductAnalyticsCharts(Authentication authentica

final List<Row> topSearchQueries =
_analyticsService.getTopNTableChart(_analyticsService.getUsageIndexName(), Optional.of(lastWeekDateRange),
"query.keyword", ImmutableMap.of("type", ImmutableList.of(searchEventType)), Optional.empty(), 10,
AnalyticsUtil::buildCellWithSearchLandingPage);
"query.keyword", ImmutableMap.of("type", ImmutableList.of(searchEventType)), Collections.emptyMap(),
Optional.empty(), 10, AnalyticsUtil::buildCellWithSearchLandingPage);
charts.add(TableChart.builder().setTitle(topSearchTitle).setColumns(columns).setRows(topSearchQueries).build());

// Chart 4: Bar Graph Chart
final String sectionViewsTitle = "Section Views across Entity Types";
final List<NamedBar> sectionViewsPerEntityType =
_analyticsService.getBarChart(_analyticsService.getUsageIndexName(), Optional.of(lastWeekDateRange),
ImmutableList.of("entityType.keyword", "section.keyword"),
ImmutableMap.of("type", ImmutableList.of("EntitySectionViewEvent")), Optional.empty(), true);
ImmutableMap.of("type", ImmutableList.of("EntitySectionViewEvent")), Collections.emptyMap(),
Optional.empty(), true);
charts.add(BarChart.builder().setTitle(sectionViewsTitle).setBars(sectionViewsPerEntityType).build());

// Chart 5: Bar Graph Chart
final String actionsByTypeTitle = "Actions by Entity Type";
final List<NamedBar> eventsByEventType =
_analyticsService.getBarChart(_analyticsService.getUsageIndexName(), Optional.of(lastWeekDateRange),
ImmutableList.of("entityType.keyword", "actionType.keyword"),
ImmutableMap.of("type", ImmutableList.of("EntityActionEvent")), Optional.empty(), true);
ImmutableMap.of("type", ImmutableList.of("EntityActionEvent")), Collections.emptyMap(), Optional.empty(),
true);
charts.add(BarChart.builder().setTitle(actionsByTypeTitle).setBars(eventsByEventType).build());

// Chart 6: Table Chart
Expand All @@ -131,7 +134,7 @@ private List<AnalyticsChart> getProductAnalyticsCharts(Authentication authentica
final List<Row> topViewedDatasets =
_analyticsService.getTopNTableChart(_analyticsService.getUsageIndexName(), Optional.of(lastWeekDateRange),
"entityUrn.keyword", ImmutableMap.of("type", ImmutableList.of("EntityViewEvent"), "entityType.keyword",
ImmutableList.of(EntityType.DATASET.name())), Optional.empty(), 10,
ImmutableList.of(EntityType.DATASET.name())), Collections.emptyMap(), Optional.empty(), 10,
AnalyticsUtil::buildCellWithEntityLandingPage);
AnalyticsUtil.hydrateDisplayNameForTable(_entityClient, topViewedDatasets, Constants.DATASET_ENTITY_NAME,
ImmutableSet.of(Constants.DATASET_KEY_ASPECT_NAME), AnalyticsUtil::getDatasetName, authentication);
Expand All @@ -145,7 +148,8 @@ private List<AnalyticsChart> getGlobalMetadataAnalyticsCharts(Authentication aut
// Chart 1: Entities per domain
final List<NamedBar> entitiesPerDomain =
_analyticsService.getBarChart(_analyticsService.getAllEntityIndexName(), Optional.empty(),
ImmutableList.of("domains.keyword", "platform.keyword"), Collections.emptyMap(), Optional.empty(), false);
ImmutableList.of("domains.keyword", "platform.keyword"), Collections.emptyMap(),
ImmutableMap.of("removed", ImmutableList.of("true")), Optional.empty(), false);
AnalyticsUtil.hydrateDisplayNameForBars(_entityClient, entitiesPerDomain, Constants.DOMAIN_ENTITY_NAME,
ImmutableSet.of(Constants.DOMAIN_PROPERTIES_ASPECT_NAME), AnalyticsUtil::getDomainName, authentication);
AnalyticsUtil.hydrateDisplayNameForSegments(_entityClient, entitiesPerDomain, Constants.DATA_PLATFORM_ENTITY_NAME,
Expand All @@ -157,7 +161,8 @@ private List<AnalyticsChart> getGlobalMetadataAnalyticsCharts(Authentication aut
// Chart 2: Entities per platform
final List<NamedBar> entitiesPerPlatform =
_analyticsService.getBarChart(_analyticsService.getAllEntityIndexName(), Optional.empty(),
ImmutableList.of("platform.keyword"), Collections.emptyMap(), Optional.empty(), false);
ImmutableList.of("platform.keyword"), Collections.emptyMap(),
ImmutableMap.of("removed", ImmutableList.of("true")), Optional.empty(), false);
AnalyticsUtil.hydrateDisplayNameForBars(_entityClient, entitiesPerPlatform, Constants.DATA_PLATFORM_ENTITY_NAME,
ImmutableSet.of(Constants.DATA_PLATFORM_INFO_ASPECT_NAME), AnalyticsUtil::getPlatformName, authentication);
if (!entitiesPerPlatform.isEmpty()) {
Expand All @@ -167,7 +172,8 @@ private List<AnalyticsChart> getGlobalMetadataAnalyticsCharts(Authentication aut
// Chart 3: Entities per term
final List<NamedBar> entitiesPerTerm =
_analyticsService.getBarChart(_analyticsService.getAllEntityIndexName(), Optional.empty(),
ImmutableList.of("glossaryTerms.keyword"), Collections.emptyMap(), Optional.empty(), false);
ImmutableList.of("glossaryTerms.keyword"), Collections.emptyMap(),
ImmutableMap.of("removed", ImmutableList.of("true")), Optional.empty(), false);
AnalyticsUtil.hydrateDisplayNameForBars(_entityClient, entitiesPerTerm, Constants.GLOSSARY_TERM_ENTITY_NAME,
ImmutableSet.of(Constants.GLOSSARY_TERM_KEY_ASPECT_NAME), AnalyticsUtil::getTermName, authentication);
if (!entitiesPerTerm.isEmpty()) {
Expand All @@ -177,7 +183,8 @@ private List<AnalyticsChart> getGlobalMetadataAnalyticsCharts(Authentication aut
// Chart 4: Entities per fabric type
final List<NamedBar> entitiesPerEnv =
_analyticsService.getBarChart(_analyticsService.getAllEntityIndexName(), Optional.empty(),
ImmutableList.of("origin.keyword"), Collections.emptyMap(), Optional.empty(), false);
ImmutableList.of("origin.keyword"), Collections.emptyMap(),
ImmutableMap.of("removed", ImmutableList.of("true")), Optional.empty(), false);
if (entitiesPerEnv.size() > 1) {
charts.add(BarChart.builder().setTitle("Entities per Environment").setBars(entitiesPerEnv).build());
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package com.linkedin.datahub.graphql.analytics.service;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.linkedin.datahub.graphql.generated.BarSegment;
import com.linkedin.datahub.graphql.generated.Cell;
import com.linkedin.datahub.graphql.generated.DateInterval;
Expand Down Expand Up @@ -75,14 +74,14 @@ public String getUsageIndexName() {

public List<NamedLine> getTimeseriesChart(String indexName, DateRange dateRange, DateInterval granularity,
Optional<String> dimension, // Length 1 for now
Map<String, List<String>> filters, Optional<String> uniqueOn) {
Map<String, List<String>> filters, Map<String, List<String>> mustNotFilters, Optional<String> uniqueOn) {

log.debug(
String.format("Invoked getTimeseriesChart with indexName: %s, dateRange: %s, granularity: %s, dimension: %s,",
indexName, dateRange, granularity, dimension) + String.format("filters: %s, uniqueOn: %s", filters,
uniqueOn));

AggregationBuilder filteredAgg = getFilteredAggregation(filters, ImmutableMap.of(), Optional.of(dateRange));
AggregationBuilder filteredAgg = getFilteredAggregation(filters, mustNotFilters, Optional.of(dateRange));

AggregationBuilder dateHistogram = AggregationBuilders.dateHistogram(DATE_HISTOGRAM)
.field("timestamp")
Expand Down Expand Up @@ -128,22 +127,22 @@ private List<NumericDataPoint> extractPointsFromAggregations(Aggregations aggreg

public List<NamedBar> getBarChart(String indexName, Optional<DateRange> dateRange, List<String> dimensions,
// Length 1 or 2
Map<String, List<String>> filters, Optional<String> uniqueOn, boolean showMissing) {
Map<String, List<String>> filters, Map<String, List<String>> mustNotFilters, Optional<String> uniqueOn,
boolean showMissing) {
log.debug(
String.format("Invoked getBarChart with indexName: %s, dateRange: %s, dimensions: %s,", indexName, dateRange,
dimensions) + String.format("filters: %s, uniqueOn: %s", filters, uniqueOn));

assert (dimensions.size() == 1 || dimensions.size() == 2);
AggregationBuilder filteredAgg = getFilteredAggregation(filters, ImmutableMap.of(), dateRange);
AggregationBuilder filteredAgg = getFilteredAggregation(filters, mustNotFilters, dateRange);

TermsAggregationBuilder termAgg = AggregationBuilders.terms(DIMENSION).field(dimensions.get(0));
if (showMissing) {
termAgg.missing(NA);
}

if (dimensions.size() == 2) {
TermsAggregationBuilder secondTermAgg =
AggregationBuilders.terms(SECOND_DIMENSION).field(dimensions.get(1));
TermsAggregationBuilder secondTermAgg = AggregationBuilders.terms(SECOND_DIMENSION).field(dimensions.get(1));
if (showMissing) {
secondTermAgg.missing(NA);
}
Expand Down Expand Up @@ -194,13 +193,13 @@ public Row buildRow(String groupByValue, Function<String, Cell> groupByValueToCe
}

public List<Row> getTopNTableChart(String indexName, Optional<DateRange> dateRange, String groupBy,
Map<String, List<String>> filters, Optional<String> uniqueOn, int maxRows,
Function<String, Cell> groupByValueToCell) {
Map<String, List<String>> filters, Map<String, List<String>> mustNotFilters, Optional<String> uniqueOn,
int maxRows, Function<String, Cell> groupByValueToCell) {
log.debug(
String.format("Invoked getTopNTableChart with indexName: %s, dateRange: %s, groupBy: %s", indexName, dateRange,
groupBy) + String.format("filters: %s, uniqueOn: %s", filters, uniqueOn));

AggregationBuilder filteredAgg = getFilteredAggregation(filters, ImmutableMap.of(), dateRange);
AggregationBuilder filteredAgg = getFilteredAggregation(filters, mustNotFilters, dateRange);

TermsAggregationBuilder termAgg = AggregationBuilders.terms(DIMENSION).field(groupBy).size(maxRows);
if (uniqueOn.isPresent()) {
Expand Down
1 change: 1 addition & 0 deletions datahub-web-react/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
"react-syntax-highlighter": "^15.4.4",
"react-timezone-select": "^1.1.15",
"react-visibility-sensor": "^5.1.1",
"rgb-hex": "^4.0.0",
"sinon": "^11.1.1",
"start-server-and-test": "1.12.2",
"styled-components": "^5.2.1",
Expand Down
10 changes: 7 additions & 3 deletions datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,13 @@ export class DatasetEntity implements Entity<Dataset> {
component: LineageTab,
display: {
visible: (_, _1) => true,
enabled: (_, dataset: GetDatasetQuery) =>
(dataset?.dataset?.upstream?.count || 0) > 0 ||
(dataset?.dataset?.downstream?.count || 0) > 0,
enabled: (_, dataset: GetDatasetQuery) => {
console.log(dataset?.dataset?.upstream, dataset?.dataset?.downstream);
return (
(dataset?.dataset?.upstream?.total || 0) > 0 ||
(dataset?.dataset?.downstream?.total || 0) > 0
);
},
},
},
{
Expand Down
4 changes: 3 additions & 1 deletion datahub-web-react/src/app/lineage/LineageEntityNode.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,9 @@ export default function LineageEntityNode({
|{' '}
</tspan>
<tspan dx=".25em" dy="-2px">
{capitalizeFirstLetter(node.data.subtype || node.data.type)}
{capitalizeFirstLetter(
node.data.subtype || (node.data.type && entityRegistry.getEntityName(node.data.type)),
)}
</tspan>
</UnselectableText>
{expandTitles ? (
Expand Down
11 changes: 6 additions & 5 deletions datahub-web-react/src/app/shared/TagStyleEntity.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import { ApolloError } from '@apollo/client';
import styled from 'styled-components';
import { ChromePicker } from 'react-color';
import ColorHash from 'color-hash';
import rgbHex from 'rgb-hex';

import { PlusOutlined } from '@ant-design/icons';
import { useGetTagQuery } from '../../graphql/tag.generated';
import { EntityType, FacetMetadata, Maybe, Scalars } from '../../types.generated';
Expand Down Expand Up @@ -256,10 +258,6 @@ export default function TagStyleEntity({ urn, useGetSearchResults = useWrappedSe
saveColor();
};

const handleColorChange = (color: any) => {
setColorValue(color?.hex);
};

// Update Description
const updateDescriptionValue = (desc: string) => {
setUpdatedDescription(desc);
Expand Down Expand Up @@ -314,7 +312,10 @@ export default function TagStyleEntity({ urn, useGetSearchResults = useWrappedSe
</TagName>
{displayColorPicker && (
<ColorPickerPopOver ref={colorPickerRef}>
<ChromePicker color={colorValue} onChange={handleColorChange} />
<ChromePicker
color={colorValue}
onChange={(c) => setColorValue(`#${rgbHex(c.rgb.r, c.rgb.g, c.rgb.b, c.rgb.a)}`)}
/>
</ColorPickerPopOver>
)}
</div>
Expand Down
5 changes: 5 additions & 0 deletions datahub-web-react/yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -14722,6 +14722,11 @@ [email protected]:
convert-source-map "^0.3.3"
css "^2.0.0"

rgb-hex@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/rgb-hex/-/rgb-hex-4.0.0.tgz#02fb1e215e3fe5d070501579a3d2e4fa3c0acec8"
integrity sha512-Eg2ev5CiMBnQ9Gpflmqbwbso0CCdISqtVIow7OpYSLN1ULUv2jTB9YieS1DSSn/17AD7KkPWDPzSFzI4GSuu/Q==

rgb-regex@^1.0.1:
version "1.0.1"
resolved "https://registry.yarnpkg.com/rgb-regex/-/rgb-regex-1.0.1.tgz#c0e0d6882df0e23be254a475e8edd41915feaeb1"
Expand Down
3 changes: 2 additions & 1 deletion metadata-ingestion/integration_docs/great-expectations.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ This integration does not support
- `retry_status_codes` (optional): Retry HTTP request also on these status codes.
- `retry_max_times` (optional): Maximum times to retry if HTTP request fails. The delay between retries is increased exponentially.
- `extra_headers` (optional): Extra headers which will be added to the datahub request.
- `parse_table_names_from_sql` (defaults to false): The integration can use an SQL parser to try to determine the datasets being asserted from the SQL query. This parsing is disabled by default, but can be enabled by setting `parse_table_names_from_sql: True`. The parser is based on the [`sqllineage`](https://pypi.org/project/sqllineage/) package.

## Learn more

To see the Great Expectations in action, check out [this demo](https://www.loom.com/share/d781c9f0b270477fb5d6b0c26ef7f22d) from the Feb 2022 townhall.
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
from datahub.metadata.com.linkedin.pegasus2avro.common import DataPlatformInstance
from datahub.metadata.com.linkedin.pegasus2avro.events.metadata import ChangeType
from datahub.metadata.schema_classes import PartitionSpecClass, PartitionTypeClass
from datahub.utilities.sql_parser import MetadataSQLSQLParser
from datahub.utilities.sql_parser import DefaultSQLParser

logger = logging.getLogger(__name__)

Expand All @@ -71,6 +71,7 @@ def __init__(
retry_status_codes: Optional[List[int]] = None,
retry_max_times: Optional[int] = None,
extra_headers: Optional[Dict[str, str]] = None,
parse_table_names_from_sql: bool = False,
):
super().__init__(data_context)
self.server_url = server_url
Expand All @@ -82,6 +83,7 @@ def __init__(
self.retry_status_codes = retry_status_codes
self.retry_max_times = retry_max_times
self.extra_headers = extra_headers
self.parse_table_names_from_sql = parse_table_names_from_sql

def _run(
self,
Expand Down Expand Up @@ -598,6 +600,12 @@ def get_dataset_partitions(self, batch_identifier, data_asset):
}
)
elif isinstance(ge_batch_spec, RuntimeQueryBatchSpec):
if not self.parse_table_names_from_sql:
warn(
"Enable parse_table_names_from_sql in DatahubValidationAction config\
to try to parse the tables being asserted from SQL query"
)
return []
query = data_asset.batches[
batch_identifier
].batch_request.runtime_parameters["query"]
Expand All @@ -610,11 +618,12 @@ def get_dataset_partitions(self, batch_identifier, data_asset):
query=query,
customProperties=batchSpecProperties,
)
tables = MetadataSQLSQLParser(query).get_tables()
tables = DefaultSQLParser(query).get_tables()
if len(set(tables)) != 1:
warn(
"DataHubValidationAction does not support cross dataset assertions."
)
return []
for table in tables:
dataset_urn = make_dataset_urn_from_sqlalchemy_uri(
sqlalchemy_uri,
Expand Down
10 changes: 5 additions & 5 deletions metadata-ingestion/src/datahub/utilities/sql_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
from abc import ABCMeta, abstractmethod
from typing import List, Set

import sqlparse
from networkx import DiGraph
from sqllineage.core import LineageAnalyzer
from sqllineage.core.holders import Column, SQLLineageHolder

import datahub.utilities.sqllineage_patch

try:
import sqlparse
from networkx import DiGraph
from sql_metadata import Parser as MetadataSQLParser
from sqllineage.core import LineageAnalyzer

import datahub.utilities.sqllineage_patch
except ImportError:
pass

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ action_list:
class_name: DataHubValidationAction
server_url: http://localhost:8080
graceful_exceptions: False
parse_table_names_from_sql: True
platform_instance_map:
my_postgresql_datasource: postgres1
runtime_postgres_datasource: postgres1
Expand Down
Loading