Skip to content

Commit

Permalink
feat(ingestion): superset - add display_uri to config (#5408)
Browse files Browse the repository at this point in the history
  • Loading branch information
milimetric authored Aug 3, 2022
1 parent b1ff030 commit 865074e
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 4 deletions.
3 changes: 3 additions & 0 deletions metadata-ingestion/archived/source_docs/superset.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,15 @@ Note that a `.` is used to denote nested fields in the YAML recipe.
| Field | Required | Default | Description |
| ------------- | -------- | ------------------ | ------------------------------------------------------- |
| `connect_uri` | | `"localhost:8088"` | Superset host URL. |
| `display_uri` | | `(connect_uri)` | Publicly accessible Superset URL, see note below. |
| `username` | | | Superset username. |
| `password` | | | Superset password. |
| `provider` | | `"db"` | Superset provider. |
| `env` | | `"PROD"` | Environment to use in namespace when constructing URNs. |
| `database_alias` | | | Can be used to change mapping for database names in superset to what you have in datahub |

NOTE: `display_uri` is useful when you need to ingest from a private, specially configured Superset instance but still want the links for dashboards, charts, etc. to point to the publicly accessible URL. For example, you could set `connect_uri: localhost:xxxx` and `display_uri: superset.mydomain.com`. You may need to do this if `superset.mydomain.com` sits behind complex authentication that is not easy to pass through this source config.

## Compatibility

Coming soon!
Expand Down
19 changes: 15 additions & 4 deletions metadata-ingestion/src/datahub/ingestion/source/superset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import dateutil.parser as dp
import requests
from pydantic.class_validators import validator
from pydantic.class_validators import root_validator, validator
from pydantic.fields import Field

from datahub.configuration.common import ConfigModel
Expand Down Expand Up @@ -59,6 +59,10 @@ class SupersetConfig(ConfigModel):
# See the Superset /security/login endpoint for details
# https://superset.apache.org/docs/rest-api
connect_uri: str = Field(default="localhost:8088", description="Superset host URL.")
display_uri: str = Field(
default=None,
description="optional URL to use in links (if `connect_uri` is only for ingestion)",
)
username: Optional[str] = Field(default=None, description="Superset username.")
password: Optional[str] = Field(default=None, description="Superset password.")
provider: str = Field(default="db", description="Superset provider.")
Expand All @@ -72,10 +76,17 @@ class SupersetConfig(ConfigModel):
description="Can be used to change mapping for database names in superset to what you have in datahub",
)

@validator("connect_uri")
@validator("connect_uri", "display_uri")
def remove_trailing_slash(cls, v):
    """Return ``v`` after passing it through ``config_clean.remove_trailing_slashes``."""
    cleaned = config_clean.remove_trailing_slashes(v)
    return cleaned

@root_validator
def default_display_uri_to_connect_uri(cls, values):
    """Fall back to ``connect_uri`` when ``display_uri`` was not provided.

    Args:
        cls: The config class (unused).
        values: Mapping of field names to their values.

    Returns:
        The same ``values`` mapping, with ``display_uri`` set to the value of
        ``connect_uri`` when ``display_uri`` was missing or ``None``.
    """
    if values.get("display_uri") is None:
        # Mappings such as dict have no ``set`` method; assign by key instead.
        values["display_uri"] = values.get("connect_uri")
    return values


def get_metric_name(metric):
if not metric:
Expand Down Expand Up @@ -208,7 +219,7 @@ def construct_dashboard_from_api_data(self, dashboard_data):
created=AuditStamp(time=modified_ts, actor=modified_actor),
lastModified=AuditStamp(time=modified_ts, actor=modified_actor),
)
dashboard_url = f"{self.config.connect_uri}{dashboard_data.get('url', '')}"
dashboard_url = f"{self.config.display_uri}{dashboard_data.get('url', '')}"

chart_urns = []
raw_position_data = dashboard_data.get("position_json", "{}")
Expand Down Expand Up @@ -280,7 +291,7 @@ def construct_chart_from_chart_data(self, chart_data):
lastModified=AuditStamp(time=modified_ts, actor=modified_actor),
)
chart_type = chart_type_from_viz_type.get(chart_data.get("viz_type", ""))
chart_url = f"{self.config.connect_uri}{chart_data.get('url', '')}"
chart_url = f"{self.config.display_uri}{chart_data.get('url', '')}"

datasource_id = chart_data.get("datasource_id")
datasource_urn = self.get_datasource_urn_from_id(datasource_id)
Expand Down

0 comments on commit 865074e

Please sign in to comment.