Skip to content

Commit

Permalink
fix(ingestion/datahub): moved urn_pattern config to source config (da…
Browse files Browse the repository at this point in the history
  • Loading branch information
dushayntAW authored and sleeperdeep committed Jun 25, 2024
1 parent 3030aa9 commit 0b24236
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 22 deletions.
14 changes: 7 additions & 7 deletions metadata-ingestion/docs/sources/datahub/datahub_pre.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,13 @@ source:
stateful_ingestion:
enabled: true
ignore_old_state: true
urn_pattern: # URN pattern to ignore/include in the ingestion
deny:
# Ignores all datahub metadata where the urn matches the regex
- ^denied.urn.*
allow:
# Ingests all datahub metadata where the urn matches the regex.
- ^allowed.urn.*
urn_pattern: # URN pattern to ignore/include in the ingestion
deny:
# Ignores all datahub metadata where the urn matches the regex
- ^denied.urn.*
allow:
# Ingests all datahub metadata where the urn matches the regex.
- ^allowed.urn.*
```
#### Limitations
Expand Down
6 changes: 3 additions & 3 deletions metadata-ingestion/docs/sources/datahub/datahub_recipe.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ source:
stateful_ingestion:
enabled: true
ignore_old_state: false
extractor_config:
set_system_metadata: false # Replicate system metadata
urn_pattern:
urn_pattern:
deny:
# Ignores all datahub metadata where the urn matches the regex
- ^denied.urn.*
allow:
# Ingests all datahub metadata where the urn matches the regex.
- ^allowed.urn.*
extractor_config:
set_system_metadata: false # Replicate system metadata

# Here, we write to a DataHub instance
# You can also use a different sink, e.g. to write the data to a file instead
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,7 @@

from datahub.cli.cli_utils import get_url_and_token
from datahub.configuration import config_loader
from datahub.configuration.common import (
AllowDenyPattern,
ConfigModel,
DynamicTypedConfig,
)
from datahub.configuration.common import ConfigModel, DynamicTypedConfig
from datahub.ingestion.graph.client import DatahubClientConfig
from datahub.ingestion.sink.file import FileSinkConfig

Expand All @@ -25,7 +21,6 @@
class SourceConfig(DynamicTypedConfig):
extractor: str = "generic"
extractor_config: dict = Field(default_factory=dict)
urn_pattern: AllowDenyPattern = Field(default=AllowDenyPattern())


class ReporterConfig(DynamicTypedConfig):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from pydantic import Field, root_validator

from datahub.configuration.common import AllowDenyPattern
from datahub.configuration.kafka import KafkaConsumerConnectionConfig
from datahub.ingestion.source.sql.sql_config import SQLAlchemyConnectionConfig
from datahub.ingestion.source.state.stateful_ingestion_base import (
Expand Down Expand Up @@ -80,6 +81,8 @@ class DataHubSourceConfig(StatefulIngestionConfigBase):
hidden_from_docs=True,
)

urn_pattern: AllowDenyPattern = Field(default=AllowDenyPattern())

@root_validator(skip_on_failure=True)
def check_ingesting_data(cls, values):
if (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,9 @@ def __init__(self, config: DataHubSourceConfig, ctx: PipelineContext):
super().__init__(config, ctx)
self.config = config

if (
ctx.pipeline_config
and ctx.pipeline_config.source
and ctx.pipeline_config.source.urn_pattern
):
self.urn_pattern = ctx.pipeline_config.source.urn_pattern
if self.config.urn_pattern:
self.urn_pattern = self.config.urn_pattern

self.report: DataHubSourceReport = DataHubSourceReport()
self.stateful_ingestion_handler = StatefulDataHubIngestionHandler(self)

Expand Down

0 comments on commit 0b24236

Please sign in to comment.