From 3b9b0d5c29c89164cbc16353f11435c9b7505421 Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Fri, 24 Jun 2022 12:46:36 -0400 Subject: [PATCH 1/3] WIP --- .../glossaryTerm/GlossaryTermEntity.tsx | 19 + .../profile/sidebar/SidebarAboutSection.tsx | 11 +- .../Documentation/components/LinkList.tsx | 32 +- .../src/app/entity/shared/types.ts | 1 + .../src/assets/datahub_ingestion_schema.json | 10055 ++++++++++++++++ 5 files changed, 10115 insertions(+), 3 deletions(-) create mode 100644 datahub-web-react/src/assets/datahub_ingestion_schema.json diff --git a/datahub-web-react/src/app/entity/glossaryTerm/GlossaryTermEntity.tsx b/datahub-web-react/src/app/entity/glossaryTerm/GlossaryTermEntity.tsx index 5c9e9fc8a92c7..0a3577eec910d 100644 --- a/datahub-web-react/src/app/entity/glossaryTerm/GlossaryTermEntity.tsx +++ b/datahub-web-react/src/app/entity/glossaryTerm/GlossaryTermEntity.tsx @@ -118,12 +118,31 @@ export class GlossaryTermEntity implements Entity { }; getOverridePropertiesFromEntity = (glossaryTerm?: GlossaryTerm | null): GenericEntityProperties => { + // let institutionalMemory = glossaryTerm?.institutionalMemory; + // if (glossaryTerm?.properties?.sourceUrl) { + // const sourceInfo = { + // url: glossaryTerm.properties.sourceUrl, + // label: 'Definition', + // } as InstitutionalMemoryMetadata; + + // if (glossaryTerm.institutionalMemory) { + // const elements = glossaryTerm.institutionalMemory.elements || []; + // const updatedElements = [...elements, sourceInfo]; + // institutionalMemory = { ...glossaryTerm.institutionalMemory, elements: updatedElements }; + // } else { + // institutionalMemory = { elements: [sourceInfo], __typename: 'InstitutionalMemory' }; + // } + // } + // if dataset has subTypes filled out, pick the most specific subtype and return it return { customProperties: glossaryTerm?.properties?.customProperties, + // institutionalMemory, }; }; + // if + renderSearch = (result: SearchResult) => { return this.renderPreview(PreviewType.SEARCH, 
result.entity as GlossaryTerm); }; diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/SidebarAboutSection.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/SidebarAboutSection.tsx index 61bae85408c8f..94cef6f327c4a 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/SidebarAboutSection.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/SidebarAboutSection.tsx @@ -49,6 +49,9 @@ export const SidebarAboutSection = ({ properties }: { properties?: Props }) => { const description = entityData?.editableProperties?.description || entityData?.properties?.description; const links = entityData?.institutionalMemory?.elements || []; + console.log('entityData', entityData?.properties?.sourceUrl); + const sourceUrl = entityData?.properties?.sourceUrl; + const isUntouched = !description && !(links?.length > 0); return ( @@ -94,8 +97,14 @@ export const SidebarAboutSection = ({ properties }: { properties?: Props }) => { )} - {links?.length > 0 ? ( + {links?.length > 0 || !!sourceUrl ? ( + {sourceUrl && ( + + + Definition + + )} {(links || []).map((link) => ( Promise; }; @@ -37,6 +41,7 @@ export const LinkList = ({ refetch }: LinkListProps) => { const entityRegistry = useEntityRegistry(); const [removeLinkMutation] = useRemoveLinkMutation(); const links = entityData?.institutionalMemory?.elements || []; + const sourceUrl = entityData?.properties?.sourceUrl; const handleDeleteLink = async (linkUrl: string) => { try { @@ -55,6 +60,29 @@ export const LinkList = ({ refetch }: LinkListProps) => { return entityData ? 
( <> + {sourceUrl && ( + ( + + + + + + + Definition + + + } + /> + + )} + /> + )} + {sourceUrl && links.length > 0 && } {links.length > 0 && ( ; qualifiedName?: Maybe; + sourceUrl?: Maybe; }>; globalTags?: Maybe; glossaryTerms?: Maybe; diff --git a/datahub-web-react/src/assets/datahub_ingestion_schema.json b/datahub-web-react/src/assets/datahub_ingestion_schema.json new file mode 100644 index 0000000000000..a9819d98746fa --- /dev/null +++ b/datahub-web-react/src/assets/datahub_ingestion_schema.json @@ -0,0 +1,10055 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "id": "https://json.schemastore.org/datahub-ingestion", + "title": "Datahub Ingestion", + "description": "Root schema of Datahub Ingestion", + "definitions": { + "console_sink": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "console" + ] + } + }, + "required": [ + "type" + ] + }, + "file_sink": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "file" + ] + }, + "config": { + "$ref": "#/definitions/file_sink_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "file_sink_config": { + "type": "object", + "properties": { + "filename": { + "description": "Path to file to write to.", + "type": "string" + } + }, + "required": [ + "filename" + ], + "additionalProperties": false + }, + "datahub_rest_sink": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "datahub-rest" + ] + }, + "config": { + "$ref": "#/definitions/datahub_rest_sink_config" + } + }, + "required": [ + "type", + "config" + ], + "additionalProperties": false + }, + "datahub_rest_sink_config": { + "type": "object", + "properties": { + "ca_certificate_path": { + "type": "string", + "description": "Path to CA certificate for HTTPS communications." 
+ }, + "max_threads": { + "type": "number", + "description": "Experimental: Max parallelism for REST API calls", + "default": 1 + }, + "retry_status_codes": { + "type": "array", + "items": { + "type": "number" + }, + "description": "Retry HTTP request also on these status codes", + "default": [ + 429, + 502, + 503, + 504 + ] + }, + "server": { + "type": "string", + "description": "URL of DataHub GMS endpoint." + }, + "timeout_sec": { + "type": "number", + "description": "Per-HTTP request timeout.", + "default": 30 + }, + "token": { + "type": "string", + "description": "Bearer token used for authentication." + }, + "extra_headers": { + "type": "string", + "description": "Extra headers which will be added to the request." + } + }, + "required": [ + "server" + ], + "additionalProperties": false + }, + "datahub_kafka_sink": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "datahub-kafka" + ] + }, + "config": { + "$ref": "#/definitions/datahub_kafka_sink_config" + } + }, + "required": [ + "type", + "config" + ], + "additionalProperties": false + }, + "datahub_kafka_sink_config": { + "type": "object", + "properties": { + "connection": { + "type": "object", + "properties": { + "bootstrap": { + "type": "string", + "description": "Kafka bootstrap URL.", + "default": "localhost:9092" + }, + "producer_config": { + "type": "object", + "description": "Passed to https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#confluent_kafka.SerializingProducer" + }, + "schema_registry_url": { + "type": "string", + "description": "URL of schema registry being used.", + "default": "http://localhost:8081" + }, + "schema_registry_config": { + "type": "object", + "description": "Passed to https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#confluent_kafka.schema_registry.SchemaRegistryClient" + } + }, + "additionalProperties": false, + "required": [ + "bootstrap", + 
"schema_registry_url" + ] + }, + "topic_routes": { + "type": "object", + "properties": { + "MetadataChangeEvent": { + "type": "string", + "description": "Overridden Kafka topic name for the MetadataChangeEvent", + "default": "MetadataChangeEvent" + }, + "MetadataChangeProposal": { + "type": "string", + "description": "Overridden Kafka topic name for the MetadataChangeProposal", + "default": "MetadataChangeProposal" + } + }, + "additionalProperties": false + } + }, + "required": [ + "connection" + ], + "additionalProperties": false + }, + "feast-legacy": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "feast-legacy" + ] + }, + "config": { + "$ref": "#/definitions/feast-legacy_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "feast-legacy_config": { + "title": "FeastConfig", + "description": "Any source that produces dataset urns in a single environment should inherit this class", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "core_url": { + "title": "Core Url", + "description": "URL of Feast Core instance.", + "default": "localhost:6565", + "type": "string" + }, + "use_local_build": { + "title": "Use Local Build", + "description": "Whether to build Feast ingestion Docker image locally.", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "redash": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "redash" + ] + }, + "config": { + "$ref": "#/definitions/redash_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "redash_config": { + "title": "RedashConfig", + "type": "object", + "properties": { + "connect_uri": { + "title": "Connect Uri", + "description": "Redash base URL.", + "default": "http://localhost:5000", + "type": "string" + }, + "api_key": { + "title": "Api Key", + 
"description": "Redash user API key.", + "default": "REDASH_API_KEY", + "type": "string" + }, + "dashboard_patterns": { + "title": "Dashboard Patterns", + "description": "regex patterns for dashboards to filter for ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "chart_patterns": { + "title": "Chart Patterns", + "description": "regex patterns for charts to filter for ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "skip_draft": { + "title": "Skip Draft", + "description": "Only ingest published dashboards and charts.", + "default": true, + "type": "boolean" + }, + "page_size": { + "title": "Page Size", + "description": "Limit on number of items to be queried at once.", + "default": 25, + "type": "integer" + }, + "api_page_limit": { + "title": "Api Page Limit", + "description": "Limit on number of pages queried for ingesting dashboards and charts API during pagination.", + "default": 9223372036854775807, + "type": "integer" + }, + "parallelism": { + "title": "Parallelism", + "description": "Parallelism to use while processing.", + "default": 1, + "type": "integer" + }, + "parse_table_names_from_sql": { + "title": "Parse Table Names From Sql", + "description": "See note below.", + "default": false, + "type": "boolean" + }, + "sql_parser": { + "title": "Sql Parser", + "description": "custom SQL parser. 
See note below for details.", + "default": "datahub.utilities.sql_parser.DefaultSQLParser", + "type": "string" + }, + "env": { + "title": "Env", + "description": "Environment to use in namespace when constructing URNs.", + "default": "PROD", + "type": "string" + } + }, + "additionalProperties": false + }, + "iceberg": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "iceberg" + ] + }, + "config": { + "$ref": "#/definitions/iceberg_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "iceberg_config": { + "title": "IcebergSourceConfig", + "description": "Any source that is a primary producer of Dataset metadata should inherit this class", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "adls": { + "title": "Adls", + "description": "[Azure Data Lake Storage](https://docs.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction) to crawl for Iceberg tables. This is one filesystem type supported by this source and **only one can be configured**.", + "allOf": [ + { + "$ref": "#/definitions/AdlsSourceConfig" + } + ] + }, + "localfs": { + "title": "Localfs", + "description": "Local path to crawl for Iceberg tables. This is one filesystem type supported by this source and **only one can be configured**.", + "type": "string" + }, + "max_path_depth": { + "title": "Max Path Depth", + "description": "Maximum folder depth to crawl for Iceberg tables. 
Folders deeper than this value will be silently ignored.", + "default": 2, + "type": "integer" + }, + "table_pattern": { + "title": "Table Pattern", + "description": "Regex patterns for tables to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "user_ownership_property": { + "title": "User Ownership Property", + "description": "Iceberg table property to look for a `CorpUser` owner. Can only hold a single user value. If property has no value, no owner information will be emitted.", + "default": "owner", + "type": "string" + }, + "group_ownership_property": { + "title": "Group Ownership Property", + "description": "Iceberg table property to look for a `CorpGroup` owner. Can only hold a single group value. If property has no value, no owner information will be emitted.", + "type": "string" + }, + "profiling": { + "title": "Profiling", + "default": { + "enabled": false, + "include_field_null_count": true, + "include_field_min_value": true, + "include_field_max_value": true + }, + "allOf": [ + { + "$ref": "#/definitions/IcebergProfilingConfig" + } + ] + } + }, + "additionalProperties": false + }, + "pulsar": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "pulsar" + ] + }, + "config": { + "$ref": "#/definitions/pulsar_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "pulsar_config": { + "title": "PulsarSourceConfig", + "description": "Base configuration class for stateful ingestion for source configs to inherit from.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "default": "PROD", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that 
all assets produced by this recipe belong to", + "type": "string" + }, + "stateful_ingestion": { + "title": "Stateful Ingestion", + "description": "see Stateful Ingestion", + "allOf": [ + { + "$ref": "#/definitions/PulsarSourceStatefulIngestionConfig" + } + ] + }, + "web_service_url": { + "title": "Web Service Url", + "description": "The web URL for the cluster.", + "default": "http://localhost:8080", + "type": "string" + }, + "timeout": { + "title": "Timeout", + "description": "Timeout setting, how long to wait for the Pulsar rest api to send data before giving up", + "default": 5, + "type": "integer" + }, + "issuer_url": { + "title": "Issuer Url", + "description": "The complete URL for a Custom Authorization Server. Mandatory for OAuth based authentication.", + "type": "string" + }, + "client_id": { + "title": "Client Id", + "description": "The application's client ID", + "type": "string" + }, + "client_secret": { + "title": "Client Secret", + "description": "The application's client secret", + "type": "string" + }, + "token": { + "title": "Token", + "description": "The access token for the application. Mandatory for token based authentication.", + "type": "string" + }, + "verify_ssl": { + "title": "Verify Ssl", + "description": "Either a boolean, in which case it controls whether we verify the server's TLS certificate, or a string, in which case it must be a path to a CA bundle to use.", + "default": true, + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string" + } + ] + }, + "tenant_patterns": { + "title": "Tenant Patterns", + "description": "List of regex patterns for tenants to include/exclude from ingestion. 
By default all tenants are allowed.", + "default": { + "allow": [ + ".*" + ], + "deny": [ + "pulsar" + ], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "namespace_patterns": { + "title": "Namespace Patterns", + "description": "List of regex patterns for namespaces to include/exclude from ingestion. By default the functions namespace is denied.", + "default": { + "allow": [ + ".*" + ], + "deny": [ + "public/functions" + ], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "topic_patterns": { + "title": "Topic Patterns", + "description": "List of regex patterns for topics to include/exclude from ingestion. By default the Pulsar system topics are denied.", + "default": { + "allow": [ + ".*" + ], + "deny": [ + "/__.*$" + ], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "exclude_individual_partitions": { + "title": "Exclude Individual Partitions", + "description": "Extract each individual partitioned topic. e.g. 
when turned off a topic with 100 partitions will result in 100 Datasets.", + "default": true, + "type": "boolean" + }, + "tenants": { + "title": "Tenants", + "description": "Listing all tenants requires superUser role, alternatively you can set a list of tenants you want to scrape using the tenant admin role", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "domain": { + "title": "Domain", + "description": "Domain patterns", + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/AllowDenyPattern" + } + }, + "oid_config": { + "title": "Oid Config", + "description": "Placeholder for OpenId discovery document", + "type": "object" + } + }, + "additionalProperties": false + }, + "looker": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "looker" + ] + }, + "config": { + "$ref": "#/definitions/looker_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "looker_config": { + "title": "LookerDashboardSourceConfig", + "description": "Any source that is a primary producer of Dataset metadata should inherit this class", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "explore_naming_pattern": { + "title": "Explore Naming Pattern", + "description": "Pattern for providing dataset names to explores. Allowed variables are {project}, {model}, {name}. 
Default is `{model}.explore.{name}`", + "default": { + "allowed_vars": [ + "platform", + "env", + "project", + "model", + "name" + ], + "pattern": "{model}.explore.{name}", + "variables": null + }, + "allOf": [ + { + "$ref": "#/definitions/NamingPattern" + } + ] + }, + "explore_browse_pattern": { + "title": "Explore Browse Pattern", + "default": { + "allowed_vars": [ + "platform", + "env", + "project", + "model", + "name" + ], + "pattern": "/{env}/{platform}/{project}/explores/{model}.{name}", + "variables": null + }, + "allOf": [ + { + "$ref": "#/definitions/NamingPattern" + } + ] + }, + "view_naming_pattern": { + "title": "View Naming Pattern", + "description": "Pattern for providing dataset names to views. Allowed variables are `{project}`, `{model}`, `{name}`", + "default": { + "allowed_vars": [ + "platform", + "env", + "project", + "model", + "name" + ], + "pattern": "{project}.view.{name}", + "variables": null + }, + "allOf": [ + { + "$ref": "#/definitions/NamingPattern" + } + ] + }, + "view_browse_pattern": { + "title": "View Browse Pattern", + "description": "Pattern for providing browse paths to views. Allowed variables are `{project}`, `{model}`, `{name}`, `{platform}` and `{env}`", + "default": { + "allowed_vars": [ + "platform", + "env", + "project", + "model", + "name" + ], + "pattern": "/{env}/{platform}/{project}/views/{name}", + "variables": null + }, + "allOf": [ + { + "$ref": "#/definitions/NamingPattern" + } + ] + }, + "tag_measures_and_dimensions": { + "title": "Tag Measures And Dimensions", + "description": "When enabled, attaches tags to measures, dimensions and dimension groups to make them more discoverable. When disabled, adds this information to the description of the column.", + "default": true, + "type": "boolean" + }, + "platform_name": { + "title": "Platform Name", + "description": "Default platform name. 
Don't change.", + "default": "looker", + "type": "string" + }, + "github_info": { + "title": "Github Info", + "description": "Reference to your github location to enable easy navigation from DataHub to your LookML files", + "allOf": [ + { + "$ref": "#/definitions/GitHubInfo" + } + ] + }, + "client_id": { + "title": "Client Id", + "description": "Looker API client id.", + "type": "string" + }, + "client_secret": { + "title": "Client Secret", + "description": "Looker API client secret.", + "type": "string" + }, + "base_url": { + "title": "Base Url", + "description": "Url to your Looker instance: `https://company.looker.com:19999` or `https://looker.company.com`, or similar. Used for making API calls to Looker and constructing clickable dashboard and chart urls.", + "type": "string" + }, + "transport_options": { + "title": "Transport Options", + "description": "Populates the [TransportOptions](https://github.com/looker-open-source/sdk-codegen/blob/94d6047a0d52912ac082eb91616c1e7c379ab262/python/looker_sdk/rtl/transport.py#L70) struct for looker client", + "allOf": [ + { + "$ref": "#/definitions/TransportOptionsConfig" + } + ] + }, + "dashboard_pattern": { + "title": "Dashboard Pattern", + "description": "Patterns for selecting dashboard ids that are to be included", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "chart_pattern": { + "title": "Chart Pattern", + "description": "Patterns for selecting chart ids that are to be included", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "include_deleted": { + "title": "Include Deleted", + "description": "Whether to include deleted dashboards.", + "default": false, + "type": "boolean" + }, + "extract_owners": { + "title": "Extract Owners", + 
"description": "When enabled, extracts ownership from Looker directly. When disabled, ownership is left empty for dashboards and charts.", + "default": true, + "type": "boolean" + }, + "actor": { + "title": "Actor", + "description": "This config is deprecated in favor of `extract_owners`. Previously, was the actor to use in ownership properties of ingested metadata.", + "type": "string" + }, + "strip_user_ids_from_email": { + "title": "Strip User Ids From Email", + "description": "When enabled, converts Looker user emails of the form name@domain.com to urn:li:corpuser:name when assigning ownership", + "default": false, + "type": "boolean" + }, + "skip_personal_folders": { + "title": "Skip Personal Folders", + "description": "Whether to skip ingestion of dashboards in personal folders. Setting this to True will only ingest dashboards in the Shared folder space.", + "default": false, + "type": "boolean" + }, + "max_threads": { + "title": "Max Threads", + "description": "Max parallelism for Looker API calls. Defaults to cpuCount or 40", + "default": 10, + "type": "integer" + }, + "external_base_url": { + "title": "External Base Url", + "description": "Optional URL to use when constructing external URLs to Looker if the `base_url` is not the correct one to use. For example, `https://looker-public.company.com`. 
If not provided, the external base URL will default to `base_url`.", + "type": "string" + } + }, + "required": [ + "client_id", + "client_secret", + "base_url" + ], + "additionalProperties": false + }, + "sqlalchemy": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "sqlalchemy" + ] + }, + "config": { + "$ref": "#/definitions/sqlalchemy_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "sqlalchemy_config": { + "title": "SQLAlchemyGenericConfig", + "description": "Base configuration class for stateful ingestion for source configs to inherit from.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "Name of platform being ingested, used in constructing URNs.", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "stateful_ingestion": { + "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" + }, + "options": { + "title": "Options", + "default": {}, + "type": "object" + }, + "schema_pattern": { + "title": "Schema Pattern", + "description": "regex patterns for schemas to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + "description": "regex patterns for tables to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "view_pattern": { + "title": "View Pattern", + "description": "regex patterns for 
views to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "profile_pattern": { + "title": "Profile Pattern", + "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "domain": { + "title": "Domain", + "description": "regex patterns for tables/schemas to decide domain_key domain key (domain_key can be any string like \"sales\".) There can be multiple domain keys specified.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/AllowDenyPattern" + } + }, + "include_views": { + "title": "Include Views", + "description": "Whether views should be ingested.", + "default": true, + "type": "boolean" + }, + "include_tables": { + "title": "Include Tables", + "description": "Whether tables should be ingested.", + "default": true, + "type": "boolean" + }, + "profiling": { + "title": "Profiling", + "default": { + "enabled": false, + "limit": null, + "offset": null, + "report_dropped_profiles": false, + "turn_off_expensive_profiling_metrics": false, + "profile_table_level_only": false, + "include_field_null_count": true, + "include_field_min_value": true, + "include_field_max_value": true, + "include_field_mean_value": true, + "include_field_median_value": true, + "include_field_stddev_value": true, + "include_field_quantiles": false, + "include_field_distinct_value_frequencies": false, + "include_field_histogram": false, + "include_field_sample_values": true, + "allow_deny_patterns": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "max_number_of_fields_to_profile": null, + 
"profile_if_updated_since_days": 1, + "max_workers": 50, + "query_combiner_enabled": true, + "catch_exceptions": true, + "partition_profiling_enabled": true, + "bigquery_temp_table_schema": null, + "partition_datetime": null + }, + "allOf": [ + { + "$ref": "#/definitions/GEProfilingConfig" + } + ] + }, + "connect_uri": { + "title": "Connect Uri", + "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls", + "type": "string" + } + }, + "required": [ + "platform", + "connect_uri" + ], + "additionalProperties": false + }, + "azure-ad": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "azure-ad" + ] + }, + "config": { + "$ref": "#/definitions/azure-ad_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "azure-ad_config": { + "title": "AzureADConfig", + "description": "Config to create a token and connect to Azure AD instance", + "type": "object", + "properties": { + "client_id": { + "title": "Client Id", + "description": "Application ID. Found in your app registration on Azure AD Portal", + "type": "string" + }, + "tenant_id": { + "title": "Tenant Id", + "description": "Directory ID. Found in your app registration on Azure AD Portal", + "type": "string" + }, + "client_secret": { + "title": "Client Secret", + "description": "Client secret. Found in your app registration on Azure AD Portal", + "type": "string" + }, + "authority": { + "title": "Authority", + "description": "The authority (https://docs.microsoft.com/en-us/azure/active-directory/develop/msal-client-application-configuration) is a URL that indicates a directory that MSAL can request tokens from.", + "type": "string" + }, + "token_url": { + "title": "Token Url", + "description": "The token URL that acquires a token from Azure AD for authorizing requests. This source will only work with v1.0 endpoint.", + "type": "string" + }, + "redirect": { + "title": "Redirect", + "description": "Redirect URI. 
Found in your app registration on Azure AD Portal.", + "default": "https://login.microsoftonline.com/common/oauth2/nativeclient", + "type": "string" + }, + "graph_url": { + "title": "Graph Url", + "description": "[Microsoft Graph API endpoint](https://docs.microsoft.com/en-us/graph/use-the-api)", + "default": "https://graph.microsoft.com/v1.0", + "type": "string" + }, + "azure_ad_response_to_username_attr": { + "title": "Azure Ad Response To Username Attr", + "description": "Which Azure AD User Response attribute to use as input to DataHub username mapping.", + "default": "userPrincipalName", + "type": "string" + }, + "azure_ad_response_to_username_regex": { + "title": "Azure Ad Response To Username Regex", + "description": "A regex used to parse the DataHub username from the attribute specified in `azure_ad_response_to_username_attr`.", + "default": "(.*)", + "type": "string" + }, + "azure_ad_response_to_groupname_attr": { + "title": "Azure Ad Response To Groupname Attr", + "description": "Which Azure AD Group Response attribute to use as input to DataHub group name mapping.", + "default": "displayName", + "type": "string" + }, + "azure_ad_response_to_groupname_regex": { + "title": "Azure Ad Response To Groupname Regex", + "description": "A regex used to parse the DataHub group name from the attribute specified in `azure_ad_response_to_groupname_attr`.", + "default": "(.*)", + "type": "string" + }, + "ingest_users": { + "title": "Ingest Users", + "description": "Whether users should be ingested into DataHub.", + "default": true, + "type": "boolean" + }, + "ingest_groups": { + "title": "Ingest Groups", + "description": "Whether groups should be ingested into DataHub.", + "default": true, + "type": "boolean" + }, + "ingest_group_membership": { + "title": "Ingest Group Membership", + "description": "Whether group membership should be ingested into DataHub. 
ingest_groups must be True if this is True.", + "default": true, + "type": "boolean" + }, + "ingest_groups_users": { + "title": "Ingest Groups Users", + "description": "This option is useful only when `ingest_users` is set to False and `ingest_group_membership` to True. As a result, only the users who belong to the selected groups will be ingested.", + "default": true, + "type": "boolean" + }, + "users_pattern": { + "title": "Users Pattern", + "description": "regex patterns for users to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "groups_pattern": { + "title": "Groups Pattern", + "description": "regex patterns for groups to include in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "filtered_tracking": { + "title": "Filtered Tracking", + "description": "If enabled, report will contain names of filtered users and groups.", + "default": true, + "type": "boolean" + }, + "mask_group_id": { + "title": "Mask Group Id", + "description": "Whether workunit IDs for groups should be masked to avoid leaking sensitive information.", + "default": true, + "type": "boolean" + }, + "mask_user_id": { + "title": "Mask User Id", + "description": "Whether workunit IDs for users should be masked to avoid leaking sensitive information.", + "default": true, + "type": "boolean" + } + }, + "required": [ + "client_id", + "tenant_id", + "client_secret", + "authority", + "token_url" + ], + "additionalProperties": false + }, + "starburst-trino-usage": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "starburst-trino-usage" + ] + }, + "config": { + "$ref": "#/definitions/starburst-trino-usage_config" + } + }, + "required": [ + "type", + 
"config" + ] + }, + "starburst-trino-usage_config": { + "title": "TrinoUsageConfig", + "description": "Base configuration class for stateful ingestion for source configs to inherit from.", + "type": "object", + "properties": { + "bucket_duration": { + "description": "Size of the time window to aggregate usage stats.", + "default": "DAY", + "allOf": [ + { + "$ref": "#/definitions/BucketDuration" + } + ] + }, + "end_time": { + "title": "End Time", + "description": "Latest date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", + "type": "string", + "format": "date-time" + }, + "start_time": { + "title": "Start Time", + "description": "Earliest date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", + "type": "string", + "format": "date-time" + }, + "top_n_queries": { + "title": "Top N Queries", + "description": "Number of top queries to save to each table.", + "default": 10, + "exclusiveMinimum": 0, + "type": "integer" + }, + "user_email_pattern": { + "title": "User Email Pattern", + "description": "regex patterns for user emails to filter in usage.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "include_operational_stats": { + "title": "Include Operational Stats", + "description": "Whether to display operational stats.", + "default": true, + "type": "boolean" + }, + "include_read_operational_stats": { + "title": "Include Read Operational Stats", + "description": "Whether to report read operational stats. 
Experimental.", + "default": false, + "type": "boolean" + }, + "format_sql_queries": { + "title": "Format Sql Queries", + "description": "Whether to format sql queries", + "default": false, + "type": "boolean" + }, + "include_top_n_queries": { + "title": "Include Top N Queries", + "description": "Whether to ingest the top_n_queries.", + "default": true, + "type": "boolean" + }, + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "stateful_ingestion": { + "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" + }, + "options": { + "title": "Options", + "default": {}, + "type": "object" + }, + "schema_pattern": { + "title": "Schema Pattern", + "description": "regex patterns for schemas to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + "description": "regex patterns for tables to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "view_pattern": { + "title": "View Pattern", + "description": "regex patterns for views to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "profile_pattern": { + "title": "Profile Pattern", + 
"description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "domain": { + "title": "Domain", + "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) There can be multiple domain key specified.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/AllowDenyPattern" + } + }, + "include_views": { + "title": "Include Views", + "description": "Whether views should be ingested.", + "default": true, + "type": "boolean" + }, + "include_tables": { + "title": "Include Tables", + "description": "Whether tables should be ingested.", + "default": true, + "type": "boolean" + }, + "profiling": { + "title": "Profiling", + "default": { + "enabled": false, + "limit": null, + "offset": null, + "report_dropped_profiles": false, + "turn_off_expensive_profiling_metrics": false, + "profile_table_level_only": false, + "include_field_null_count": true, + "include_field_min_value": true, + "include_field_max_value": true, + "include_field_mean_value": true, + "include_field_median_value": true, + "include_field_stddev_value": true, + "include_field_quantiles": false, + "include_field_distinct_value_frequencies": false, + "include_field_histogram": false, + "include_field_sample_values": true, + "allow_deny_patterns": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "max_number_of_fields_to_profile": null, + "profile_if_updated_since_days": 1, + "max_workers": 50, + "query_combiner_enabled": true, + "catch_exceptions": true, + "partition_profiling_enabled": true, + "bigquery_temp_table_schema": null, + "partition_datetime": null + }, + "allOf": [ + { + "$ref": "#/definitions/GEProfilingConfig" + } + ] 
+ }, + "username": { + "title": "Username", + "description": "username", + "type": "string" + }, + "password": { + "title": "Password", + "description": "password", + "type": "string", + "writeOnly": true, + "format": "password" + }, + "host_port": { + "title": "Host Port", + "description": "host URL", + "type": "string" + }, + "database": { + "title": "Database", + "description": "The name of the catalog from getting the usage", + "type": "string" + }, + "database_alias": { + "title": "Database Alias", + "description": "Alias to apply to database when ingesting.", + "type": "string" + }, + "scheme": { + "title": "Scheme", + "default": "trino", + "type": "string" + }, + "sqlalchemy_uri": { + "title": "Sqlalchemy Uri", + "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", + "type": "string" + }, + "email_domain": { + "title": "Email Domain", + "description": "The email domain which will be appended to the users ", + "type": "string" + }, + "audit_catalog": { + "title": "Audit Catalog", + "description": "The catalog name where the audit table can be found ", + "type": "string" + }, + "audit_schema": { + "title": "Audit Schema", + "description": "The schema name where the audit table can be found", + "type": "string" + } + }, + "required": [ + "host_port", + "database", + "email_domain", + "audit_catalog", + "audit_schema" + ], + "additionalProperties": false + }, + "postgres": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "postgres" + ] + }, + "config": { + "$ref": "#/definitions/postgres_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "postgres_config": { + "title": "PostgresConfig", + "description": "Base configuration class for stateful ingestion for source configs to inherit from.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all 
assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "stateful_ingestion": { + "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" + }, + "options": { + "title": "Options", + "default": {}, + "type": "object" + }, + "schema_pattern": { + "title": "Schema Pattern", + "default": { + "allow": [ + ".*" + ], + "deny": [ + "information_schema" + ], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + "description": "regex patterns for tables to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "view_pattern": { + "title": "View Pattern", + "description": "regex patterns for views to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "profile_pattern": { + "title": "Profile Pattern", + "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "domain": { + "title": "Domain", + "description": " regex patterns for tables/schemas to decide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/AllowDenyPattern" + } + }, + "include_views": { + "title": "Include Views", + "description": "Whether views should be ingested.", + "default": true, + "type": "boolean" + }, + "include_tables": { + "title": "Include Tables", + "description": "Whether tables should be ingested.", + "default": true, + "type": "boolean" + }, + "profiling": { + "title": "Profiling", + "default": { + "enabled": false, + "limit": null, + "offset": null, + "report_dropped_profiles": false, + "turn_off_expensive_profiling_metrics": false, + "profile_table_level_only": false, + "include_field_null_count": true, + "include_field_min_value": true, + "include_field_max_value": true, + "include_field_mean_value": true, + "include_field_median_value": true, + "include_field_stddev_value": true, + "include_field_quantiles": false, + "include_field_distinct_value_frequencies": false, + "include_field_histogram": false, + "include_field_sample_values": true, + "allow_deny_patterns": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "max_number_of_fields_to_profile": null, + "profile_if_updated_since_days": 1, + "max_workers": 50, + "query_combiner_enabled": true, + "catch_exceptions": true, + "partition_profiling_enabled": true, + "bigquery_temp_table_schema": null, + "partition_datetime": null + }, + "allOf": [ + { + "$ref": "#/definitions/GEProfilingConfig" + } + ] + }, + "username": { + "title": "Username", + "description": "username", + "type": "string" + }, + "password": { + "title": "Password", + "description": "password", + "type": "string", + "writeOnly": true, + "format": "password" + }, + "host_port": { + "title": "Host Port", + "description": "host URL", + "type": "string" + }, + "database": { + "title": "Database", + "description": "database (catalog)", + "type": "string" + }, + "database_alias": { + 
"title": "Database Alias", + "description": "Alias to apply to database when ingesting.", + "type": "string" + }, + "scheme": { + "title": "Scheme", + "description": "database scheme", + "default": "postgresql+psycopg2", + "type": "string" + }, + "sqlalchemy_uri": { + "title": "Sqlalchemy Uri", + "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", + "type": "string" + } + }, + "required": [ + "host_port" + ], + "additionalProperties": false + }, + "tableau": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "tableau" + ] + }, + "config": { + "$ref": "#/definitions/tableau_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "tableau_config": { + "title": "TableauConfig", + "type": "object", + "properties": { + "connect_uri": { + "title": "Connect Uri", + "description": "Tableau host URL.", + "type": "string" + }, + "username": { + "title": "Username", + "description": "Tableau username, must be set if authenticating using username/password.", + "type": "string" + }, + "password": { + "title": "Password", + "description": "Tableau password, must be set if authenticating using username/password.", + "type": "string" + }, + "token_name": { + "title": "Token Name", + "description": "Tableau token name, must be set if authenticating using a personal access token.", + "type": "string" + }, + "token_value": { + "title": "Token Value", + "description": "Tableau token value, must be set if authenticating using a personal access token.", + "type": "string" + }, + "site": { + "title": "Site", + "description": "Tableau Site. Always required for Tableau Online. 
Use an empty string to connect with the Default site on Tableau Server.", + "default": "", + "type": "string" + }, + "projects": { + "title": "Projects", + "description": "List of projects", + "default": [ + "default" + ], + "type": "array", + "items": { + "type": "string" + } + }, + "default_schema_map": { + "title": "Default Schema Map", + "description": "Default schema to use when schema is not found.", + "default": {}, + "type": "object" + }, + "ingest_tags": { + "title": "Ingest Tags", + "description": "Ingest Tags from source. This will override Tags entered from UI", + "default": false, + "type": "boolean" + }, + "ingest_owner": { + "title": "Ingest Owner", + "description": "Ingest Owner from source. This will override Owner info entered from UI", + "default": false, + "type": "boolean" + }, + "ingest_tables_external": { + "title": "Ingest Tables External", + "description": "Ingest details for tables external to (not embedded in) tableau as entities.", + "default": false, + "type": "boolean" + }, + "workbooks_page_size": { + "title": "Workbooks Page Size", + "description": "@deprecated(use page_size instead) Number of workbooks to query at a time using Tableau api.", + "type": "integer" + }, + "page_size": { + "title": "Page Size", + "description": "Number of metadata objects (e.g. 
CustomSQLTable, PublishedDatasource, etc) to query at a time using Tableau api.", + "default": 10, + "type": "integer" + }, + "env": { + "title": "Env", + "description": "Environment to use in namespace when constructing URNs.", + "default": "PROD", + "type": "string" + } + }, + "required": [ + "connect_uri" + ], + "additionalProperties": false + }, + "lookml": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "lookml" + ] + }, + "config": { + "$ref": "#/definitions/lookml_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "lookml_config": { + "title": "LookMLSourceConfig", + "description": "Any source that is a primary producer of Dataset metadata should inherit this class", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "explore_naming_pattern": { + "title": "Explore Naming Pattern", + "description": "Pattern for providing dataset names to explores. Allowed variables are {project}, {model}, {name}. 
Default is `{model}.explore.{name}`", + "default": { + "allowed_vars": [ + "platform", + "env", + "project", + "model", + "name" + ], + "pattern": "{model}.explore.{name}", + "variables": null + }, + "allOf": [ + { + "$ref": "#/definitions/NamingPattern" + } + ] + }, + "explore_browse_pattern": { + "title": "Explore Browse Pattern", + "default": { + "allowed_vars": [ + "platform", + "env", + "project", + "model", + "name" + ], + "pattern": "/{env}/{platform}/{project}/explores/{model}.{name}", + "variables": null + }, + "allOf": [ + { + "$ref": "#/definitions/NamingPattern" + } + ] + }, + "view_naming_pattern": { + "title": "View Naming Pattern", + "description": "Pattern for providing dataset names to views. Allowed variables are `{project}`, `{model}`, `{name}`", + "default": { + "allowed_vars": [ + "platform", + "env", + "project", + "model", + "name" + ], + "pattern": "{project}.view.{name}", + "variables": null + }, + "allOf": [ + { + "$ref": "#/definitions/NamingPattern" + } + ] + }, + "view_browse_pattern": { + "title": "View Browse Pattern", + "description": "Pattern for providing browse paths to views. Allowed variables are `{project}`, `{model}`, `{name}`, `{platform}` and `{env}`", + "default": { + "allowed_vars": [ + "platform", + "env", + "project", + "model", + "name" + ], + "pattern": "/{env}/{platform}/{project}/views/{name}", + "variables": null + }, + "allOf": [ + { + "$ref": "#/definitions/NamingPattern" + } + ] + }, + "tag_measures_and_dimensions": { + "title": "Tag Measures And Dimensions", + "description": "When enabled, attaches tags to measures, dimensions and dimension groups to make them more discoverable. When disabled, adds this information to the description of the column.", + "default": true, + "type": "boolean" + }, + "platform_name": { + "title": "Platform Name", + "description": "Default platform name. 
Don't change.", + "default": "looker", + "type": "string" + }, + "github_info": { + "title": "Github Info", + "description": "Reference to your github location to enable easy navigation from DataHub to your LookML files", + "allOf": [ + { + "$ref": "#/definitions/GitHubInfo" + } + ] + }, + "base_folder": { + "title": "Base Folder", + "description": "Local filepath where the root of the LookML repo lives. This is typically the root folder where the `*.model.lkml` and `*.view.lkml` files are stored. e.g. If you have checked out your LookML repo under `/Users/jdoe/workspace/my-lookml-repo`, then set `base_folder` to `/Users/jdoe/workspace/my-lookml-repo`.", + "format": "directory-path", + "type": "string" + }, + "connection_to_platform_map": { + "title": "Connection To Platform Map", + "description": "A mapping of [Looker connection names](https://docs.looker.com/reference/model-params/connection-for-model) to DataHub platform, database, and schema values.", + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/LookerConnectionDefinition" + } + }, + "model_pattern": { + "title": "Model Pattern", + "description": "List of regex patterns for LookML models to include in the extraction.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "view_pattern": { + "title": "View Pattern", + "description": "List of regex patterns for LookML views to include in the extraction.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "parse_table_names_from_sql": { + "title": "Parse Table Names From Sql", + "description": "See note below.", + "default": false, + "type": "boolean" + }, + "sql_parser": { + "title": "Sql Parser", + "description": "See note below.", + "default": 
"datahub.utilities.sql_parser.DefaultSQLParser", + "type": "string" + }, + "api": { + "$ref": "#/definitions/LookerAPIConfig" + }, + "project_name": { + "title": "Project Name", + "description": "Required if you don't specify the `api` section. The project name within which all the model files live. See (https://docs.looker.com/data-modeling/getting-started/how-project-works) to understand what the Looker project name should be. The simplest way to see your projects is to click on `Develop` followed by `Manage LookML Projects` in the Looker application.", + "type": "string" + }, + "transport_options": { + "title": "Transport Options", + "description": "Populates the [TransportOptions](https://github.com/looker-open-source/sdk-codegen/blob/94d6047a0d52912ac082eb91616c1e7c379ab262/python/looker_sdk/rtl/transport.py#L70) struct for looker client", + "allOf": [ + { + "$ref": "#/definitions/TransportOptionsConfig" + } + ] + }, + "max_file_snippet_length": { + "title": "Max File Snippet Length", + "description": "When extracting the view definition from a lookml file, the maximum number of characters to extract.", + "default": 512000, + "type": "integer" + } + }, + "required": [ + "base_folder" + ], + "additionalProperties": false + }, + "powerbi": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "powerbi" + ] + }, + "config": { + "$ref": "#/definitions/powerbi_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "powerbi_config": { + "title": "PowerBiDashboardSourceConfig", + "description": "Any source that produces dataset urns in a single environment should inherit this class", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "tenant_id": { + "title": "Tenant Id", + "description": "Power BI tenant identifier.", + "type": "string" + }, + "workspace_id": { + "title": 
"Workspace Id", + "description": "Power BI workspace identifier.", + "type": "string" + }, + "dataset_type_mapping": { + "title": "Dataset Type Mapping", + "description": "Mapping of Power BI datasource type to Datahub dataset.", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "client_id": { + "title": "Client Id", + "description": "Azure AD App client identifier.", + "type": "string" + }, + "client_secret": { + "title": "Client Secret", + "description": "Azure AD App client secret.", + "type": "string" + }, + "scan_timeout": { + "title": "Scan Timeout", + "description": "time in seconds to wait for Power BI metadata scan result.", + "default": 60, + "type": "integer" + }, + "scope": { + "title": "Scope", + "default": "https://analysis.windows.net/powerbi/api/.default", + "type": "string" + }, + "base_url": { + "title": "Base Url", + "default": "https://api.powerbi.com/v1.0/myorg/groups", + "type": "string" + }, + "admin_base_url": { + "title": "Admin Base Url", + "default": "https://api.powerbi.com/v1.0/myorg/admin", + "type": "string" + }, + "authority": { + "title": "Authority", + "default": "https://login.microsoftonline.com/", + "type": "string" + }, + "platform_name": { + "title": "Platform Name", + "default": "powerbi", + "type": "string" + }, + "platform_urn": { + "title": "Platform Urn", + "default": "urn:li:dataPlatform:powerbi", + "type": "string" + }, + "dashboard_pattern": { + "title": "Dashboard Pattern", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "chart_pattern": { + "title": "Chart Pattern", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + } + }, + "required": [ + "tenant_id", + "workspace_id", + "dataset_type_mapping", + "client_id", + 
"client_secret" + ], + "additionalProperties": false + }, + "kafka-connect": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "kafka-connect" + ] + }, + "config": { + "$ref": "#/definitions/kafka-connect_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "kafka-connect_config": { + "title": "KafkaConnectSourceConfig", + "description": "Any non-Dataset source that produces lineage to Datasets should inherit this class.\ne.g. Orchestrators, Pipelines, BI Tools etc.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform_instance_map": { + "title": "Platform Instance Map", + "description": "Platform instance mapping to use when constructing URNs. e.g.`platform_instance_map: { \"hive\": \"warehouse\" }`", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "connect_uri": { + "title": "Connect Uri", + "description": "URI to connect to.", + "default": "http://localhost:8083/", + "type": "string" + }, + "username": { + "title": "Username", + "description": "Kafka Connect username.", + "type": "string" + }, + "password": { + "title": "Password", + "description": "Kafka Connect password.", + "type": "string" + }, + "cluster_name": { + "title": "Cluster Name", + "description": "Cluster to ingest from.", + "default": "connect-cluster", + "type": "string" + }, + "construct_lineage_workunits": { + "title": "Construct Lineage Workunits", + "description": "Whether to create the input and output Dataset entities", + "default": true, + "type": "boolean" + }, + "connector_patterns": { + "title": "Connector Patterns", + "description": "regex patterns for connectors to filter for ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": 
"#/definitions/AllowDenyPattern" + } + ] + }, + "provided_configs": { + "title": "Provided Configs", + "description": "Provided Configurations", + "type": "array", + "items": { + "$ref": "#/definitions/ProvidedConfig" + } + }, + "connect_to_platform_map": { + "title": "Connect To Platform Map", + "description": "Platform instance mapping when multiple instances for a platform is available. Entry for a platform should be in either `platform_instance_map` or `connect_to_platform_map`. e.g.`connect_to_platform_map: { \"postgres-connector-finance-db\": \"postgres\": \"core_finance_instance\" }`", + "type": "object" + } + }, + "additionalProperties": false + }, + "snowflake": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "snowflake" + ] + }, + "config": { + "$ref": "#/definitions/snowflake_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "snowflake_config": { + "title": "SnowflakeConfig", + "description": "Base configuration class for stateful ingestion for source configs to inherit from.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "stateful_ingestion": { + "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" + }, + "options": { + "title": "Options", + "default": {}, + "type": "object" + }, + "schema_pattern": { + "title": "Schema Pattern", + "description": "regex patterns for schemas to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": 
"#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + "description": "regex patterns for tables to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "view_pattern": { + "title": "View Pattern", + "description": "regex patterns for views to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "profile_pattern": { + "title": "Profile Pattern", + "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "domain": { + "title": "Domain", + "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/AllowDenyPattern" + } + }, + "include_views": { + "title": "Include Views", + "description": "Whether views should be ingested.", + "default": true, + "type": "boolean" + }, + "include_tables": { + "title": "Include Tables", + "description": "Whether tables should be ingested.", + "default": true, + "type": "boolean" + }, + "profiling": { + "title": "Profiling", + "default": { + "enabled": false, + "limit": null, + "offset": null, + "report_dropped_profiles": false, + "turn_off_expensive_profiling_metrics": false, + "profile_table_level_only": false, + "include_field_null_count": true, + "include_field_min_value": true, + "include_field_max_value": true, + "include_field_mean_value": true, + "include_field_median_value": true, + "include_field_stddev_value": true, + "include_field_quantiles": false, + "include_field_distinct_value_frequencies": false, + "include_field_histogram": false, + "include_field_sample_values": true, + "allow_deny_patterns": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "max_number_of_fields_to_profile": null, + "profile_if_updated_since_days": 1, + "max_workers": 50, + "query_combiner_enabled": true, + "catch_exceptions": true, + "partition_profiling_enabled": true, + "bigquery_temp_table_schema": null, + "partition_datetime": null + }, + "allOf": [ + { + "$ref": "#/definitions/GEProfilingConfig" + } + ] + }, + "bucket_duration": { + "description": "Size of the time window to aggregate usage stats.", + "default": "DAY", + "allOf": [ + { + "$ref": "#/definitions/BucketDuration" + } + ] + }, + "end_time": { + "title": "End Time", + "description": "Latest date of usage to consider. 
Default: Last full day in UTC (or hour, depending on `bucket_duration`)", + "type": "string", + "format": "date-time" + }, + "start_time": { + "title": "Start Time", + "description": "Earliest date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", + "type": "string", + "format": "date-time" + }, + "scheme": { + "title": "Scheme", + "default": "snowflake", + "type": "string" + }, + "username": { + "title": "Username", + "description": "Snowflake username.", + "type": "string" + }, + "password": { + "title": "Password", + "description": "Snowflake password.", + "type": "string", + "writeOnly": true, + "format": "password" + }, + "private_key_path": { + "title": "Private Key Path", + "description": "The path to the private key if using key pair authentication. See: https://docs.snowflake.com/en/user-guide/key-pair-auth.html", + "type": "string" + }, + "private_key_password": { + "title": "Private Key Password", + "description": "Password for your private key if using key pair authentication.", + "type": "string", + "writeOnly": true, + "format": "password" + }, + "oauth_config": { + "title": "Oauth Config", + "description": "oauth configuration - https://docs.snowflake.com/en/user-guide/python-connector-example.html#connecting-with-oauth", + "allOf": [ + { + "$ref": "#/definitions/OauthConfiguration" + } + ] + }, + "authentication_type": { + "title": "Authentication Type", + "description": "The type of authenticator to use when connecting to Snowflake. Supports \"DEFAULT_AUTHENTICATOR\", \"EXTERNAL_BROWSER_AUTHENTICATOR\" and \"KEY_PAIR_AUTHENTICATOR\".", + "default": "DEFAULT_AUTHENTICATOR", + "type": "string" + }, + "host_port": { + "title": "Host Port", + "description": "DEPRECATED: Snowflake account. e.g. abc48144", + "type": "string" + }, + "account_id": { + "title": "Account Id", + "description": "Snowflake account. e.g. 
abc48144", + "type": "string" + }, + "warehouse": { + "title": "Warehouse", + "description": "Snowflake warehouse.", + "type": "string" + }, + "role": { + "title": "Role", + "description": "Snowflake role.", + "type": "string" + }, + "include_table_lineage": { + "title": "Include Table Lineage", + "description": "If enabled, populates the snowflake table-to-table and s3-to-snowflake table lineage. Requires appropriate grants given to the role.", + "default": true, + "type": "boolean" + }, + "include_view_lineage": { + "title": "Include View Lineage", + "description": "If enabled, populates the snowflake view->table and table->view lineages (no view->view lineage yet). Requires appropriate grants given to the role, and include_table_lineage to be True.", + "default": true, + "type": "boolean" + }, + "connect_args": { + "title": "Connect Args", + "description": "Connect args to pass to Snowflake SqlAlchemy driver", + "type": "object" + }, + "check_role_grants": { + "title": "Check Role Grants", + "description": "If set to True then checks role grants at the beginning of the ingestion run. To be used for debugging purposes. If you think everything is working fine then set it to False. 
In some cases this can take long depending on how many roles you might have.", + "default": false, + "type": "boolean" + }, + "database_pattern": { + "title": "Database Pattern", + "default": { + "allow": [ + ".*" + ], + "deny": [ + "^UTIL_DB$", + "^SNOWFLAKE$", + "^SNOWFLAKE_SAMPLE_DATA$" + ], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "provision_role": { + "$ref": "#/definitions/SnowflakeProvisionRoleConfig" + }, + "ignore_start_time_lineage": { + "title": "Ignore Start Time Lineage", + "default": false, + "type": "boolean" + }, + "upstream_lineage_in_report": { + "title": "Upstream Lineage In Report", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "snowflake-usage": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "snowflake-usage" + ] + }, + "config": { + "$ref": "#/definitions/snowflake-usage_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "snowflake-usage_config": { + "title": "SnowflakeUsageConfig", + "description": "Base configuration class for stateful ingestion for source configs to inherit from.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "stateful_ingestion": { + "title": "Stateful Ingestion", + "description": "Stateful ingestion related configs", + "allOf": [ + { + "$ref": "#/definitions/SnowflakeStatefulIngestionConfig" + } + ] + }, + "bucket_duration": { + "description": "Size of the time window to aggregate 
usage stats.", + "default": "DAY", + "allOf": [ + { + "$ref": "#/definitions/BucketDuration" + } + ] + }, + "end_time": { + "title": "End Time", + "description": "Latest date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", + "type": "string", + "format": "date-time" + }, + "start_time": { + "title": "Start Time", + "description": "Earliest date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", + "type": "string", + "format": "date-time" + }, + "top_n_queries": { + "title": "Top N Queries", + "description": "Number of top queries to save to each table.", + "default": 10, + "exclusiveMinimum": 0, + "type": "integer" + }, + "user_email_pattern": { + "title": "User Email Pattern", + "description": "regex patterns for user emails to filter in usage.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "include_operational_stats": { + "title": "Include Operational Stats", + "description": "Whether to display operational stats.", + "default": true, + "type": "boolean" + }, + "include_read_operational_stats": { + "title": "Include Read Operational Stats", + "description": "Whether to report read operational stats. 
Experimental.", + "default": false, + "type": "boolean" + }, + "format_sql_queries": { + "title": "Format Sql Queries", + "description": "Whether to format sql queries", + "default": false, + "type": "boolean" + }, + "include_top_n_queries": { + "title": "Include Top N Queries", + "description": "Whether to ingest the top_n_queries.", + "default": true, + "type": "boolean" + }, + "scheme": { + "title": "Scheme", + "default": "snowflake", + "type": "string" + }, + "username": { + "title": "Username", + "description": "Snowflake username.", + "type": "string" + }, + "password": { + "title": "Password", + "description": "Snowflake password.", + "type": "string", + "writeOnly": true, + "format": "password" + }, + "private_key_path": { + "title": "Private Key Path", + "description": "The path to the private key if using key pair authentication. See: https://docs.snowflake.com/en/user-guide/key-pair-auth.html", + "type": "string" + }, + "private_key_password": { + "title": "Private Key Password", + "description": "Password for your private key if using key pair authentication.", + "type": "string", + "writeOnly": true, + "format": "password" + }, + "oauth_config": { + "title": "Oauth Config", + "description": "oauth configuration - https://docs.snowflake.com/en/user-guide/python-connector-example.html#connecting-with-oauth", + "allOf": [ + { + "$ref": "#/definitions/OauthConfiguration" + } + ] + }, + "authentication_type": { + "title": "Authentication Type", + "description": "The type of authenticator to use when connecting to Snowflake. Supports \"DEFAULT_AUTHENTICATOR\", \"EXTERNAL_BROWSER_AUTHENTICATOR\" and \"KEY_PAIR_AUTHENTICATOR\".", + "default": "DEFAULT_AUTHENTICATOR", + "type": "string" + }, + "host_port": { + "title": "Host Port", + "description": "DEPRECATED: Snowflake account. e.g. abc48144", + "type": "string" + }, + "account_id": { + "title": "Account Id", + "description": "Snowflake account. e.g. 
abc48144", + "type": "string" + }, + "warehouse": { + "title": "Warehouse", + "description": "Snowflake warehouse.", + "type": "string" + }, + "role": { + "title": "Role", + "description": "Snowflake role.", + "type": "string" + }, + "include_table_lineage": { + "title": "Include Table Lineage", + "description": "If enabled, populates the snowflake table-to-table and s3-to-snowflake table lineage. Requires appropriate grants given to the role.", + "default": true, + "type": "boolean" + }, + "include_view_lineage": { + "title": "Include View Lineage", + "description": "If enabled, populates the snowflake view->table and table->view lineages (no view->view lineage yet). Requires appropriate grants given to the role, and include_table_lineage to be True.", + "default": true, + "type": "boolean" + }, + "connect_args": { + "title": "Connect Args", + "description": "Connect args to pass to Snowflake SqlAlchemy driver", + "type": "object" + }, + "check_role_grants": { + "title": "Check Role Grants", + "description": "If set to True then checks role grants at the beginning of the ingestion run. To be used for debugging purposes. If you think everything is working fine then set it to False. In some cases this can take long depending on how many roles you might have.", + "default": false, + "type": "boolean" + }, + "options": { + "title": "Options", + "description": "Any options specified here will be passed to SQLAlchemy's create_engine as kwargs. 
See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine for details.", + "type": "object" + }, + "database_pattern": { + "title": "Database Pattern", + "description": "List of regex patterns for databases to include/exclude in usage ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [ + "^UTIL_DB$", + "^SNOWFLAKE$", + "^SNOWFLAKE_SAMPLE_DATA$" + ], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "email_domain": { + "title": "Email Domain", + "description": "Email domain of your organisation so users can be displayed on UI appropriately.", + "type": "string" + }, + "schema_pattern": { + "title": "Schema Pattern", + "description": "List of regex patterns for schemas to include/exclude in usage ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + "description": "List of regex patterns for tables to include in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "view_pattern": { + "title": "View Pattern", + "description": "List of regex patterns for views to include in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "apply_view_usage_to_tables": { + "title": "Apply View Usage To Tables", + "description": "Allow/deny patterns for views in snowflake dataset names.", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "redshift-usage": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + 
"redshift-usage" + ] + }, + "config": { + "$ref": "#/definitions/redshift-usage_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "redshift-usage_config": { + "title": "RedshiftUsageConfig", + "description": "Base configuration class for stateful ingestion for source configs to inherit from.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "bucket_duration": { + "description": "Size of the time window to aggregate usage stats.", + "default": "DAY", + "allOf": [ + { + "$ref": "#/definitions/BucketDuration" + } + ] + }, + "end_time": { + "title": "End Time", + "description": "Latest date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", + "type": "string", + "format": "date-time" + }, + "start_time": { + "title": "Start Time", + "description": "Earliest date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", + "type": "string", + "format": "date-time" + }, + "top_n_queries": { + "title": "Top N Queries", + "description": "Number of top queries to save to each table.", + "default": 10, + "exclusiveMinimum": 0, + "type": "integer" + }, + "user_email_pattern": { + "title": "User Email Pattern", + "description": "regex patterns for user emails to filter in usage.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "include_operational_stats": { + "title": "Include Operational Stats", + "description": "Whether to display operational stats.", + "default": true, + "type": "boolean" + }, + "include_read_operational_stats": { + "title": "Include Read Operational Stats", + "description": "Whether to report read operational stats. 
Experimental.", + "default": false, + "type": "boolean" + }, + "format_sql_queries": { + "title": "Format Sql Queries", + "description": "Whether to format sql queries", + "default": false, + "type": "boolean" + }, + "include_top_n_queries": { + "title": "Include Top N Queries", + "description": "Whether to ingest the top_n_queries.", + "default": true, + "type": "boolean" + }, + "platform_instance_map": { + "title": "Platform Instance Map", + "description": "A holder for platform -> platform_instance mappings to generate correct dataset urns", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "stateful_ingestion": { + "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" + }, + "options": { + "title": "Options", + "description": "Any options specified here will be passed to SQLAlchemy's create_engine as kwargs.See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine for details.", + "default": {}, + "type": "object" + }, + "schema_pattern": { + "title": "Schema Pattern", + "default": { + "allow": [ + ".*" + ], + "deny": [ + "information_schema" + ], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + "description": "regex patterns for tables to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "view_pattern": { + "title": "View Pattern", + "description": "regex patterns for views to filter in ingestion.", + "default": { + 
"allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "profile_pattern": { + "title": "Profile Pattern", + "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "domain": { + "title": "Domain", + "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) There can be multiple domain key specified.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/AllowDenyPattern" + } + }, + "include_views": { + "title": "Include Views", + "description": "Whether views should be ingested.", + "default": true, + "type": "boolean" + }, + "include_tables": { + "title": "Include Tables", + "description": "Whether tables should be ingested.", + "default": true, + "type": "boolean" + }, + "profiling": { + "title": "Profiling", + "default": { + "enabled": false, + "limit": null, + "offset": null, + "report_dropped_profiles": false, + "turn_off_expensive_profiling_metrics": false, + "profile_table_level_only": false, + "include_field_null_count": true, + "include_field_min_value": true, + "include_field_max_value": true, + "include_field_mean_value": true, + "include_field_median_value": true, + "include_field_stddev_value": true, + "include_field_quantiles": false, + "include_field_distinct_value_frequencies": false, + "include_field_histogram": false, + "include_field_sample_values": true, + "allow_deny_patterns": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "max_number_of_fields_to_profile": null, + "profile_if_updated_since_days": 1, + "max_workers": 50, + 
"query_combiner_enabled": true, + "catch_exceptions": true, + "partition_profiling_enabled": true, + "bigquery_temp_table_schema": null, + "partition_datetime": null + }, + "allOf": [ + { + "$ref": "#/definitions/GEProfilingConfig" + } + ] + }, + "username": { + "title": "Username", + "description": "username", + "type": "string" + }, + "password": { + "title": "Password", + "description": "password", + "type": "string", + "writeOnly": true, + "format": "password" + }, + "host_port": { + "title": "Host Port", + "description": "host URL", + "type": "string" + }, + "database": { + "title": "Database", + "description": "database (catalog)", + "type": "string" + }, + "database_alias": { + "title": "Database Alias", + "description": "Alias to apply to database when ingesting.", + "type": "string" + }, + "scheme": { + "title": "Scheme", + "default": "redshift+psycopg2", + "type": "string" + }, + "sqlalchemy_uri": { + "title": "Sqlalchemy Uri", + "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. 
Takes precedence over other connection parameters.", + "type": "string" + }, + "default_schema": { + "title": "Default Schema", + "description": "The default schema to use if the sql parser fails to parse the schema with `sql_based` lineage collector", + "default": "public", + "type": "string" + }, + "include_table_lineage": { + "title": "Include Table Lineage", + "description": "Whether table lineage should be ingested.", + "default": true, + "type": "boolean" + }, + "include_copy_lineage": { + "title": "Include Copy Lineage", + "description": "Whether lineage should be collected from copy commands", + "default": true, + "type": "boolean" + }, + "capture_lineage_query_parser_failures": { + "title": "Capture Lineage Query Parser Failures", + "description": "Whether to capture lineage query parser errors with dataset properties for debuggings", + "default": false, + "type": "boolean" + }, + "table_lineage_mode": { + "description": "Which table lineage collector mode to use. Available modes are: [stl_scan_based, sql_based, mixed]", + "default": "stl_scan_based", + "allOf": [ + { + "$ref": "#/definitions/LineageMode" + } + ] + }, + "email_domain": { + "title": "Email Domain", + "description": "Email domain of your organisation so users can be displayed on UI appropriately.", + "type": "string" + } + }, + "required": [ + "host_port", + "email_domain" + ], + "additionalProperties": false + }, + "hive": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "hive" + ] + }, + "config": { + "$ref": "#/definitions/hive_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "hive_config": { + "title": "HiveConfig", + "description": "Base configuration class for stateful ingestion for source configs to inherit from.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform": { + 
"title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "stateful_ingestion": { + "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" + }, + "options": { + "title": "Options", + "default": {}, + "type": "object" + }, + "schema_pattern": { + "title": "Schema Pattern", + "description": "regex patterns for schemas to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + "description": "regex patterns for tables to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "view_pattern": { + "title": "View Pattern", + "description": "regex patterns for views to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "profile_pattern": { + "title": "Profile Pattern", + "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "domain": { + "title": "Domain", + "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/AllowDenyPattern" + } + }, + "include_views": { + "title": "Include Views", + "description": "Hive SQLAlchemy connector returns views as tables. See https://github.com/dropbox/PyHive/blob/b21c507a24ed2f2b0cf15b0b6abb1c43f31d3ee0/pyhive/sqlalchemy_hive.py#L270-L273. Disabling views helps us prevent this duplication.", + "default": false, + "type": "boolean" + }, + "include_tables": { + "title": "Include Tables", + "description": "Whether tables should be ingested.", + "default": true, + "type": "boolean" + }, + "profiling": { + "title": "Profiling", + "default": { + "enabled": false, + "limit": null, + "offset": null, + "report_dropped_profiles": false, + "turn_off_expensive_profiling_metrics": false, + "profile_table_level_only": false, + "include_field_null_count": true, + "include_field_min_value": true, + "include_field_max_value": true, + "include_field_mean_value": true, + "include_field_median_value": true, + "include_field_stddev_value": true, + "include_field_quantiles": false, + "include_field_distinct_value_frequencies": false, + "include_field_histogram": false, + "include_field_sample_values": true, + "allow_deny_patterns": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "max_number_of_fields_to_profile": null, + "profile_if_updated_since_days": 1, + "max_workers": 50, + "query_combiner_enabled": true, + "catch_exceptions": true, + "partition_profiling_enabled": true, + "bigquery_temp_table_schema": null, + "partition_datetime": null + }, + "allOf": [ + { + "$ref": "#/definitions/GEProfilingConfig" + } + ] + }, + "username": { + "title": "Username", + "description": "username", + "type": "string" + }, + "password": { + "title": "Password", + "description": "password", + "type": "string", + "writeOnly": true, + "format": "password" + }, + "host_port": { + "title": 
"Host Port", + "description": "host URL", + "type": "string" + }, + "database": { + "title": "Database", + "description": "database (catalog)", + "type": "string" + }, + "database_alias": { + "title": "Database Alias", + "description": "Alias to apply to database when ingesting.", + "type": "string" + }, + "scheme": { + "title": "Scheme", + "default": "hive", + "type": "string" + }, + "sqlalchemy_uri": { + "title": "Sqlalchemy Uri", + "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", + "type": "string" + } + }, + "required": [ + "host_port" + ], + "additionalProperties": false + }, + "hana": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "hana" + ] + }, + "config": { + "$ref": "#/definitions/hana_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "hana_config": { + "title": "HanaConfig", + "description": "Base configuration class for stateful ingestion for source configs to inherit from.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "stateful_ingestion": { + "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" + }, + "options": { + "title": "Options", + "default": {}, + "type": "object" + }, + "schema_pattern": { + "title": "Schema Pattern", + "description": "regex patterns for schemas to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ 
+ { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + "description": "regex patterns for tables to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "view_pattern": { + "title": "View Pattern", + "description": "regex patterns for views to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "profile_pattern": { + "title": "Profile Pattern", + "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "domain": { + "title": "Domain", + "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/AllowDenyPattern" + } + }, + "include_views": { + "title": "Include Views", + "description": "Whether views should be ingested.", + "default": true, + "type": "boolean" + }, + "include_tables": { + "title": "Include Tables", + "description": "Whether tables should be ingested.", + "default": true, + "type": "boolean" + }, + "profiling": { + "title": "Profiling", + "default": { + "enabled": false, + "limit": null, + "offset": null, + "report_dropped_profiles": false, + "turn_off_expensive_profiling_metrics": false, + "profile_table_level_only": false, + "include_field_null_count": true, + "include_field_min_value": true, + "include_field_max_value": true, + "include_field_mean_value": true, + "include_field_median_value": true, + "include_field_stddev_value": true, + "include_field_quantiles": false, + "include_field_distinct_value_frequencies": false, + "include_field_histogram": false, + "include_field_sample_values": true, + "allow_deny_patterns": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "max_number_of_fields_to_profile": null, + "profile_if_updated_since_days": 1, + "max_workers": 50, + "query_combiner_enabled": true, + "catch_exceptions": true, + "partition_profiling_enabled": true, + "bigquery_temp_table_schema": null, + "partition_datetime": null + }, + "allOf": [ + { + "$ref": "#/definitions/GEProfilingConfig" + } + ] + }, + "username": { + "title": "Username", + "description": "username", + "type": "string" + }, + "password": { + "title": "Password", + "description": "password", + "type": "string", + "writeOnly": true, + "format": "password" + }, + "host_port": { + "title": "Host Port", + "default": "localhost:39041", + "type": "string" + }, + "database": { + "title": "Database", + "description": "database (catalog)", + "type": "string" + }, + "database_alias": { 
+ "title": "Database Alias", + "description": "Alias to apply to database when ingesting.", + "type": "string" + }, + "scheme": { + "title": "Scheme", + "default": "hana+hdbcli", + "type": "string" + }, + "sqlalchemy_uri": { + "title": "Sqlalchemy Uri", + "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", + "type": "string" + } + }, + "additionalProperties": false + }, + "sagemaker": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "sagemaker" + ] + }, + "config": { + "$ref": "#/definitions/sagemaker_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "sagemaker_config": { + "title": "SagemakerSourceConfig", + "description": "Common AWS credentials config.\n\nCurrently used by:\n - Glue source\n - SageMaker source", + "type": "object", + "properties": { + "aws_access_key_id": { + "title": "Aws Access Key Id", + "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", + "type": "string" + }, + "aws_secret_access_key": { + "title": "Aws Secret Access Key", + "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", + "type": "string" + }, + "aws_session_token": { + "title": "Aws Session Token", + "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", + "type": "string" + }, + "aws_role": { + "title": "Aws Role", + "description": "Autodetected. 
See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "aws_profile": { + "title": "Aws Profile", + "description": "Named AWS profile to use, if not set the default will be used", + "type": "string" + }, + "aws_region": { + "title": "Aws Region", + "description": "AWS region code.", + "type": "string" + }, + "aws_endpoint_url": { + "title": "Aws Endpoint Url", + "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html", + "type": "string" + }, + "aws_proxy": { + "title": "Aws Proxy", + "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "database_pattern": { + "title": "Database Pattern", + "description": "regex patterns for databases to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + "description": "regex patterns for tables to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "extract_feature_groups": { + "title": "Extract Feature Groups", + "description": "Whether to extract feature groups.", + "default": true, + "type": "boolean" + }, + "extract_models": { + "title": "Extract Models", + "description": "Whether to extract models.", + "default": true, + "type": "boolean" + }, + 
"extract_jobs": { + "title": "Extract Jobs", + "description": "Whether to extract AutoML jobs.", + "default": true, + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + { + "type": "boolean" + } + ] + } + }, + "required": [ + "aws_region" + ], + "additionalProperties": false + }, + "glue": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "glue" + ] + }, + "config": { + "$ref": "#/definitions/glue_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "glue_config": { + "title": "GlueSourceConfig", + "description": "Common AWS credentials config.\n\nCurrently used by:\n - Glue source\n - SageMaker source", + "type": "object", + "properties": { + "row_count": { + "title": "Row Count", + "description": "The parameter name for row count in glue table.", + "type": "string" + }, + "column_count": { + "title": "Column Count", + "description": "The parameter name for column count in glue table.", + "type": "string" + }, + "unique_count": { + "title": "Unique Count", + "description": "The parameter name for the count of unique value in a column.", + "type": "string" + }, + "unique_proportion": { + "title": "Unique Proportion", + "description": "The parameter name for the proportion of unique values in a column.", + "type": "string" + }, + "null_count": { + "title": "Null Count", + "description": "The parameter name for the count of null values in a column.", + "type": "integer" + }, + "null_proportion": { + "title": "Null Proportion", + "description": "The parameter name for the proportion of null values in a column.", + "type": "string" + }, + "min": { + "title": "Min", + "description": "The parameter name for the min value of a column.", + "type": "string" + }, + "max": { + "title": "Max", + "description": "The parameter name for the max value of a column.", + "type": "string" + }, + "mean": { + "title": "Mean", + "description": "The parameter name for the mean value of a column.", + 
"type": "string" + }, + "median": { + "title": "Median", + "description": "The parameter name for the median value of a column.", + "type": "string" + }, + "stdev": { + "title": "Stdev", + "description": "The parameter name for the standard deviation of a column.", + "type": "string" + }, + "partition_patterns": { + "title": "Partition Patterns", + "description": "Regex patterns for filtering partitions for profile. The pattern should be a string like: \"{'key':'value'}\".", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "aws_access_key_id": { + "title": "Aws Access Key Id", + "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", + "type": "string" + }, + "aws_secret_access_key": { + "title": "Aws Secret Access Key", + "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", + "type": "string" + }, + "aws_session_token": { + "title": "Aws Session Token", + "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", + "type": "string" + }, + "aws_role": { + "title": "Aws Role", + "description": "Autodetected. 
See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "aws_profile": { + "title": "Aws Profile", + "description": "Named AWS profile to use, if not set the default will be used", + "type": "string" + }, + "aws_region": { + "title": "Aws Region", + "description": "AWS region code.", + "type": "string" + }, + "aws_endpoint_url": { + "title": "Aws Endpoint Url", + "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html", + "type": "string" + }, + "aws_proxy": { + "title": "Aws Proxy", + "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "database_pattern": { + "title": "Database Pattern", + "description": "regex patterns for databases to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + "description": "regex patterns for tables to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "extract_owners": { + "title": "Extract Owners", + "description": "When enabled, extracts ownership from Glue directly and overwrites existing owners. 
When disabled, ownership is left empty for datasets.", + "default": true, + "type": "boolean" + }, + "extract_transforms": { + "title": "Extract Transforms", + "description": "Whether to extract Glue transform jobs.", + "default": true, + "type": "boolean" + }, + "underlying_platform": { + "title": "Underlying Platform", + "description": "@deprecated(Use `platform`) Override for platform name. Allowed values - `glue`, `athena`", + "type": "string" + }, + "ignore_unsupported_connectors": { + "title": "Ignore Unsupported Connectors", + "description": "Whether to ignore unsupported connectors. If disabled, an error will be raised.", + "default": true, + "type": "boolean" + }, + "emit_s3_lineage": { + "title": "Emit S3 Lineage", + "description": " Whether to emit S3-to-Glue lineage.", + "default": false, + "type": "boolean" + }, + "glue_s3_lineage_direction": { + "title": "Glue S3 Lineage Direction", + "description": "If `upstream`, S3 is upstream to Glue. If `downstream` S3 is downstream to Glue.", + "default": "upstream", + "type": "string" + }, + "domain": { + "title": "Domain", + "description": "regex patterns for tables to filter to assign domain_key. ", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/AllowDenyPattern" + } + }, + "catalog_id": { + "title": "Catalog Id", + "description": "The aws account id where the target glue catalog lives. If None, datahub will ingest glue in aws caller's account.", + "type": "string" + }, + "use_s3_bucket_tags": { + "title": "Use S3 Bucket Tags", + "description": "If an S3 Buckets Tags should be created for the Tables ingested by Glue. 
Please Note that this will not apply tags to any folders ingested, only the files.", + "default": false, + "type": "boolean" + }, + "use_s3_object_tags": { + "title": "Use S3 Object Tags", + "description": "If an S3 Objects Tags should be created for the Tables ingested by Glue.", + "default": false, + "type": "boolean" + }, + "profiling": { + "title": "Profiling", + "description": "Configs to ingest data profiles from glue table", + "allOf": [ + { + "$ref": "#/definitions/GlueProfilingConfig" + } + ] + } + }, + "required": [ + "aws_region" + ], + "additionalProperties": false + }, + "oracle": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "oracle" + ] + }, + "config": { + "$ref": "#/definitions/oracle_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "oracle_config": { + "title": "OracleConfig", + "description": "Base configuration class for stateful ingestion for source configs to inherit from.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "stateful_ingestion": { + "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" + }, + "options": { + "title": "Options", + "default": {}, + "type": "object" + }, + "schema_pattern": { + "title": "Schema Pattern", + "description": "regex patterns for schemas to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + 
"description": "regex patterns for tables to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "view_pattern": { + "title": "View Pattern", + "description": "regex patterns for views to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "profile_pattern": { + "title": "Profile Pattern", + "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "domain": { + "title": "Domain", + "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/AllowDenyPattern" + } + }, + "include_views": { + "title": "Include Views", + "description": "Whether views should be ingested.", + "default": true, + "type": "boolean" + }, + "include_tables": { + "title": "Include Tables", + "description": "Whether tables should be ingested.", + "default": true, + "type": "boolean" + }, + "profiling": { + "title": "Profiling", + "default": { + "enabled": false, + "limit": null, + "offset": null, + "report_dropped_profiles": false, + "turn_off_expensive_profiling_metrics": false, + "profile_table_level_only": false, + "include_field_null_count": true, + "include_field_min_value": true, + "include_field_max_value": true, + "include_field_mean_value": true, + "include_field_median_value": true, + "include_field_stddev_value": true, + "include_field_quantiles": false, + "include_field_distinct_value_frequencies": false, + "include_field_histogram": false, + "include_field_sample_values": true, + "allow_deny_patterns": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "max_number_of_fields_to_profile": null, + "profile_if_updated_since_days": 1, + "max_workers": 50, + "query_combiner_enabled": true, + "catch_exceptions": true, + "partition_profiling_enabled": true, + "bigquery_temp_table_schema": null, + "partition_datetime": null + }, + "allOf": [ + { + "$ref": "#/definitions/GEProfilingConfig" + } + ] + }, + "username": { + "title": "Username", + "description": "username", + "type": "string" + }, + "password": { + "title": "Password", + "description": "password", + "type": "string", + "writeOnly": true, + "format": "password" + }, + "host_port": { + "title": "Host Port", + "description": "host URL", + "type": "string" + }, + "database": { + "title": "Database", + "description": "If using, omit `service_name`.", + "type": "string" + }, + 
"database_alias": { + "title": "Database Alias", + "description": "Alias to apply to database when ingesting.", + "type": "string" + }, + "scheme": { + "title": "Scheme", + "description": "Will be set automatically to default value.", + "default": "oracle+cx_oracle", + "type": "string" + }, + "sqlalchemy_uri": { + "title": "Sqlalchemy Uri", + "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", + "type": "string" + }, + "service_name": { + "title": "Service Name", + "description": "Oracle service name. If using, omit `database`.", + "type": "string" + } + }, + "required": [ + "host_port" + ], + "additionalProperties": false + }, + "druid": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "druid" + ] + }, + "config": { + "$ref": "#/definitions/druid_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "druid_config": { + "title": "DruidConfig", + "description": "Base configuration class for stateful ingestion for source configs to inherit from.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "stateful_ingestion": { + "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" + }, + "options": { + "title": "Options", + "default": {}, + "type": "object" + }, + "schema_pattern": { + "title": "Schema Pattern", + "description": "regex patterns for schemas to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [ + "^(lookup|sys).*" + 
], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + "description": "regex patterns for tables to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "view_pattern": { + "title": "View Pattern", + "description": "regex patterns for views to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "profile_pattern": { + "title": "Profile Pattern", + "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "domain": { + "title": "Domain", + "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/AllowDenyPattern" + } + }, + "include_views": { + "title": "Include Views", + "description": "Whether views should be ingested.", + "default": true, + "type": "boolean" + }, + "include_tables": { + "title": "Include Tables", + "description": "Whether tables should be ingested.", + "default": true, + "type": "boolean" + }, + "profiling": { + "title": "Profiling", + "default": { + "enabled": false, + "limit": null, + "offset": null, + "report_dropped_profiles": false, + "turn_off_expensive_profiling_metrics": false, + "profile_table_level_only": false, + "include_field_null_count": true, + "include_field_min_value": true, + "include_field_max_value": true, + "include_field_mean_value": true, + "include_field_median_value": true, + "include_field_stddev_value": true, + "include_field_quantiles": false, + "include_field_distinct_value_frequencies": false, + "include_field_histogram": false, + "include_field_sample_values": true, + "allow_deny_patterns": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "max_number_of_fields_to_profile": null, + "profile_if_updated_since_days": 1, + "max_workers": 50, + "query_combiner_enabled": true, + "catch_exceptions": true, + "partition_profiling_enabled": true, + "bigquery_temp_table_schema": null, + "partition_datetime": null + }, + "allOf": [ + { + "$ref": "#/definitions/GEProfilingConfig" + } + ] + }, + "username": { + "title": "Username", + "description": "username", + "type": "string" + }, + "password": { + "title": "Password", + "description": "password", + "type": "string", + "writeOnly": true, + "format": "password" + }, + "host_port": { + "title": "Host Port", + "description": "host URL", + "type": "string" + }, + "database": { + "title": "Database", + "description": "database (catalog)", + "type": "string" + }, + "database_alias": { + 
"title": "Database Alias", + "description": "Alias to apply to database when ingesting.", + "type": "string" + }, + "scheme": { + "title": "Scheme", + "default": "druid", + "type": "string" + }, + "sqlalchemy_uri": { + "title": "Sqlalchemy Uri", + "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", + "type": "string" + } + }, + "required": [ + "host_port" + ], + "additionalProperties": false + }, + "mode": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "mode" + ] + }, + "config": { + "$ref": "#/definitions/mode_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "mode_config": { + "title": "ModeConfig", + "description": "Any non-Dataset source that produces lineage to Datasets should inherit this class.\ne.g. Orchestrators, Pipelines, BI Tools etc.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform_instance_map": { + "title": "Platform Instance Map", + "description": "A holder for platform -> platform_instance mappings to generate correct dataset urns", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "connect_uri": { + "title": "Connect Uri", + "description": "Mode host URL.", + "default": "https://app.mode.com", + "type": "string" + }, + "token": { + "title": "Token", + "description": "Mode user token.", + "type": "string" + }, + "password": { + "title": "Password", + "description": "Mode password for authentication.", + "type": "string" + }, + "workspace": { + "title": "Workspace", + "type": "string" + }, + "default_schema": { + "title": "Default Schema", + "description": "Default schema to use when schema is not provided in an SQL query", + "default": "public", + "type": "string" + }, + 
"owner_username_instead_of_email": { + "title": "Owner Username Instead Of Email", + "description": "Use username for owner URN instead of Email", + "default": true, + "type": "boolean" + }, + "api_options": { + "title": "Api Options", + "description": "Retry/Wait settings for Mode API to avoid \"Too many Requests\" error. See Mode API Options below", + "default": { + "retry_backoff_multiplier": 2, + "max_retry_interval": 10, + "max_attempts": 5 + }, + "allOf": [ + { + "$ref": "#/definitions/ModeAPIConfig" + } + ] + } + }, + "additionalProperties": false + }, + "file": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "file" + ] + }, + "config": { + "$ref": "#/definitions/file_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "file_config": { + "title": "FileSourceConfig", + "type": "object", + "properties": { + "filename": { + "title": "Filename", + "description": "Path to file to ingest.", + "type": "string" + } + }, + "required": [ + "filename" + ], + "additionalProperties": false + }, + "mssql": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "mssql" + ] + }, + "config": { + "$ref": "#/definitions/mssql_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "mssql_config": { + "title": "SQLServerConfig", + "description": "Base configuration class for stateful ingestion for source configs to inherit from.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "stateful_ingestion": { + "$ref": 
"#/definitions/SQLAlchemyStatefulIngestionConfig" + }, + "options": { + "title": "Options", + "default": {}, + "type": "object" + }, + "schema_pattern": { + "title": "Schema Pattern", + "description": "regex patterns for schemas to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + "description": "regex patterns for tables to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "view_pattern": { + "title": "View Pattern", + "description": "regex patterns for views to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "profile_pattern": { + "title": "Profile Pattern", + "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "domain": { + "title": "Domain", + "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/AllowDenyPattern" + } + }, + "include_views": { + "title": "Include Views", + "description": "Whether views should be ingested.", + "default": true, + "type": "boolean" + }, + "include_tables": { + "title": "Include Tables", + "description": "Whether tables should be ingested.", + "default": true, + "type": "boolean" + }, + "profiling": { + "title": "Profiling", + "default": { + "enabled": false, + "limit": null, + "offset": null, + "report_dropped_profiles": false, + "turn_off_expensive_profiling_metrics": false, + "profile_table_level_only": false, + "include_field_null_count": true, + "include_field_min_value": true, + "include_field_max_value": true, + "include_field_mean_value": true, + "include_field_median_value": true, + "include_field_stddev_value": true, + "include_field_quantiles": false, + "include_field_distinct_value_frequencies": false, + "include_field_histogram": false, + "include_field_sample_values": true, + "allow_deny_patterns": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "max_number_of_fields_to_profile": null, + "profile_if_updated_since_days": 1, + "max_workers": 50, + "query_combiner_enabled": true, + "catch_exceptions": true, + "partition_profiling_enabled": true, + "bigquery_temp_table_schema": null, + "partition_datetime": null + }, + "allOf": [ + { + "$ref": "#/definitions/GEProfilingConfig" + } + ] + }, + "username": { + "title": "Username", + "description": "username", + "type": "string" + }, + "password": { + "title": "Password", + "description": "password", + "type": "string", + "writeOnly": true, + "format": "password" + }, + "host_port": { + "title": "Host Port", + "description": "MSSQL host URL.", + "default": "localhost:1433", + "type": "string" + }, + "database": { + "title": "Database", + "description": "database (catalog)", + "type": 
"string" + }, + "database_alias": { + "title": "Database Alias", + "description": "Alias to apply to database when ingesting.", + "type": "string" + }, + "scheme": { + "title": "Scheme", + "default": "mssql+pytds", + "type": "string" + }, + "sqlalchemy_uri": { + "title": "Sqlalchemy Uri", + "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", + "type": "string" + }, + "use_odbc": { + "title": "Use Odbc", + "description": "See https://docs.sqlalchemy.org/en/14/dialects/mssql.html#module-sqlalchemy.dialects.mssql.pyodbc.", + "default": false, + "type": "boolean" + }, + "uri_args": { + "title": "Uri Args", + "description": "Arguments to URL-encode when connecting. See https://docs.microsoft.com/en-us/sql/connect/odbc/dsn-connection-string-attribute?view=sql-server-ver15.", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "additionalProperties": false + }, + "data-lake": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "data-lake" + ] + }, + "config": { + "$ref": "#/definitions/data-lake_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "data-lake_config": { + "title": "DataLakeSourceConfig", + "description": "Any source that produces dataset urns in a single environment should inherit this class", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "base_path": { + "title": "Base Path", + "description": "Path of the base folder to crawl. Unless `schema_patterns` and `profile_patterns` are set, the connector will ingest all files in this folder.", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "Autodetected. Platform to use in namespace when constructing URNs. 
If left blank, local paths will correspond to `file` and S3 paths will correspond to `s3`.", + "default": "", + "type": "string" + }, + "use_relative_path": { + "title": "Use Relative Path", + "description": "Whether to use the relative path when constructing URNs. Has no effect when a `path_spec` is provided.", + "default": false, + "type": "boolean" + }, + "ignore_dotfiles": { + "title": "Ignore Dotfiles", + "description": "Whether to ignore files that start with `.`. For instance, `.DS_Store`, `.bash_profile`, etc.", + "default": true, + "type": "boolean" + }, + "aws_config": { + "title": "Aws Config", + "description": "AWS details", + "allOf": [ + { + "$ref": "#/definitions/AwsSourceConfig" + } + ] + }, + "schema_patterns": { + "title": "Schema Patterns", + "description": "regex patterns for tables to filter for ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "profile_patterns": { + "title": "Profile Patterns", + "description": "regex patterns for tables to profile ", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "path_spec": { + "title": "Path Spec", + "description": "Format string for constructing table identifiers from the relative path. 
See the above setup section for details.", + "type": "string" + }, + "profiling": { + "title": "Profiling", + "description": "Profiling configurations", + "default": { + "enabled": false, + "spark_cluster_manager": null, + "profile_table_level_only": false, + "allow_deny_patterns": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "max_number_of_fields_to_profile": null, + "include_field_null_count": true, + "include_field_min_value": true, + "include_field_max_value": true, + "include_field_mean_value": true, + "include_field_median_value": true, + "include_field_stddev_value": true, + "include_field_quantiles": true, + "include_field_distinct_value_frequencies": true, + "include_field_histogram": true, + "include_field_sample_values": true + }, + "allOf": [ + { + "$ref": "#/definitions/DataLakeProfilerConfig" + } + ] + }, + "spark_driver_memory": { + "title": "Spark Driver Memory", + "description": "Max amount of memory to grant Spark.", + "default": "4g", + "type": "string" + }, + "max_rows": { + "title": "Max Rows", + "description": "Maximum number of rows to use when inferring schemas for TSV and CSV files.", + "default": 100, + "type": "integer" + } + }, + "required": [ + "base_path" + ], + "additionalProperties": false + }, + "presto-on-hive": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "presto-on-hive" + ] + }, + "config": { + "$ref": "#/definitions/presto-on-hive_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "presto-on-hive_config": { + "title": "PrestoOnHiveConfig", + "description": "Base configuration class for stateful ingestion for source configs to inherit from.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "The platform that this 
source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "stateful_ingestion": { + "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" + }, + "options": { + "title": "Options", + "default": {}, + "type": "object" + }, + "schema_pattern": { + "title": "Schema Pattern", + "description": "regex patterns for schemas to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + "description": "regex patterns for tables to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "view_pattern": { + "title": "View Pattern", + "description": "regex patterns for views to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "profile_pattern": { + "title": "Profile Pattern", + "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "domain": { + "title": "Domain", + "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/AllowDenyPattern" + } + }, + "include_views": { + "title": "Include Views", + "description": "Whether views should be ingested.", + "default": true, + "type": "boolean" + }, + "include_tables": { + "title": "Include Tables", + "description": "Whether tables should be ingested.", + "default": true, + "type": "boolean" + }, + "profiling": { + "title": "Profiling", + "default": { + "enabled": false, + "limit": null, + "offset": null, + "report_dropped_profiles": false, + "turn_off_expensive_profiling_metrics": false, + "profile_table_level_only": false, + "include_field_null_count": true, + "include_field_min_value": true, + "include_field_max_value": true, + "include_field_mean_value": true, + "include_field_median_value": true, + "include_field_stddev_value": true, + "include_field_quantiles": false, + "include_field_distinct_value_frequencies": false, + "include_field_histogram": false, + "include_field_sample_values": true, + "allow_deny_patterns": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "max_number_of_fields_to_profile": null, + "profile_if_updated_since_days": 1, + "max_workers": 50, + "query_combiner_enabled": true, + "catch_exceptions": true, + "partition_profiling_enabled": true, + "bigquery_temp_table_schema": null, + "partition_datetime": null + }, + "allOf": [ + { + "$ref": "#/definitions/GEProfilingConfig" + } + ] + }, + "username": { + "title": "Username", + "description": "username", + "type": "string" + }, + "password": { + "title": "Password", + "description": "password", + "type": "string", + "writeOnly": true, + "format": "password" + }, + "host_port": { + "title": "Host Port", + "description": "Host URL and port to connect to. 
Example: localhost:3306", + "default": "localhost:3306", + "type": "string" + }, + "database": { + "title": "Database", + "description": "database (catalog)", + "type": "string" + }, + "database_alias": { + "title": "Database Alias", + "description": "Alias to apply to database when ingesting.", + "type": "string" + }, + "scheme": { + "title": "Scheme", + "default": "mysql+pymysql", + "type": "string" + }, + "sqlalchemy_uri": { + "title": "Sqlalchemy Uri", + "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", + "type": "string" + }, + "views_where_clause_suffix": { + "title": "Views Where Clause Suffix", + "description": "Where clause to specify what Presto views should be ingested.", + "default": "", + "type": "string" + }, + "tables_where_clause_suffix": { + "title": "Tables Where Clause Suffix", + "description": "Where clause to specify what Hive tables should be ingested.", + "default": "", + "type": "string" + }, + "schemas_where_clause_suffix": { + "title": "Schemas Where Clause Suffix", + "description": "Where clause to specify what Hive schemas should be ingested.", + "default": "", + "type": "string" + } + }, + "additionalProperties": false + }, + "vertica": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "vertica" + ] + }, + "config": { + "$ref": "#/definitions/vertica_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "vertica_config": { + "title": "VerticaConfig", + "description": "Base configuration class for stateful ingestion for source configs to inherit from.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, 
+ "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "stateful_ingestion": { + "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" + }, + "options": { + "title": "Options", + "default": {}, + "type": "object" + }, + "schema_pattern": { + "title": "Schema Pattern", + "description": "regex patterns for schemas to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + "description": "regex patterns for tables to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "view_pattern": { + "title": "View Pattern", + "description": "regex patterns for views to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "profile_pattern": { + "title": "Profile Pattern", + "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "domain": { + "title": "Domain", + "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/AllowDenyPattern" + } + }, + "include_views": { + "title": "Include Views", + "description": "Whether views should be ingested.", + "default": true, + "type": "boolean" + }, + "include_tables": { + "title": "Include Tables", + "description": "Whether tables should be ingested.", + "default": true, + "type": "boolean" + }, + "profiling": { + "title": "Profiling", + "default": { + "enabled": false, + "limit": null, + "offset": null, + "report_dropped_profiles": false, + "turn_off_expensive_profiling_metrics": false, + "profile_table_level_only": false, + "include_field_null_count": true, + "include_field_min_value": true, + "include_field_max_value": true, + "include_field_mean_value": true, + "include_field_median_value": true, + "include_field_stddev_value": true, + "include_field_quantiles": false, + "include_field_distinct_value_frequencies": false, + "include_field_histogram": false, + "include_field_sample_values": true, + "allow_deny_patterns": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "max_number_of_fields_to_profile": null, + "profile_if_updated_since_days": 1, + "max_workers": 50, + "query_combiner_enabled": true, + "catch_exceptions": true, + "partition_profiling_enabled": true, + "bigquery_temp_table_schema": null, + "partition_datetime": null + }, + "allOf": [ + { + "$ref": "#/definitions/GEProfilingConfig" + } + ] + }, + "username": { + "title": "Username", + "description": "username", + "type": "string" + }, + "password": { + "title": "Password", + "description": "password", + "type": "string", + "writeOnly": true, + "format": "password" + }, + "host_port": { + "title": "Host Port", + "description": "host URL", + "type": "string" + }, + "database": { + "title": "Database", + "description": "database (catalog)", + "type": "string" + }, + "database_alias": { + 
"title": "Database Alias", + "description": "Alias to apply to database when ingesting.", + "type": "string" + }, + "scheme": { + "title": "Scheme", + "default": "vertica+vertica_python", + "type": "string" + }, + "sqlalchemy_uri": { + "title": "Sqlalchemy Uri", + "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", + "type": "string" + } + }, + "required": [ + "host_port" + ], + "additionalProperties": false + }, + "csv-enricher": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "csv-enricher" + ] + }, + "config": { + "$ref": "#/definitions/csv-enricher_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "csv-enricher_config": { + "title": "CSVEnricherConfig", + "type": "object", + "properties": { + "filename": { + "title": "Filename", + "description": "Path to ingestion CSV file", + "type": "string" + }, + "should_overwrite": { + "title": "Should Overwrite", + "description": "Whether the ingestion should overwrite. 
Otherwise, we will append data.", + "default": false, + "type": "boolean" + }, + "delimiter": { + "title": "Delimiter", + "description": "Delimiter to use when parsing CSV", + "default": ",", + "type": "string" + }, + "array_delimiter": { + "title": "Array Delimiter", + "description": "Delimiter to use when parsing array fields (tags, terms, owners)", + "default": "|", + "type": "string" + } + }, + "required": [ + "filename" + ], + "additionalProperties": false + }, + "mariadb": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "mariadb" + ] + }, + "config": { + "$ref": "#/definitions/mariadb_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "mariadb_config": { + "title": "MySQLConfig", + "description": "Base configuration class for stateful ingestion for source configs to inherit from.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "stateful_ingestion": { + "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" + }, + "options": { + "title": "Options", + "default": {}, + "type": "object" + }, + "schema_pattern": { + "title": "Schema Pattern", + "description": "regex patterns for schemas to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + "description": "regex patterns for tables to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + 
"deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "view_pattern": { + "title": "View Pattern", + "description": "regex patterns for views to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "profile_pattern": { + "title": "Profile Pattern", + "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "domain": { + "title": "Domain", + "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) There can be multiple domain key specified.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/AllowDenyPattern" + } + }, + "include_views": { + "title": "Include Views", + "description": "Whether views should be ingested.", + "default": true, + "type": "boolean" + }, + "include_tables": { + "title": "Include Tables", + "description": "Whether tables should be ingested.", + "default": true, + "type": "boolean" + }, + "profiling": { + "title": "Profiling", + "default": { + "enabled": false, + "limit": null, + "offset": null, + "report_dropped_profiles": false, + "turn_off_expensive_profiling_metrics": false, + "profile_table_level_only": false, + "include_field_null_count": true, + "include_field_min_value": true, + "include_field_max_value": true, + "include_field_mean_value": true, + "include_field_median_value": true, + "include_field_stddev_value": true, + "include_field_quantiles": false, + "include_field_distinct_value_frequencies": false, + "include_field_histogram": false, + 
"include_field_sample_values": true, + "allow_deny_patterns": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "max_number_of_fields_to_profile": null, + "profile_if_updated_since_days": 1, + "max_workers": 50, + "query_combiner_enabled": true, + "catch_exceptions": true, + "partition_profiling_enabled": true, + "bigquery_temp_table_schema": null, + "partition_datetime": null + }, + "allOf": [ + { + "$ref": "#/definitions/GEProfilingConfig" + } + ] + }, + "username": { + "title": "Username", + "description": "username", + "type": "string" + }, + "password": { + "title": "Password", + "description": "password", + "type": "string", + "writeOnly": true, + "format": "password" + }, + "host_port": { + "title": "Host Port", + "description": "MySQL host URL.", + "default": "localhost:3306", + "type": "string" + }, + "database": { + "title": "Database", + "description": "database (catalog)", + "type": "string" + }, + "database_alias": { + "title": "Database Alias", + "description": "Alias to apply to database when ingesting.", + "type": "string" + }, + "scheme": { + "title": "Scheme", + "default": "mysql+pymysql", + "type": "string" + }, + "sqlalchemy_uri": { + "title": "Sqlalchemy Uri", + "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. 
Takes precedence over other connection parameters.", + "type": "string" + } + }, + "additionalProperties": false + }, + "feast": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "feast" + ] + }, + "config": { + "$ref": "#/definitions/feast_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "feast_config": { + "title": "FeastRepositorySourceConfig", + "type": "object", + "properties": { + "path": { + "title": "Path", + "description": "Path to Feast repository", + "type": "string" + }, + "environment": { + "title": "Environment", + "description": "Environment to use when constructing URNs", + "default": "PROD", + "type": "string" + } + }, + "required": [ + "path" + ], + "additionalProperties": false + }, + "ldap": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "ldap" + ] + }, + "config": { + "$ref": "#/definitions/ldap_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "ldap_config": { + "title": "LDAPSourceConfig", + "description": "Config used by the LDAP Source.", + "type": "object", + "properties": { + "ldap_server": { + "title": "Ldap Server", + "description": "LDAP server URL.", + "type": "string" + }, + "ldap_user": { + "title": "Ldap User", + "description": "LDAP user.", + "type": "string" + }, + "ldap_password": { + "title": "Ldap Password", + "description": "LDAP password.", + "type": "string" + }, + "base_dn": { + "title": "Base Dn", + "description": "LDAP DN.", + "type": "string" + }, + "filter": { + "title": "Filter", + "description": "LDAP extractor filter.", + "default": "(objectClass=*)", + "type": "string" + }, + "drop_missing_first_last_name": { + "title": "Drop Missing First Last Name", + "description": "If set to true, any users without first and last names will be dropped.", + "default": true, + "type": "boolean" + }, + "page_size": { + "title": "Page Size", + "description": "Size of each page to fetch when extracting metadata.", + "default": 
20, + "type": "integer" + }, + "user_attrs_map": { + "title": "User Attrs Map", + "default": {}, + "type": "object" + }, + "group_attrs_map": { + "title": "Group Attrs Map", + "default": {}, + "type": "object" + } + }, + "required": [ + "ldap_server", + "ldap_user", + "ldap_password", + "base_dn" + ], + "additionalProperties": false + }, + "elasticsearch": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "elasticsearch" + ] + }, + "config": { + "$ref": "#/definitions/elasticsearch_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "elasticsearch_config": { + "title": "ElasticsearchSourceConfig", + "description": "Any source that is a primary producer of Dataset metadata should inherit this class", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "host": { + "title": "Host", + "description": "The elastic search host URI.", + "default": "localhost:9200", + "type": "string" + }, + "username": { + "title": "Username", + "description": "The username credential.", + "type": "string" + }, + "password": { + "title": "Password", + "description": "The password credential.", + "type": "string" + }, + "use_ssl": { + "title": "Use Ssl", + "description": "Whether to use SSL for the connection or not.", + "default": false, + "type": "boolean" + }, + "verify_certs": { + "title": "Verify Certs", + "description": "Whether to verify SSL certificates.", + "default": false, + "type": "boolean" + }, + "ca_certs": { + "title": "Ca Certs", + "description": "Path to a certificate authority 
(CA) certificate.", + "type": "string" + }, + "client_cert": { + "title": "Client Cert", + "description": "Path to the file containing the private key and the certificate, or cert only if using client_key.", + "type": "string" + }, + "client_key": { + "title": "Client Key", + "description": "Path to the file containing the private key if using separate cert and key files.", + "type": "string" + }, + "ssl_assert_hostname": { + "title": "Ssl Assert Hostname", + "description": "Use hostname verification if not False.", + "default": false, + "type": "boolean" + }, + "ssl_assert_fingerprint": { + "title": "Ssl Assert Fingerprint", + "description": "Verify the supplied certificate fingerprint if not None.", + "type": "string" + }, + "url_prefix": { + "title": "Url Prefix", + "description": "There are cases where an enterprise would have multiple elastic search clusters. One way for them to manage is to have a single endpoint for all the elastic search clusters and use url_prefix for routing requests to different clusters.", + "default": "", + "type": "string" + }, + "index_pattern": { + "title": "Index Pattern", + "description": "regex patterns for indexes to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [ + "^_.*", + "^ilm-history.*" + ], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + } + }, + "additionalProperties": false + }, + "superset": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "superset" + ] + }, + "config": { + "$ref": "#/definitions/superset_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "superset_config": { + "title": "SupersetConfig", + "type": "object", + "properties": { + "connect_uri": { + "title": "Connect Uri", + "description": "Superset host URL.", + "default": "localhost:8088", + "type": "string" + }, + "username": { + "title": "Username", + "description": "Superset username.", + 
"type": "string" + }, + "password": { + "title": "Password", + "description": "Superset password.", + "type": "string" + }, + "provider": { + "title": "Provider", + "description": "Superset provider.", + "default": "db", + "type": "string" + }, + "options": { + "title": "Options", + "default": {}, + "type": "object" + }, + "env": { + "title": "Env", + "description": "Environment to use in namespace when constructing URNs", + "default": "PROD", + "type": "string" + }, + "database_alias": { + "title": "Database Alias", + "description": "Can be used to change mapping for database names in superset to what you have in datahub", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "additionalProperties": false + }, + "datahub-lineage-file": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "datahub-lineage-file" + ] + }, + "config": { + "$ref": "#/definitions/datahub-lineage-file_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "datahub-lineage-file_config": { + "title": "LineageFileSourceConfig", + "type": "object", + "properties": { + "file": { + "title": "File", + "description": "Path to lineage file to ingest.", + "type": "string" + }, + "preserve_upstream": { + "title": "Preserve Upstream", + "description": "Whether we want to query datahub-gms for upstream data. False means it will hard replace upstream data for a given entity. 
True means it will query the backend for existing upstreams and include it in the ingestion run", + "default": true, + "type": "boolean" + } + }, + "required": [ + "file" + ], + "additionalProperties": false + }, + "s3": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "s3" + ] + }, + "config": { + "$ref": "#/definitions/s3_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "s3_config": { + "title": "DataLakeSourceConfig", + "description": "Any source that connects to a platform should inherit this class", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "default": "", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "path_specs": { + "title": "Path Specs", + "description": "List of PathSpec. 
See below the details about PathSpec", + "type": "array", + "items": { + "$ref": "#/definitions/PathSpec" + } + }, + "path_spec": { + "title": "Path Spec", + "description": "Path spec will be deprecated in favour of path_specs option.", + "allOf": [ + { + "$ref": "#/definitions/PathSpec" + } + ] + }, + "aws_config": { + "title": "Aws Config", + "description": "AWS configuration", + "allOf": [ + { + "$ref": "#/definitions/AwsSourceConfig" + } + ] + }, + "use_s3_bucket_tags": { + "title": "Use S3 Bucket Tags", + "description": "Whether or not to create tags in datahub from the s3 bucket", + "type": "boolean" + }, + "use_s3_object_tags": { + "title": "Use S3 Object Tags", + "description": "# Whether or not to create tags in datahub from the s3 object", + "type": "boolean" + }, + "profile_patterns": { + "title": "Profile Patterns", + "description": "regex patterns for tables to profile ", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "profiling": { + "title": "Profiling", + "description": "Data profiling configuration", + "default": { + "enabled": false, + "profile_table_level_only": false, + "allow_deny_patterns": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "max_number_of_fields_to_profile": null, + "include_field_null_count": true, + "include_field_min_value": true, + "include_field_max_value": true, + "include_field_mean_value": true, + "include_field_median_value": true, + "include_field_stddev_value": true, + "include_field_quantiles": true, + "include_field_distinct_value_frequencies": true, + "include_field_histogram": true, + "include_field_sample_values": true + }, + "allOf": [ + { + "$ref": "#/definitions/DataLakeProfilerConfig" + } + ] + }, + "spark_driver_memory": { + "title": "Spark Driver Memory", + "description": "Max amount of memory to grant Spark.", + 
"default": "4g", + "type": "string" + }, + "max_rows": { + "title": "Max Rows", + "description": "Maximum number of rows to use when inferring schemas for TSV and CSV files.", + "default": 100, + "type": "integer" + } + }, + "additionalProperties": false + }, + "athena": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "athena" + ] + }, + "config": { + "$ref": "#/definitions/athena_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "athena_config": { + "title": "AthenaConfig", + "description": "Base configuration class for stateful ingestion for source configs to inherit from.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "stateful_ingestion": { + "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" + }, + "options": { + "title": "Options", + "default": {}, + "type": "object" + }, + "schema_pattern": { + "title": "Schema Pattern", + "description": "regex patterns for schemas to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + "description": "regex patterns for tables to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "view_pattern": { + "title": "View Pattern", + "description": "regex 
patterns for views to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "profile_pattern": { + "title": "Profile Pattern", + "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "domain": { + "title": "Domain", + "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) There can be multiple domain key specified.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/AllowDenyPattern" + } + }, + "include_views": { + "title": "Include Views", + "default": false, + "type": "boolean" + }, + "include_tables": { + "title": "Include Tables", + "description": "Whether tables should be ingested.", + "default": true, + "type": "boolean" + }, + "profiling": { + "title": "Profiling", + "default": { + "enabled": false, + "limit": null, + "offset": null, + "report_dropped_profiles": false, + "turn_off_expensive_profiling_metrics": false, + "profile_table_level_only": false, + "include_field_null_count": true, + "include_field_min_value": true, + "include_field_max_value": true, + "include_field_mean_value": true, + "include_field_median_value": true, + "include_field_stddev_value": true, + "include_field_quantiles": false, + "include_field_distinct_value_frequencies": false, + "include_field_histogram": false, + "include_field_sample_values": true, + "allow_deny_patterns": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "max_number_of_fields_to_profile": null, + "profile_if_updated_since_days": 1, + "max_workers": 50, + 
"query_combiner_enabled": true, + "catch_exceptions": true, + "partition_profiling_enabled": true, + "bigquery_temp_table_schema": null, + "partition_datetime": null + }, + "allOf": [ + { + "$ref": "#/definitions/GEProfilingConfig" + } + ] + }, + "scheme": { + "title": "Scheme", + "default": "awsathena+rest", + "type": "string" + }, + "username": { + "title": "Username", + "description": "Username credential. If not specified, detected with boto3 rules. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", + "type": "string" + }, + "password": { + "title": "Password", + "description": "Same detection scheme as username", + "type": "string" + }, + "database": { + "title": "Database", + "description": "The athena database to ingest from. If not set it will be autodetected", + "type": "string" + }, + "aws_region": { + "title": "Aws Region", + "description": "Aws region where your Athena database is located", + "type": "string" + }, + "s3_staging_dir": { + "title": "S3 Staging Dir", + "description": "Staging s3 location where the Athena query results will be stored", + "type": "string" + }, + "work_group": { + "title": "Work Group", + "description": "The name of your Amazon Athena Workgroups", + "type": "string" + } + }, + "required": [ + "aws_region", + "s3_staging_dir", + "work_group" + ], + "additionalProperties": false + }, + "redshift": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "redshift" + ] + }, + "config": { + "$ref": "#/definitions/redshift_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "redshift_config": { + "title": "RedshiftConfig", + "description": "Base configuration class for stateful ingestion for source configs to inherit from.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform_instance_map": { + 
"title": "Platform Instance Map", + "description": "A holder for platform -> platform_instance mappings to generate correct dataset urns", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "bucket_duration": { + "description": "Size of the time window to aggregate usage stats.", + "default": "DAY", + "allOf": [ + { + "$ref": "#/definitions/BucketDuration" + } + ] + }, + "end_time": { + "title": "End Time", + "description": "Latest date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", + "type": "string", + "format": "date-time" + }, + "start_time": { + "title": "Start Time", + "description": "Earliest date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", + "type": "string", + "format": "date-time" + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "stateful_ingestion": { + "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" + }, + "options": { + "title": "Options", + "default": {}, + "type": "object" + }, + "schema_pattern": { + "title": "Schema Pattern", + "default": { + "allow": [ + ".*" + ], + "deny": [ + "information_schema" + ], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + "description": "regex patterns for tables to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "view_pattern": { + "title": "View Pattern", + "description": "regex patterns for views to filter in ingestion.", + 
"default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "profile_pattern": { + "title": "Profile Pattern", + "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "domain": { + "title": "Domain", + "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) There can be multiple domain key specified.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/AllowDenyPattern" + } + }, + "include_views": { + "title": "Include Views", + "description": "Whether views should be ingested.", + "default": true, + "type": "boolean" + }, + "include_tables": { + "title": "Include Tables", + "description": "Whether tables should be ingested.", + "default": true, + "type": "boolean" + }, + "profiling": { + "title": "Profiling", + "default": { + "enabled": false, + "limit": null, + "offset": null, + "report_dropped_profiles": false, + "turn_off_expensive_profiling_metrics": false, + "profile_table_level_only": false, + "include_field_null_count": true, + "include_field_min_value": true, + "include_field_max_value": true, + "include_field_mean_value": true, + "include_field_median_value": true, + "include_field_stddev_value": true, + "include_field_quantiles": false, + "include_field_distinct_value_frequencies": false, + "include_field_histogram": false, + "include_field_sample_values": true, + "allow_deny_patterns": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "max_number_of_fields_to_profile": null, + "profile_if_updated_since_days": 1, + "max_workers": 50, + 
"query_combiner_enabled": true, + "catch_exceptions": true, + "partition_profiling_enabled": true, + "bigquery_temp_table_schema": null, + "partition_datetime": null + }, + "allOf": [ + { + "$ref": "#/definitions/GEProfilingConfig" + } + ] + }, + "username": { + "title": "Username", + "description": "username", + "type": "string" + }, + "password": { + "title": "Password", + "description": "password", + "type": "string", + "writeOnly": true, + "format": "password" + }, + "host_port": { + "title": "Host Port", + "description": "host URL", + "type": "string" + }, + "database": { + "title": "Database", + "description": "database (catalog)", + "type": "string" + }, + "database_alias": { + "title": "Database Alias", + "description": "Alias to apply to database when ingesting.", + "type": "string" + }, + "scheme": { + "title": "Scheme", + "default": "redshift+psycopg2", + "type": "string" + }, + "sqlalchemy_uri": { + "title": "Sqlalchemy Uri", + "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. 
Takes precedence over other connection parameters.", + "type": "string" + }, + "default_schema": { + "title": "Default Schema", + "description": "The default schema to use if the sql parser fails to parse the schema with `sql_based` lineage collector", + "default": "public", + "type": "string" + }, + "include_table_lineage": { + "title": "Include Table Lineage", + "description": "Whether table lineage should be ingested.", + "default": true, + "type": "boolean" + }, + "include_copy_lineage": { + "title": "Include Copy Lineage", + "description": "Whether lineage should be collected from copy commands", + "default": true, + "type": "boolean" + }, + "capture_lineage_query_parser_failures": { + "title": "Capture Lineage Query Parser Failures", + "description": "Whether to capture lineage query parser errors with dataset properties for debuggings", + "default": false, + "type": "boolean" + }, + "table_lineage_mode": { + "description": "Which table lineage collector mode to use. Available modes are: [stl_scan_based, sql_based, mixed]", + "default": "stl_scan_based", + "allOf": [ + { + "$ref": "#/definitions/LineageMode" + } + ] + } + }, + "required": [ + "host_port" + ], + "additionalProperties": false + }, + "openapi": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "openapi" + ] + }, + "config": { + "$ref": "#/definitions/openapi_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "openapi_config": { + "title": "OpenApiConfig", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string" + }, + "url": { + "title": "Url", + "type": "string" + }, + "swagger_file": { + "title": "Swagger File", + "type": "string" + }, + "ignore_endpoints": { + "title": "Ignore Endpoints", + "default": [], + "type": "array", + "items": {} + }, + "username": { + "title": "Username", + "default": "", + "type": "string" + }, + "password": { + "title": "Password", + "default": "", + "type": "string" + }, + 
"forced_examples": { + "title": "Forced Examples", + "default": {}, + "type": "object" + }, + "token": { + "title": "Token", + "type": "string" + }, + "get_token": { + "title": "Get Token", + "default": {}, + "type": "object" + } + }, + "required": [ + "name", + "url", + "swagger_file" + ], + "additionalProperties": false + }, + "metabase": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "metabase" + ] + }, + "config": { + "$ref": "#/definitions/metabase_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "metabase_config": { + "title": "MetabaseConfig", + "description": "Any non-Dataset source that produces lineage to Datasets should inherit this class.\ne.g. Orchestrators, Pipelines, BI Tools etc.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform_instance_map": { + "title": "Platform Instance Map", + "description": "A holder for platform -> platform_instance mappings to generate correct dataset urns", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "connect_uri": { + "title": "Connect Uri", + "description": "Metabase host URL.", + "default": "localhost:3000", + "type": "string" + }, + "username": { + "title": "Username", + "description": "Metabase username.", + "type": "string" + }, + "password": { + "title": "Password", + "description": "Metabase password.", + "type": "string" + }, + "database_alias_map": { + "title": "Database Alias Map", + "description": "Database name map to use when constructing dataset URN.", + "type": "object" + }, + "engine_platform_map": { + "title": "Engine Platform Map", + "description": "Custom mappings between metabase database engines and DataHub platforms", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "default_schema": { + "title": "Default Schema", + 
"description": "Default schema name to use when schema is not provided in an SQL query", + "default": "public", + "type": "string" + } + }, + "additionalProperties": false + }, + "datahub-business-glossary": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "datahub-business-glossary" + ] + }, + "config": { + "$ref": "#/definitions/datahub-business-glossary_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "datahub-business-glossary_config": { + "title": "BusinessGlossarySourceConfig", + "type": "object", + "properties": { + "file": { + "title": "File", + "description": "Path to business glossary file to ingest.", + "type": "string" + } + }, + "required": [ + "file" + ], + "additionalProperties": false + }, + "clickhouse-usage": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "clickhouse-usage" + ] + }, + "config": { + "$ref": "#/definitions/clickhouse-usage_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "clickhouse-usage_config": { + "title": "ClickHouseUsageConfig", + "description": "Base configuration class for stateful ingestion for source configs to inherit from.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "bucket_duration": { + "description": "Size of the time window to aggregate usage stats.", + "default": "DAY", + "allOf": [ + { + "$ref": "#/definitions/BucketDuration" + } + ] + }, + "end_time": { + "title": "End Time", + "description": "Latest date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", + "type": "string", + "format": "date-time" + }, + "start_time": { + "title": "Start Time", + "description": "Earliest date of usage to consider. 
Default: Last full day in UTC (or hour, depending on `bucket_duration`)", + "type": "string", + "format": "date-time" + }, + "top_n_queries": { + "title": "Top N Queries", + "description": "Number of top queries to save to each table.", + "default": 10, + "exclusiveMinimum": 0, + "type": "integer" + }, + "user_email_pattern": { + "title": "User Email Pattern", + "description": "regex patterns for user emails to filter in usage.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "include_operational_stats": { + "title": "Include Operational Stats", + "description": "Whether to display operational stats.", + "default": true, + "type": "boolean" + }, + "include_read_operational_stats": { + "title": "Include Read Operational Stats", + "description": "Whether to report read operational stats. Experimental.", + "default": false, + "type": "boolean" + }, + "format_sql_queries": { + "title": "Format Sql Queries", + "description": "Whether to format sql queries", + "default": false, + "type": "boolean" + }, + "include_top_n_queries": { + "title": "Include Top N Queries", + "description": "Whether to ingest the top_n_queries.", + "default": true, + "type": "boolean" + }, + "platform_instance_map": { + "title": "Platform Instance Map", + "description": "A holder for platform -> platform_instance mappings to generate correct dataset urns", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "stateful_ingestion": { + "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" + }, + "options": { + "title": "Options", + 
"default": {}, + "type": "object" + }, + "schema_pattern": { + "title": "Schema Pattern", + "description": "regex patterns for schemas to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + "description": "regex patterns for tables to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "view_pattern": { + "title": "View Pattern", + "description": "regex patterns for views to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "profile_pattern": { + "title": "Profile Pattern", + "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "domain": { + "title": "Domain", + "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/AllowDenyPattern" + } + }, + "include_views": { + "title": "Include Views", + "description": "Whether views should be ingested.", + "default": true, + "type": "boolean" + }, + "include_tables": { + "title": "Include Tables", + "description": "Whether tables should be ingested.", + "default": true, + "type": "boolean" + }, + "profiling": { + "title": "Profiling", + "default": { + "enabled": false, + "limit": null, + "offset": null, + "report_dropped_profiles": false, + "turn_off_expensive_profiling_metrics": false, + "profile_table_level_only": false, + "include_field_null_count": true, + "include_field_min_value": true, + "include_field_max_value": true, + "include_field_mean_value": true, + "include_field_median_value": true, + "include_field_stddev_value": true, + "include_field_quantiles": false, + "include_field_distinct_value_frequencies": false, + "include_field_histogram": false, + "include_field_sample_values": true, + "allow_deny_patterns": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "max_number_of_fields_to_profile": null, + "profile_if_updated_since_days": 1, + "max_workers": 50, + "query_combiner_enabled": true, + "catch_exceptions": true, + "partition_profiling_enabled": true, + "bigquery_temp_table_schema": null, + "partition_datetime": null + }, + "allOf": [ + { + "$ref": "#/definitions/GEProfilingConfig" + } + ] + }, + "username": { + "title": "Username", + "description": "username", + "type": "string" + }, + "password": { + "title": "Password", + "description": "password", + "type": "string", + "writeOnly": true, + "format": "password" + }, + "host_port": { + "title": "Host Port", + "description": "ClickHouse host URL.", + "default": "localhost:8123", + "type": "string" + }, + "database": { + "title": "Database", + "description": "database (catalog)", + 
"type": "string" + }, + "database_alias": { + "title": "Database Alias", + "description": "Alias to apply to database when ingesting.", + "type": "string" + }, + "scheme": { + "title": "Scheme", + "default": "clickhouse", + "type": "string" + }, + "sqlalchemy_uri": { + "title": "Sqlalchemy Uri", + "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", + "type": "string" + }, + "secure": { + "title": "Secure", + "type": "boolean" + }, + "protocol": { + "title": "Protocol", + "type": "string" + }, + "include_table_lineage": { + "title": "Include Table Lineage", + "description": "Whether table lineage should be ingested.", + "default": true, + "type": "boolean" + }, + "include_materialized_views": { + "title": "Include Materialized Views", + "default": true, + "type": "boolean" + }, + "email_domain": { + "title": "Email Domain", + "type": "string" + }, + "query_log_table": { + "title": "Query Log Table", + "default": "system.query_log", + "type": "string" + } + }, + "required": [ + "email_domain" + ], + "additionalProperties": false + }, + "mongodb": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "mongodb" + ] + }, + "config": { + "$ref": "#/definitions/mongodb_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "mongodb_config": { + "title": "MongoDBConfig", + "description": "Any source that produces dataset urns in a single environment should inherit this class", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "connect_uri": { + "title": "Connect Uri", + "description": "MongoDB connection URI.", + "default": "mongodb://localhost", + "type": "string" + }, + "username": { + "title": "Username", + "description": "MongoDB username.", + "type": "string" 
+ }, + "password": { + "title": "Password", + "description": "MongoDB password.", + "type": "string" + }, + "authMechanism": { + "title": "Authmechanism", + "description": "MongoDB authentication mechanism.", + "type": "string" + }, + "options": { + "title": "Options", + "description": "Additional options to pass to `pymongo.MongoClient()`.", + "default": {}, + "type": "object" + }, + "enableSchemaInference": { + "title": "Enableschemainference", + "description": "Whether to infer schemas. ", + "default": true, + "type": "boolean" + }, + "schemaSamplingSize": { + "title": "Schemasamplingsize", + "description": "Number of documents to use when inferring schema size. If set to `0`, all documents will be scanned.", + "default": 1000, + "exclusiveMinimum": 0, + "type": "integer" + }, + "useRandomSampling": { + "title": "Userandomsampling", + "description": "If documents for schema inference should be randomly selected. If `False`, documents will be selected from start.", + "default": true, + "type": "boolean" + }, + "maxSchemaSize": { + "title": "Maxschemasize", + "description": "Maximum number of fields to include in the schema.", + "default": 300, + "exclusiveMinimum": 0, + "type": "integer" + }, + "maxDocumentSize": { + "title": "Maxdocumentsize", + "default": 16793600, + "exclusiveMinimum": 0, + "type": "integer" + }, + "database_pattern": { + "title": "Database Pattern", + "description": "regex patterns for databases to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "collection_pattern": { + "title": "Collection Pattern", + "description": "regex patterns for collections to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + } + }, + 
"additionalProperties": false + }, + "nifi": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "nifi" + ] + }, + "config": { + "$ref": "#/definitions/nifi_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "nifi_config": { + "title": "NifiSourceConfig", + "description": "Any source that produces dataset urns in a single environment should inherit this class", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "site_url": { + "title": "Site Url", + "description": "URI to connect", + "type": "string" + }, + "auth": { + "description": "Nifi authentication. must be one of : NO_AUTH, SINGLE_USER, CLIENT_CERT", + "default": "NO_AUTH", + "allOf": [ + { + "$ref": "#/definitions/NifiAuthType" + } + ] + }, + "provenance_days": { + "title": "Provenance Days", + "description": "time window to analyze provenance events for external datasets", + "default": 7, + "type": "integer" + }, + "process_group_pattern": { + "title": "Process Group Pattern", + "description": "regex patterns for filtering process groups", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "site_name": { + "title": "Site Name", + "description": "Site name to identify this site with, useful when using input and output ports receiving remote connections", + "default": "default", + "type": "string" + }, + "site_url_to_site_name": { + "title": "Site Url To Site Name", + "description": "Lookup to find site_name for site_url, required if using remote process groups in nifi flow", + "default": {}, + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "username": { + "title": "Username", + "description": "Nifi username, must be set for auth = \"SINGLE_USER\"", 
+ "type": "string" + }, + "password": { + "title": "Password", + "description": "Nifi password, must be set for auth = \"SINGLE_USER\"", + "type": "string" + }, + "client_cert_file": { + "title": "Client Cert File", + "description": "Path to PEM file containing the public certificates for the user/client identity, must be set for auth = \"CLIENT_CERT\"", + "type": "string" + }, + "client_key_file": { + "title": "Client Key File", + "description": "Path to PEM file containing the client\u2019s secret key", + "type": "string" + }, + "client_key_password": { + "title": "Client Key Password", + "description": "The password to decrypt the client_key_file", + "type": "string" + }, + "ca_file": { + "title": "Ca File", + "description": "Path to PEM file containing certs for the root CA(s) for the NiFi", + "type": "string" + } + }, + "required": [ + "site_url" + ], + "additionalProperties": false + }, + "clickhouse": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "clickhouse" + ] + }, + "config": { + "$ref": "#/definitions/clickhouse_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "clickhouse_config": { + "title": "ClickHouseConfig", + "description": "Base configuration class for stateful ingestion for source configs to inherit from.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform_instance_map": { + "title": "Platform Instance Map", + "description": "A holder for platform -> platform_instance mappings to generate correct dataset urns", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "bucket_duration": { + "description": "Size of the time window to aggregate usage stats.", + "default": "DAY", + "allOf": [ + { + "$ref": "#/definitions/BucketDuration" + } + ] + }, + "end_time": { + "title": "End Time", + "description": "Latest 
date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", + "type": "string", + "format": "date-time" + }, + "start_time": { + "title": "Start Time", + "description": "Earliest date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", + "type": "string", + "format": "date-time" + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "stateful_ingestion": { + "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" + }, + "options": { + "title": "Options", + "default": {}, + "type": "object" + }, + "schema_pattern": { + "title": "Schema Pattern", + "description": "regex patterns for schemas to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + "description": "regex patterns for tables to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "view_pattern": { + "title": "View Pattern", + "description": "regex patterns for views to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "profile_pattern": { + "title": "Profile Pattern", + "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + 
"alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "domain": { + "title": "Domain", + "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) There can be multiple domain key specified.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/AllowDenyPattern" + } + }, + "include_views": { + "title": "Include Views", + "description": "Whether views should be ingested.", + "default": true, + "type": "boolean" + }, + "include_tables": { + "title": "Include Tables", + "description": "Whether tables should be ingested.", + "default": true, + "type": "boolean" + }, + "profiling": { + "title": "Profiling", + "default": { + "enabled": false, + "limit": null, + "offset": null, + "report_dropped_profiles": false, + "turn_off_expensive_profiling_metrics": false, + "profile_table_level_only": false, + "include_field_null_count": true, + "include_field_min_value": true, + "include_field_max_value": true, + "include_field_mean_value": true, + "include_field_median_value": true, + "include_field_stddev_value": true, + "include_field_quantiles": false, + "include_field_distinct_value_frequencies": false, + "include_field_histogram": false, + "include_field_sample_values": true, + "allow_deny_patterns": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "max_number_of_fields_to_profile": null, + "profile_if_updated_since_days": 1, + "max_workers": 50, + "query_combiner_enabled": true, + "catch_exceptions": true, + "partition_profiling_enabled": true, + "bigquery_temp_table_schema": null, + "partition_datetime": null + }, + "allOf": [ + { + "$ref": "#/definitions/GEProfilingConfig" + } + ] + }, + "username": { + "title": "Username", + "description": "username", + "type": "string" + }, + "password": { + "title": "Password", + "description": "password", + "type": 
"string", + "writeOnly": true, + "format": "password" + }, + "host_port": { + "title": "Host Port", + "description": "ClickHouse host URL.", + "default": "localhost:8123", + "type": "string" + }, + "database": { + "title": "Database", + "description": "database (catalog)", + "type": "string" + }, + "database_alias": { + "title": "Database Alias", + "description": "Alias to apply to database when ingesting.", + "type": "string" + }, + "scheme": { + "title": "Scheme", + "default": "clickhouse", + "type": "string" + }, + "sqlalchemy_uri": { + "title": "Sqlalchemy Uri", + "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", + "type": "string" + }, + "secure": { + "title": "Secure", + "type": "boolean" + }, + "protocol": { + "title": "Protocol", + "type": "string" + }, + "include_table_lineage": { + "title": "Include Table Lineage", + "description": "Whether table lineage should be ingested.", + "default": true, + "type": "boolean" + }, + "include_materialized_views": { + "title": "Include Materialized Views", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "dbt": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "dbt" + ] + }, + "config": { + "$ref": "#/definitions/dbt_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "dbt_config": { + "title": "DBTConfig", + "description": "Base configuration class for stateful ingestion for source configs to inherit from.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "Environment to use in namespace when constructing URNs.", + "default": "PROD", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the 
platform that all assets produced by this recipe belong to", + "type": "string" + }, + "stateful_ingestion": { + "$ref": "#/definitions/DBTStatefulIngestionConfig" + }, + "manifest_path": { + "title": "Manifest Path", + "description": "Path to dbt manifest JSON. See https://docs.getdbt.com/reference/artifacts/manifest-json Note this can be a local file or a URI.", + "type": "string" + }, + "catalog_path": { + "title": "Catalog Path", + "description": "Path to dbt catalog JSON. See https://docs.getdbt.com/reference/artifacts/catalog-json Note this can be a local file or a URI.", + "type": "string" + }, + "sources_path": { + "title": "Sources Path", + "description": "Path to dbt sources JSON. See https://docs.getdbt.com/reference/artifacts/sources-json. If not specified, last-modified fields will not be populated. Note this can be a local file or a URI.", + "type": "string" + }, + "test_results_path": { + "title": "Test Results Path", + "description": "Path to output of dbt test run as run_results file in JSON format. See https://docs.getdbt.com/reference/artifacts/run-results-json. If not specified, test execution results will not be populated in DataHub.", + "type": "string" + }, + "target_platform": { + "title": "Target Platform", + "description": "The platform that dbt is loading onto. (e.g. bigquery / redshift / postgres etc.)", + "type": "string" + }, + "target_platform_instance": { + "title": "Target Platform Instance", + "description": "The platform instance for the platform that dbt is operating on. Use this if you have multiple instances of the same platform (e.g. redshift) and need to distinguish between them.", + "type": "string" + }, + "load_schemas": { + "title": "Load Schemas", + "description": "This flag is only consulted when disable_dbt_node_creation is set to True. Load schemas for target_platform entities from dbt catalog file, not necessary when you are already ingesting this metadata from the data platform directly. 
If set to False, table schema details (e.g. columns) will not be ingested.", + "default": true, + "type": "boolean" + }, + "use_identifiers": { + "title": "Use Identifiers", + "description": "Use model identifier instead of model name if defined (if not, default to model name).", + "default": false, + "type": "boolean" + }, + "node_type_pattern": { + "title": "Node Type Pattern", + "description": "regex patterns for dbt nodes to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "tag_prefix": { + "title": "Tag Prefix", + "description": "Prefix added to tags during ingestion.", + "default": "dbt:", + "type": "string" + }, + "node_name_pattern": { + "title": "Node Name Pattern", + "description": "regex patterns for dbt model names to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "meta_mapping": { + "title": "Meta Mapping", + "description": "mapping rules that will be executed against dbt meta properties. Refer to the section below on dbt meta automated mappings.", + "default": {}, + "type": "object" + }, + "query_tag_mapping": { + "title": "Query Tag Mapping", + "description": "mapping rules that will be executed against dbt query_tag meta properties. Refer to the section below on dbt meta automated mappings.", + "default": {}, + "type": "object" + }, + "write_semantics": { + "title": "Write Semantics", + "description": "Whether the new tags, terms and owners to be added will override the existing ones added only by this source or not. 
Value for this config can be \"PATCH\" or \"OVERRIDE\"", + "default": "PATCH", + "type": "string" + }, + "strip_user_ids_from_email": { + "title": "Strip User Ids From Email", + "description": "Whether or not to strip email id while adding owners using dbt meta actions.", + "default": false, + "type": "boolean" + }, + "owner_extraction_pattern": { + "title": "Owner Extraction Pattern", + "description": "Regex string to extract owner from the dbt node using the `(?P<name>...) syntax` of the [match object](https://docs.python.org/3/library/re.html#match-objects), where the group name must be `owner`. Examples: (1)`r\"(?P<owner>(.*)): (\\w+) (\\w+)\"` will extract `jdoe` as the owner from `\"jdoe: John Doe\"` (2) `r\"@(?P<owner>(.*))\"` will extract `alice` as the owner from `\"@alice\"`.", + "type": "string" + }, + "aws_connection": { + "title": "Aws Connection", + "description": "When fetching manifest files from s3, configuration for aws connection details", + "allOf": [ + { + "$ref": "#/definitions/AwsConnectionConfig" + } + ] + }, + "delete_tests_as_datasets": { + "title": "Delete Tests As Datasets", + "description": "Prior to version 0.8.38, dbt tests were represented as datasets. If you ingested dbt tests before, set this flag to True (just needed once) to soft-delete tests that were generated as datasets by previous ingestion.", + "default": false, + "type": "boolean" + }, + "disable_dbt_node_creation": { + "title": "Disable Dbt Node Creation", + "description": "Whether to suppress dbt dataset metadata creation. When set to True, this flag applies the dbt metadata to the target_platform entities (e.g. 
populating schema and column descriptions from dbt into the postgres / bigquery table metadata in DataHub) and generates lineage between the platform entities.", + "default": false, + "type": "boolean" + }, + "enable_meta_mapping": { + "title": "Enable Meta Mapping", + "description": "When enabled, applies the mappings that are defined through the meta_mapping directives.", + "default": true, + "type": "boolean" + }, + "enable_query_tag_mapping": { + "title": "Enable Query Tag Mapping", + "description": "When enabled, applies the mappings that are defined through the `query_tag_mapping` directives.", + "default": true, + "type": "boolean" + } + }, + "required": [ + "manifest_path", + "catalog_path", + "target_platform" + ], + "additionalProperties": false + }, + "mysql": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "mysql" + ] + }, + "config": { + "$ref": "#/definitions/mysql_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "mysql_config": { + "title": "MySQLConfig", + "description": "Base configuration class for stateful ingestion for source configs to inherit from.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "stateful_ingestion": { + "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" + }, + "options": { + "title": "Options", + "default": {}, + "type": "object" + }, + "schema_pattern": { + "title": "Schema Pattern", + "description": "regex patterns for schemas to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + 
"ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + "description": "regex patterns for tables to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "view_pattern": { + "title": "View Pattern", + "description": "regex patterns for views to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "profile_pattern": { + "title": "Profile Pattern", + "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "domain": { + "title": "Domain", + "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/AllowDenyPattern" + } + }, + "include_views": { + "title": "Include Views", + "description": "Whether views should be ingested.", + "default": true, + "type": "boolean" + }, + "include_tables": { + "title": "Include Tables", + "description": "Whether tables should be ingested.", + "default": true, + "type": "boolean" + }, + "profiling": { + "title": "Profiling", + "default": { + "enabled": false, + "limit": null, + "offset": null, + "report_dropped_profiles": false, + "turn_off_expensive_profiling_metrics": false, + "profile_table_level_only": false, + "include_field_null_count": true, + "include_field_min_value": true, + "include_field_max_value": true, + "include_field_mean_value": true, + "include_field_median_value": true, + "include_field_stddev_value": true, + "include_field_quantiles": false, + "include_field_distinct_value_frequencies": false, + "include_field_histogram": false, + "include_field_sample_values": true, + "allow_deny_patterns": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "max_number_of_fields_to_profile": null, + "profile_if_updated_since_days": 1, + "max_workers": 50, + "query_combiner_enabled": true, + "catch_exceptions": true, + "partition_profiling_enabled": true, + "bigquery_temp_table_schema": null, + "partition_datetime": null + }, + "allOf": [ + { + "$ref": "#/definitions/GEProfilingConfig" + } + ] + }, + "username": { + "title": "Username", + "description": "username", + "type": "string" + }, + "password": { + "title": "Password", + "description": "password", + "type": "string", + "writeOnly": true, + "format": "password" + }, + "host_port": { + "title": "Host Port", + "description": "MySQL host URL.", + "default": "localhost:3306", + "type": "string" + }, + "database": { + "title": "Database", + "description": "database (catalog)", + "type": 
"string" + }, + "database_alias": { + "title": "Database Alias", + "description": "Alias to apply to database when ingesting.", + "type": "string" + }, + "scheme": { + "title": "Scheme", + "default": "mysql+pymysql", + "type": "string" + }, + "sqlalchemy_uri": { + "title": "Sqlalchemy Uri", + "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", + "type": "string" + } + }, + "additionalProperties": false + }, + "trino": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "trino" + ] + }, + "config": { + "$ref": "#/definitions/trino_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "trino_config": { + "title": "TrinoConfig", + "description": "Base configuration class for stateful ingestion for source configs to inherit from.", + "type": "object", + "properties": { + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "platform": { + "title": "Platform", + "description": "The platform that this source connects to", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "description": "The instance of the platform that all assets produced by this recipe belong to", + "type": "string" + }, + "stateful_ingestion": { + "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" + }, + "options": { + "title": "Options", + "default": {}, + "type": "object" + }, + "schema_pattern": { + "title": "Schema Pattern", + "description": "regex patterns for schemas to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + "description": "regex patterns for tables to filter in 
ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "view_pattern": { + "title": "View Pattern", + "description": "regex patterns for views to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "profile_pattern": { + "title": "Profile Pattern", + "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "domain": { + "title": "Domain", + "description": " regex patterns for tables/schemas to decide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", + "default": {}, + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/AllowDenyPattern" + } + }, + "include_views": { + "title": "Include Views", + "description": "Whether views should be ingested.", + "default": true, + "type": "boolean" + }, + "include_tables": { + "title": "Include Tables", + "description": "Whether tables should be ingested.", + "default": true, + "type": "boolean" + }, + "profiling": { + "title": "Profiling", + "default": { + "enabled": false, + "limit": null, + "offset": null, + "report_dropped_profiles": false, + "turn_off_expensive_profiling_metrics": false, + "profile_table_level_only": false, + "include_field_null_count": true, + "include_field_min_value": true, + "include_field_max_value": true, + "include_field_mean_value": true, + "include_field_median_value": true, + "include_field_stddev_value": true, + "include_field_quantiles": false, + "include_field_distinct_value_frequencies": false, + "include_field_histogram": false, + "include_field_sample_values": true, + "allow_deny_patterns": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "max_number_of_fields_to_profile": null, + "profile_if_updated_since_days": 1, + "max_workers": 50, + "query_combiner_enabled": true, + "catch_exceptions": true, + "partition_profiling_enabled": true, + "bigquery_temp_table_schema": null, + "partition_datetime": null + }, + "allOf": [ + { + "$ref": "#/definitions/GEProfilingConfig" + } + ] + }, + "username": { + "title": "Username", + "description": "username", + "type": "string" + }, + "password": { + "title": "Password", + "description": "password", + "type": "string", + "writeOnly": true, + "format": "password" + }, + "host_port": { + "title": "Host Port", + "description": "host URL", + "type": "string" + }, + "database": { + "title": "Database", + "description": "database (catalog)", + "type": "string" + }, + "database_alias": { + 
"title": "Database Alias", + "description": "Alias to apply to database when ingesting.", + "type": "string" + }, + "scheme": { + "title": "Scheme", + "default": "trino", + "type": "string" + }, + "sqlalchemy_uri": { + "title": "Sqlalchemy Uri", + "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", + "type": "string" + } + }, + "required": [ + "host_port" + ], + "additionalProperties": false + }, + "okta": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "okta" + ] + }, + "config": { + "$ref": "#/definitions/okta_config" + } + }, + "required": [ + "type", + "config" + ] + }, + "okta_config": { + "title": "OktaConfig", + "type": "object", + "properties": { + "okta_domain": { + "title": "Okta Domain", + "description": "The location of your Okta Domain, without a protocol. Can be found in Okta Developer console.", + "default": "dev-33231928.okta.com", + "type": "string" + }, + "okta_api_token": { + "title": "Okta Api Token", + "description": "An API token generated for the DataHub application inside your Okta Developer Console.", + "default": "00be4R_M2MzDqXawbWgfKGpKee0kuEOfX1RCQSRx00", + "type": "string" + }, + "ingest_users": { + "title": "Ingest Users", + "description": "Whether users should be ingested into DataHub.", + "default": true, + "type": "boolean" + }, + "ingest_groups": { + "title": "Ingest Groups", + "description": "Whether groups should be ingested into DataHub.", + "default": true, + "type": "boolean" + }, + "ingest_group_membership": { + "title": "Ingest Group Membership", + "description": "Whether group membership should be ingested into DataHub. 
ingest_groups must be True if this is True.", + "default": true, + "type": "boolean" + }, + "okta_profile_to_username_attr": { + "title": "Okta Profile To Username Attr", + "description": "Which Okta User Profile attribute to use as input to DataHub username mapping.", + "default": "login", + "type": "string" + }, + "okta_profile_to_username_regex": { + "title": "Okta Profile To Username Regex", + "description": "A regex used to parse the DataHub username from the attribute specified in `okta_profile_to_username_attr`.", + "default": "([^@]+)", + "type": "string" + }, + "okta_profile_to_group_name_attr": { + "title": "Okta Profile To Group Name Attr", + "description": "Which Okta Group Profile attribute to use as input to DataHub group name mapping.", + "default": "name", + "type": "string" + }, + "okta_profile_to_group_name_regex": { + "title": "Okta Profile To Group Name Regex", + "description": "A regex used to parse the DataHub group name from the attribute specified in `okta_profile_to_group_name_attr`.", + "default": "(.*)", + "type": "string" + }, + "include_deprovisioned_users": { + "title": "Include Deprovisioned Users", + "description": "Whether to ingest users in the DEPROVISIONED state from Okta.", + "default": false, + "type": "boolean" + }, + "include_suspended_users": { + "title": "Include Suspended Users", + "description": "Whether to ingest users in the SUSPENDED state from Okta.", + "default": false, + "type": "boolean" + }, + "page_size": { + "title": "Page Size", + "description": "The number of entities requested from Okta's REST APIs in one request.", + "default": 100, + "type": "integer" + }, + "delay_seconds": { + "title": "Delay Seconds", + "description": "Number of seconds to wait between calls to Okta's REST APIs. (Okta rate limits). 
Defaults to 10ms.", + "default": 0.01, + "anyOf": [ + { + "type": "number" + }, + { + "type": "integer" + } + ] + }, + "okta_users_filter": { + "title": "Okta Users Filter", + "description": "Okta filter expression (not regex) for ingesting users. Only one of `okta_users_filter` and `okta_users_search` can be set. See (https://developer.okta.com/docs/reference/api/users/#list-users-with-a-filter) for more info.", + "type": "string" + }, + "okta_users_search": { + "title": "Okta Users Search", + "description": "Okta search expression (not regex) for ingesting users. Only one of `okta_users_filter` and `okta_users_search` can be set. See (https://developer.okta.com/docs/reference/api/users/#list-users-with-search) for more info.", + "type": "string" + }, + "okta_groups_filter": { + "title": "Okta Groups Filter", + "description": "Okta filter expression (not regex) for ingesting groups. Only one of `okta_groups_filter` and `okta_groups_search` can be set. See (https://developer.okta.com/docs/reference/api/groups/#filters) for more info.", + "type": "string" + }, + "okta_groups_search": { + "title": "Okta Groups Search", + "description": "Okta search expression (not regex) for ingesting groups. Only one of `okta_groups_filter` and `okta_groups_search` can be set. 
See (https://developer.okta.com/docs/reference/api/groups/#list-groups-with-search) for more info.", + "type": "string" + }, + "mask_group_id": { + "title": "Mask Group Id", + "default": true, + "type": "boolean" + }, + "mask_user_id": { + "title": "Mask User Id", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "AllowDenyPattern": { + "title": "AllowDenyPattern", + "description": "A class to store allow deny regexes", + "type": "object", + "properties": { + "allow": { + "title": "Allow", + "description": "List of regex patterns for process groups to include in ingestion", + "default": [ + ".*" + ], + "type": "array", + "items": { + "type": "string" + } + }, + "deny": { + "title": "Deny", + "description": "List of regex patterns for process groups to exclude from ingestion.", + "default": [], + "type": "array", + "items": { + "type": "string" + } + }, + "ignoreCase": { + "title": "Ignorecase", + "description": "Whether to ignore case sensitivity during pattern matching.", + "default": true, + "type": "boolean" + }, + "alphabet": { + "title": "Alphabet", + "description": "Allowed alphabets pattern", + "default": "[A-Za-z0-9 _.-]", + "type": "string" + } + }, + "additionalProperties": false + }, + "AdlsSourceConfig": { + "title": "AdlsSourceConfig", + "description": "Common Azure credentials config.\n\nhttps://docs.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-directory-file-acl-python", + "type": "object", + "properties": { + "base_path": { + "title": "Base Path", + "description": "Base folder in hierarchical namespaces to start from.", + "default": "/", + "type": "string" + }, + "container_name": { + "title": "Container Name", + "description": "Azure storage account container name.", + "type": "string" + }, + "account_name": { + "title": "Account Name", + "description": "Name of the Azure storage account. 
See [Microsoft official documentation on how to create a storage account.](https://docs.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account)", + "type": "string" + }, + "account_key": { + "title": "Account Key", + "description": "Azure storage account access key that can be used as a credential. **An account key, a SAS token or a client secret is required for authentication.**", + "type": "string" + }, + "sas_token": { + "title": "Sas Token", + "description": "Azure storage account Shared Access Signature (SAS) token that can be used as a credential. **An account key, a SAS token or a client secret is required for authentication.**", + "type": "string" + }, + "client_secret": { + "title": "Client Secret", + "description": "Azure client secret that can be used as a credential. **An account key, a SAS token or a client secret is required for authentication.**", + "type": "string" + }, + "client_id": { + "title": "Client Id", + "description": "Azure client (Application) ID required when a `client_secret` is used as a credential.", + "type": "string" + }, + "tenant_id": { + "title": "Tenant Id", + "description": "Azure tenant (Directory) ID required when a `client_secret` is used as a credential.", + "type": "string" + } + }, + "required": [ + "container_name", + "account_name" + ], + "additionalProperties": false + }, + "IcebergProfilingConfig": { + "title": "IcebergProfilingConfig", + "type": "object", + "properties": { + "enabled": { + "title": "Enabled", + "description": "Whether profiling should be done.", + "default": false, + "type": "boolean" + }, + "include_field_null_count": { + "title": "Include Field Null Count", + "description": "Whether to profile for the number of nulls for each column.", + "default": true, + "type": "boolean" + }, + "include_field_min_value": { + "title": "Include Field Min Value", + "description": "Whether to profile for the min value of numeric columns.", + "default": true, + "type": "boolean" + }, + 
"include_field_max_value": { + "title": "Include Field Max Value", + "description": "Whether to profile for the max value of numeric columns.", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "DynamicTypedStateProviderConfig": { + "title": "DynamicTypedStateProviderConfig", + "type": "object", + "properties": { + "type": { + "title": "Type", + "description": "The type of the state provider to use. For DataHub use `datahub`", + "type": "string" + }, + "config": { + "title": "Config", + "description": "The configuration required for initializing the state provider. Default: The datahub_api config if set at pipeline level. Otherwise, the default DatahubClientConfig. See the defaults (https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/graph/client.py#L19)." + } + }, + "required": [ + "type" + ], + "additionalProperties": false + }, + "PulsarSourceStatefulIngestionConfig": { + "title": "PulsarSourceStatefulIngestionConfig", + "description": "Specialization of the basic StatefulIngestionConfig to add custom config.\nThis will be used to override the stateful_ingestion config param of StatefulIngestionConfigBase\nin the PulsarSourceConfig.", + "type": "object", + "properties": { + "enabled": { + "title": "Enabled", + "description": "The type of the ingestion state provider registered with datahub.", + "default": false, + "type": "boolean" + }, + "max_checkpoint_state_size": { + "title": "Max Checkpoint State Size", + "description": "The maximum size of the checkpoint state in bytes. 
Default is 16MB", + "default": 16777216, + "exclusiveMinimum": 0, + "type": "integer" + }, + "state_provider": { + "title": "State Provider", + "description": "The ingestion state provider configuration.", + "allOf": [ + { + "$ref": "#/definitions/DynamicTypedStateProviderConfig" + } + ] + }, + "ignore_old_state": { + "title": "Ignore Old State", + "description": "If set to True, ignores the previous checkpoint state.", + "default": false, + "type": "boolean" + }, + "ignore_new_state": { + "title": "Ignore New State", + "description": "If set to True, ignores the current checkpoint state.", + "default": false, + "type": "boolean" + }, + "remove_stale_metadata": { + "title": "Remove Stale Metadata", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "NamingPattern": { + "title": "NamingPattern", + "type": "object", + "properties": { + "allowed_vars": { + "title": "Allowed Vars", + "type": "array", + "items": { + "type": "string" + } + }, + "pattern": { + "title": "Pattern", + "type": "string" + }, + "variables": { + "title": "Variables", + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "allowed_vars", + "pattern" + ] + }, + "GitHubInfo": { + "title": "GitHubInfo", + "type": "object", + "properties": { + "repo": { + "title": "Repo", + "description": "Name of your github repo. e.g. repo for https://github.com/datahub-project/datahub is `datahub-project/datahub`.", + "type": "string" + }, + "branch": { + "title": "Branch", + "description": "Branch on which your files live by default. 
Typically main or master.", + "default": "main", + "type": "string" + }, + "base_url": { + "title": "Base Url", + "description": "Base url for Github", + "default": "https://github.com", + "type": "string" + } + }, + "required": [ + "repo" + ], + "additionalProperties": false + }, + "TransportOptionsConfig": { + "title": "TransportOptionsConfig", + "type": "object", + "properties": { + "timeout": { + "title": "Timeout", + "type": "integer" + }, + "headers": { + "title": "Headers", + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "required": [ + "timeout", + "headers" + ], + "additionalProperties": false + }, + "SQLAlchemyStatefulIngestionConfig": { + "title": "SQLAlchemyStatefulIngestionConfig", + "description": "Specialization of the basic StatefulIngestionConfig to add custom config.\nThis will be used to override the stateful_ingestion config param of StatefulIngestionConfigBase\nin the SQLAlchemyConfig.", + "type": "object", + "properties": { + "enabled": { + "title": "Enabled", + "description": "The type of the ingestion state provider registered with datahub.", + "default": false, + "type": "boolean" + }, + "max_checkpoint_state_size": { + "title": "Max Checkpoint State Size", + "description": "The maximum size of the checkpoint state in bytes. 
Default is 16MB", + "default": 16777216, + "exclusiveMinimum": 0, + "type": "integer" + }, + "state_provider": { + "title": "State Provider", + "description": "The ingestion state provider configuration.", + "allOf": [ + { + "$ref": "#/definitions/DynamicTypedStateProviderConfig" + } + ] + }, + "ignore_old_state": { + "title": "Ignore Old State", + "description": "If set to True, ignores the previous checkpoint state.", + "default": false, + "type": "boolean" + }, + "ignore_new_state": { + "title": "Ignore New State", + "description": "If set to True, ignores the current checkpoint state.", + "default": false, + "type": "boolean" + }, + "remove_stale_metadata": { + "title": "Remove Stale Metadata", + "description": "Soft-deletes the tables and views that were found in the last successful run but missing in the current run with stateful_ingestion enabled.", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "GEProfilingConfig": { + "title": "GEProfilingConfig", + "type": "object", + "properties": { + "enabled": { + "title": "Enabled", + "description": "Whether profiling should be done.", + "default": false, + "type": "boolean" + }, + "limit": { + "title": "Limit", + "description": "Max number of documents to profile. By default, profiles all documents.", + "type": "integer" + }, + "offset": { + "title": "Offset", + "description": "Offset in documents to profile. By default, uses no offset.", + "type": "integer" + }, + "report_dropped_profiles": { + "title": "Report Dropped Profiles", + "description": "If datasets which were not profiled are reported in source report or not. Set to `True` for debugging purposes.", + "default": false, + "type": "boolean" + }, + "turn_off_expensive_profiling_metrics": { + "title": "Turn Off Expensive Profiling Metrics", + "description": "Whether to turn off expensive profiling or not. This turns off profiling for quantiles, distinct_value_frequencies, histogram & sample_values. 
This also limits maximum number of fields being profiled to 10.", + "default": false, + "type": "boolean" + }, + "profile_table_level_only": { + "title": "Profile Table Level Only", + "description": "Whether to perform profiling at table-level only, or include column-level profiling as well.", + "default": false, + "type": "boolean" + }, + "include_field_null_count": { + "title": "Include Field Null Count", + "description": "Whether to profile for the number of nulls for each column.", + "default": true, + "type": "boolean" + }, + "include_field_min_value": { + "title": "Include Field Min Value", + "description": "Whether to profile for the min value of numeric columns.", + "default": true, + "type": "boolean" + }, + "include_field_max_value": { + "title": "Include Field Max Value", + "description": "Whether to profile for the max value of numeric columns.", + "default": true, + "type": "boolean" + }, + "include_field_mean_value": { + "title": "Include Field Mean Value", + "description": "Whether to profile for the mean value of numeric columns.", + "default": true, + "type": "boolean" + }, + "include_field_median_value": { + "title": "Include Field Median Value", + "description": "Whether to profile for the median value of numeric columns.", + "default": true, + "type": "boolean" + }, + "include_field_stddev_value": { + "title": "Include Field Stddev Value", + "description": "Whether to profile for the standard deviation of numeric columns.", + "default": true, + "type": "boolean" + }, + "include_field_quantiles": { + "title": "Include Field Quantiles", + "description": "Whether to profile for the quantiles of numeric columns.", + "default": false, + "type": "boolean" + }, + "include_field_distinct_value_frequencies": { + "title": "Include Field Distinct Value Frequencies", + "description": "Whether to profile for distinct value frequencies.", + "default": false, + "type": "boolean" + }, + "include_field_histogram": { + "title": "Include Field Histogram", + 
"description": "Whether to profile for the histogram for numeric fields.", + "default": false, + "type": "boolean" + }, + "include_field_sample_values": { + "title": "Include Field Sample Values", + "description": "Whether to profile for the sample values for all columns.", + "default": true, + "type": "boolean" + }, + "allow_deny_patterns": { + "title": "Allow Deny Patterns", + "description": "regex patterns for filtering of tables or table columns to profile.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "max_number_of_fields_to_profile": { + "title": "Max Number Of Fields To Profile", + "description": "A positive integer that specifies the maximum number of columns to profile for any table. `None` implies all columns. The cost of profiling goes up significantly as the number of columns to profile goes up.", + "exclusiveMinimum": 0, + "type": "integer" + }, + "profile_if_updated_since_days": { + "title": "Profile If Updated Since Days", + "description": "Profile table only if it has been updated since these many number of days. `None` implies profile all tables. Only Snowflake supports this.", + "default": 1, + "exclusiveMinimum": 0, + "type": "number" + }, + "max_workers": { + "title": "Max Workers", + "description": "Number of worker threads to use for profiling. 
Set to 1 to disable.", + "default": 50, + "type": "integer" + }, + "query_combiner_enabled": { + "title": "Query Combiner Enabled", + "description": "*This feature is still experimental and can be disabled if it causes issues.* Reduces the total number of queries issued and speeds up profiling by dynamically combining SQL queries where possible.", + "default": true, + "type": "boolean" + }, + "catch_exceptions": { + "title": "Catch Exceptions", + "default": true, + "type": "boolean" + }, + "partition_profiling_enabled": { + "title": "Partition Profiling Enabled", + "default": true, + "type": "boolean" + }, + "bigquery_temp_table_schema": { + "title": "Bigquery Temp Table Schema", + "description": "On BigQuery, profiling partitioned tables requires creating temporary views. You have to define a schema where these will be created. Views will be cleaned up after the profiler runs. (Great Expectations tech details about this: https://legacy.docs.greatexpectations.io/en/0.9.0/reference/integrations/bigquery.html#custom-queries-with-sql-datasource).", + "type": "string" + }, + "partition_datetime": { + "title": "Partition Datetime", + "description": "For partitioned datasets profile only the partition which matches the datetime or profile the latest one if not set. 
Only Bigquery supports this.", + "type": "string", + "format": "date-time" + } + }, + "additionalProperties": false + }, + "BucketDuration": { + "title": "BucketDuration", + "description": "An enumeration.", + "enum": [ + "DAY", + "HOUR" + ], + "type": "string" + }, + "LookerConnectionDefinition": { + "title": "LookerConnectionDefinition", + "type": "object", + "properties": { + "platform": { + "title": "Platform", + "type": "string" + }, + "default_db": { + "title": "Default Db", + "type": "string" + }, + "default_schema": { + "title": "Default Schema", + "type": "string" + }, + "platform_instance": { + "title": "Platform Instance", + "type": "string" + }, + "platform_env": { + "title": "Platform Env", + "description": "The environment that the platform is located in. Leaving this empty will inherit defaults from the top level Looker configuration", + "type": "string" + } + }, + "required": [ + "platform", + "default_db" + ], + "additionalProperties": false + }, + "LookerAPIConfig": { + "title": "LookerAPIConfig", + "type": "object", + "properties": { + "client_id": { + "title": "Client Id", + "description": "Looker API client id.", + "type": "string" + }, + "client_secret": { + "title": "Client Secret", + "description": "Looker API client secret.", + "type": "string" + }, + "base_url": { + "title": "Base Url", + "description": "Url to your Looker instance: `https://company.looker.com:19999` or `https://looker.company.com`, or similar. 
Used for making API calls to Looker and constructing clickable dashboard and chart urls.", + "type": "string" + }, + "transport_options": { + "title": "Transport Options", + "description": "Populates the [TransportOptions](https://github.com/looker-open-source/sdk-codegen/blob/94d6047a0d52912ac082eb91616c1e7c379ab262/python/looker_sdk/rtl/transport.py#L70) struct for looker client", + "allOf": [ + { + "$ref": "#/definitions/TransportOptionsConfig" + } + ] + } + }, + "required": [ + "client_id", + "client_secret", + "base_url" + ], + "additionalProperties": false + }, + "ProvidedConfig": { + "title": "ProvidedConfig", + "type": "object", + "properties": { + "provider": { + "title": "Provider", + "type": "string" + }, + "path_key": { + "title": "Path Key", + "type": "string" + }, + "value": { + "title": "Value", + "type": "string" + } + }, + "required": [ + "provider", + "path_key", + "value" + ], + "additionalProperties": false + }, + "OauthConfiguration": { + "title": "OauthConfiguration", + "type": "object", + "properties": { + "provider": { + "title": "Provider", + "description": "Identity provider for oauth, e.g- microsoft", + "type": "string" + }, + "client_id": { + "title": "Client Id", + "description": "client id of your registered application", + "type": "string" + }, + "scopes": { + "title": "Scopes", + "description": "scopes required to connect to snowflake", + "type": "array", + "items": { + "type": "string" + } + }, + "use_certificate": { + "title": "Use Certificate", + "description": "Do you want to use certificate and private key to authenticate using oauth", + "default": false, + "type": "string" + }, + "client_secret": { + "title": "Client Secret", + "description": "client secret of the application if use_certificate = false", + "type": "string" + }, + "authority_url": { + "title": "Authority Url", + "description": "Authority url of your identity provider", + "type": "string" + }, + "encoded_oauth_public_key": { + "title": "Encoded Oauth Public Key", 
+ "description": "base64 encoded certificate content if use_certificate = true", + "type": "string" + }, + "encoded_oauth_private_key": { + "title": "Encoded Oauth Private Key", + "description": "base64 encoded private key content if use_certificate = true", + "type": "string" + } + }, + "additionalProperties": false + }, + "SnowflakeProvisionRoleConfig": { + "title": "SnowflakeProvisionRoleConfig", + "type": "object", + "properties": { + "enabled": { + "title": "Enabled", + "description": "Whether provisioning of Snowflake role (used for ingestion) is enabled or not.", + "default": false, + "type": "boolean" + }, + "dry_run": { + "title": "Dry Run", + "description": "If provision_role is enabled, whether to dry run the sql commands for system admins to see what sql grant commands would be run without actually running the grant commands.", + "default": false, + "type": "boolean" + }, + "drop_role_if_exists": { + "title": "Drop Role If Exists", + "description": "Useful during testing to ensure you have a clean slate role. Not recommended for production use cases.", + "default": false, + "type": "boolean" + }, + "run_ingestion": { + "title": "Run Ingestion", + "description": "If system admins wish to skip actual ingestion of metadata during testing of the provisioning of role.", + "default": false, + "type": "boolean" + }, + "admin_role": { + "title": "Admin Role", + "description": "The Snowflake role of admin user used for provisioning of the role specified by role config. 
System admins can audit the open source code and decide to use a different role.", + "default": "accountadmin", + "type": "string" + }, + "admin_username": { + "title": "Admin Username", + "description": "The username to be used for provisioning of role.", + "type": "string" + }, + "admin_password": { + "title": "Admin Password", + "description": "The password to be used for provisioning of role.", + "type": "string", + "writeOnly": true, + "format": "password" + } + }, + "required": [ + "admin_username" + ], + "additionalProperties": false + }, + "SnowflakeStatefulIngestionConfig": { + "title": "SnowflakeStatefulIngestionConfig", + "description": "Specialization of basic StatefulIngestionConfig to adding custom config.\nThis will be used to override the stateful_ingestion config param of StatefulIngestionConfigBase\nin the SnowflakeUsageConfig.", + "type": "object", + "properties": { + "enabled": { + "title": "Enabled", + "description": "The type of the ingestion state provider registered with datahub.", + "default": false, + "type": "boolean" + }, + "max_checkpoint_state_size": { + "title": "Max Checkpoint State Size", + "description": "The maximum size of the checkpoint state in bytes. 
Default is 16MB", + "default": 16777216, + "exclusiveMinimum": 0, + "type": "integer" + }, + "state_provider": { + "title": "State Provider", + "description": "The ingestion state provider configuration.", + "allOf": [ + { + "$ref": "#/definitions/DynamicTypedStateProviderConfig" + } + ] + }, + "force_rerun": { + "title": "Force Rerun", + "default": false, + "type": "boolean" + }, + "ignore_new_state": { + "title": "Ignore New State", + "description": "If set to True, ignores the current checkpoint state.", + "default": false, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "LineageMode": { + "title": "LineageMode", + "description": "An enumeration.", + "enum": [ + "sql_based", + "stl_scan_based", + "mixed" + ] + }, + "GlueProfilingConfig": { + "title": "GlueProfilingConfig", + "type": "object", + "properties": { + "row_count": { + "title": "Row Count", + "description": "The parameter name for row count in glue table.", + "type": "string" + }, + "column_count": { + "title": "Column Count", + "description": "The parameter name for column count in glue table.", + "type": "string" + }, + "unique_count": { + "title": "Unique Count", + "description": "The parameter name for the count of unique value in a column.", + "type": "string" + }, + "unique_proportion": { + "title": "Unique Proportion", + "description": "The parameter name for the proportion of unique values in a column.", + "type": "string" + }, + "null_count": { + "title": "Null Count", + "description": "The parameter name for the count of null values in a column.", + "type": "integer" + }, + "null_proportion": { + "title": "Null Proportion", + "description": "The parameter name for the proportion of null values in a column.", + "type": "string" + }, + "min": { + "title": "Min", + "description": "The parameter name for the min value of a column.", + "type": "string" + }, + "max": { + "title": "Max", + "description": "The parameter name for the max value of a column.", + "type": "string" + }, 
+ "mean": { + "title": "Mean", + "description": "The parameter name for the mean value of a column.", + "type": "string" + }, + "median": { + "title": "Median", + "description": "The parameter name for the median value of a column.", + "type": "string" + }, + "stdev": { + "title": "Stdev", + "description": "The parameter name for the standard deviation of a column.", + "type": "string" + }, + "partition_patterns": { + "title": "Partition Patterns", + "description": "Regex patterns for filtering partitions for profile. The pattern should be a string like: \"{'key':'value'}\".", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + } + }, + "additionalProperties": false + }, + "ModeAPIConfig": { + "title": "ModeAPIConfig", + "type": "object", + "properties": { + "retry_backoff_multiplier": { + "title": "Retry Backoff Multiplier", + "description": "Multiplier for exponential backoff when waiting to retry", + "default": 2, + "anyOf": [ + { + "type": "integer" + }, + { + "type": "number" + } + ] + }, + "max_retry_interval": { + "title": "Max Retry Interval", + "description": "Maximum interval to wait when retrying", + "default": 10, + "anyOf": [ + { + "type": "integer" + }, + { + "type": "number" + } + ] + }, + "max_attempts": { + "title": "Max Attempts", + "description": "Maximum number of attempts to retry before failing", + "default": 5, + "type": "integer" + } + }, + "additionalProperties": false + }, + "AwsSourceConfig": { + "title": "AwsSourceConfig", + "description": "Common AWS credentials config.\n\nCurrently used by:\n - Glue source\n - SageMaker source", + "type": "object", + "properties": { + "aws_access_key_id": { + "title": "Aws Access Key Id", + "description": "Autodetected. 
See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", + "type": "string" + }, + "aws_secret_access_key": { + "title": "Aws Secret Access Key", + "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", + "type": "string" + }, + "aws_session_token": { + "title": "Aws Session Token", + "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", + "type": "string" + }, + "aws_role": { + "title": "Aws Role", + "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "aws_profile": { + "title": "Aws Profile", + "description": "Named AWS profile to use, if not set the default will be used", + "type": "string" + }, + "aws_region": { + "title": "Aws Region", + "description": "AWS region code.", + "type": "string" + }, + "aws_endpoint_url": { + "title": "Aws Endpoint Url", + "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html", + "type": "string" + }, + "aws_proxy": { + "title": "Aws Proxy", + "description": "Autodetected. 
See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html", + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "env": { + "title": "Env", + "description": "The environment that all assets produced by this connector belong to", + "default": "PROD", + "type": "string" + }, + "database_pattern": { + "title": "Database Pattern", + "description": "regex patterns for databases to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "table_pattern": { + "title": "Table Pattern", + "description": "regex patterns for tables to filter in ingestion.", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + } + }, + "required": [ + "aws_region" + ], + "additionalProperties": false + }, + "DataLakeProfilerConfig": { + "title": "DataLakeProfilerConfig", + "type": "object", + "properties": { + "enabled": { + "title": "Enabled", + "description": "Whether profiling should be done.", + "default": false, + "type": "boolean" + }, + "profile_table_level_only": { + "title": "Profile Table Level Only", + "description": "Whether to perform profiling at table-level only or include column-level profiling as well.", + "default": false, + "type": "boolean" + }, + "allow_deny_patterns": { + "title": "Allow Deny Patterns", + "default": { + "allow": [ + ".*" + ], + "deny": [], + "ignoreCase": true, + "alphabet": "[A-Za-z0-9 _.-]" + }, + "allOf": [ + { + "$ref": "#/definitions/AllowDenyPattern" + } + ] + }, + "max_number_of_fields_to_profile": { + "title": "Max Number Of Fields To Profile", + "description": "A positive integer that specifies the maximum number of columns to profile for any table. `None` implies all columns. 
The cost of profiling goes up significantly as the number of columns to profile goes up.", + "exclusiveMinimum": 0, + "type": "integer" + }, + "include_field_null_count": { + "title": "Include Field Null Count", + "description": "Whether to profile for the number of nulls for each column.", + "default": true, + "type": "boolean" + }, + "include_field_min_value": { + "title": "Include Field Min Value", + "description": "Whether to profile for the min value of numeric columns.", + "default": true, + "type": "boolean" + }, + "include_field_max_value": { + "title": "Include Field Max Value", + "description": "Whether to profile for the max value of numeric columns.", + "default": true, + "type": "boolean" + }, + "include_field_mean_value": { + "title": "Include Field Mean Value", + "description": "Whether to profile for the mean value of numeric columns.", + "default": true, + "type": "boolean" + }, + "include_field_median_value": { + "title": "Include Field Median Value", + "description": "Whether to profile for the median value of numeric columns.", + "default": true, + "type": "boolean" + }, + "include_field_stddev_value": { + "title": "Include Field Stddev Value", + "description": "Whether to profile for the standard deviation of numeric columns.", + "default": true, + "type": "boolean" + }, + "include_field_quantiles": { + "title": "Include Field Quantiles", + "description": "Whether to profile for the quantiles of numeric columns.", + "default": true, + "type": "boolean" + }, + "include_field_distinct_value_frequencies": { + "title": "Include Field Distinct Value Frequencies", + "description": "Whether to profile for distinct value frequencies.", + "default": true, + "type": "boolean" + }, + "include_field_histogram": { + "title": "Include Field Histogram", + "description": "Whether to profile for the histogram for numeric fields.", + "default": true, + "type": "boolean" + }, + "include_field_sample_values": { + "title": "Include Field Sample Values", + 
"description": "Whether to profile for the sample values for all columns.", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "PathSpec": { + "title": "PathSpec", + "type": "object", + "properties": { + "include": { + "title": "Include", + "description": "Path to table (s3 or local file system). Name variable {table} is used to mark the folder with dataset. In absence of {table}, file level dataset will be created. Check below examples for more details.", + "type": "string" + }, + "exclude": { + "title": "Exclude", + "description": "list of paths in glob pattern which will be excluded while scanning for the datasets", + "type": "array", + "items": { + "type": "string" + } + }, + "file_types": { + "title": "File Types", + "description": "Files with extenstions specified here (subset of default value) only will be scanned to create dataset. Other files will be omitted.", + "default": [ + "csv", + "tsv", + "json", + "parquet", + "avro" + ], + "type": "array", + "items": { + "type": "string" + } + }, + "default_extension": { + "title": "Default Extension", + "description": "For files without extension it will assume the specified file type. If it is not set the files without extensions will be skipped.", + "type": "string" + }, + "table_name": { + "title": "Table Name", + "description": "Display name of the dataset.Combination of named variableds from include path and strings", + "type": "string" + }, + "enable_compression": { + "title": "Enable Compression", + "description": "Enable or disable processing compressed files. Currenly .gz and .bz files are supported.", + "default": true, + "type": "boolean" + }, + "sample_files": { + "title": "Sample Files", + "description": "Not listing all the files but only taking a handful amount of sample file to infer the schema. File count and file size calculation will be disabled. 
This can affect performance significantly if enabled", + "default": true, + "type": "boolean" + } + }, + "required": [ + "include" + ], + "additionalProperties": false + }, + "NifiAuthType": { + "title": "NifiAuthType", + "description": "An enumeration.", + "enum": [ + "NO_AUTH", + "SINGLE_USER", + "CLIENT_CERT" + ] + }, + "DBTStatefulIngestionConfig": { + "title": "DBTStatefulIngestionConfig", + "description": "Specialization of basic StatefulIngestionConfig to adding custom config.\nThis will be used to override the stateful_ingestion config param of StatefulIngestionConfigBase\nin the SQLAlchemyConfig.", + "type": "object", + "properties": { + "enabled": { + "title": "Enabled", + "description": "The type of the ingestion state provider registered with datahub.", + "default": false, + "type": "boolean" + }, + "max_checkpoint_state_size": { + "title": "Max Checkpoint State Size", + "description": "The maximum size of the checkpoint state in bytes. Default is 16MB", + "default": 16777216, + "exclusiveMinimum": 0, + "type": "integer" + }, + "state_provider": { + "title": "State Provider", + "description": "The ingestion state provider configuration.", + "allOf": [ + { + "$ref": "#/definitions/DynamicTypedStateProviderConfig" + } + ] + }, + "ignore_old_state": { + "title": "Ignore Old State", + "description": "If set to True, ignores the previous checkpoint state.", + "default": false, + "type": "boolean" + }, + "ignore_new_state": { + "title": "Ignore New State", + "description": "If set to True, ignores the current checkpoint state.", + "default": false, + "type": "boolean" + }, + "remove_stale_metadata": { + "title": "Remove Stale Metadata", + "default": true, + "type": "boolean" + } + }, + "additionalProperties": false + }, + "AwsConnectionConfig": { + "title": "AwsConnectionConfig", + "description": "Common AWS credentials config.\n\nCurrently used by:\n - Glue source\n - SageMaker source\n - dbt source", + "type": "object", + "properties": { + 
"aws_access_key_id": { + "title": "Aws Access Key Id", + "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", + "type": "string" + }, + "aws_secret_access_key": { + "title": "Aws Secret Access Key", + "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", + "type": "string" + }, + "aws_session_token": { + "title": "Aws Session Token", + "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", + "type": "string" + }, + "aws_role": { + "title": "Aws Role", + "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "aws_profile": { + "title": "Aws Profile", + "description": "Named AWS profile to use, if not set the default will be used", + "type": "string" + }, + "aws_region": { + "title": "Aws Region", + "description": "AWS region code.", + "type": "string" + }, + "aws_endpoint_url": { + "title": "Aws Endpoint Url", + "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html", + "type": "string" + }, + "aws_proxy": { + "title": "Aws Proxy", + "description": "Autodetected. 
See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html", + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "required": [ + "aws_region" + ], + "additionalProperties": false + } + }, + "type": "object", + "properties": { + "source": { + "anyOf": [ + { + "$ref": "#/definitions/feast-legacy" + }, + { + "$ref": "#/definitions/redash" + }, + { + "$ref": "#/definitions/iceberg" + }, + { + "$ref": "#/definitions/pulsar" + }, + { + "$ref": "#/definitions/looker" + }, + { + "$ref": "#/definitions/sqlalchemy" + }, + { + "$ref": "#/definitions/azure-ad" + }, + { + "$ref": "#/definitions/starburst-trino-usage" + }, + { + "$ref": "#/definitions/postgres" + }, + { + "$ref": "#/definitions/tableau" + }, + { + "$ref": "#/definitions/lookml" + }, + { + "$ref": "#/definitions/powerbi" + }, + { + "$ref": "#/definitions/kafka-connect" + }, + { + "$ref": "#/definitions/snowflake" + }, + { + "$ref": "#/definitions/snowflake-usage" + }, + { + "$ref": "#/definitions/redshift-usage" + }, + { + "$ref": "#/definitions/hive" + }, + { + "$ref": "#/definitions/hana" + }, + { + "$ref": "#/definitions/sagemaker" + }, + { + "$ref": "#/definitions/glue" + }, + { + "$ref": "#/definitions/oracle" + }, + { + "$ref": "#/definitions/druid" + }, + { + "$ref": "#/definitions/mode" + }, + { + "$ref": "#/definitions/file" + }, + { + "$ref": "#/definitions/mssql" + }, + { + "$ref": "#/definitions/data-lake" + }, + { + "$ref": "#/definitions/presto-on-hive" + }, + { + "$ref": "#/definitions/vertica" + }, + { + "$ref": "#/definitions/csv-enricher" + }, + { + "$ref": "#/definitions/mariadb" + }, + { + "$ref": "#/definitions/feast" + }, + { + "$ref": "#/definitions/ldap" + }, + { + "$ref": "#/definitions/elasticsearch" + }, + { + "$ref": "#/definitions/superset" + }, + { + "$ref": "#/definitions/datahub-lineage-file" + }, + { + "$ref": "#/definitions/s3" + }, + { + "$ref": "#/definitions/athena" + }, + { + "$ref": "#/definitions/redshift" + }, + { 
+ "$ref": "#/definitions/openapi" + }, + { + "$ref": "#/definitions/metabase" + }, + { + "$ref": "#/definitions/datahub-business-glossary" + }, + { + "$ref": "#/definitions/clickhouse-usage" + }, + { + "$ref": "#/definitions/mongodb" + }, + { + "$ref": "#/definitions/nifi" + }, + { + "$ref": "#/definitions/clickhouse" + }, + { + "$ref": "#/definitions/dbt" + }, + { + "$ref": "#/definitions/mysql" + }, + { + "$ref": "#/definitions/trino" + }, + { + "$ref": "#/definitions/okta" + } + ] + }, + "transformers": { + "type": "array", + "items": { + "type": "object", + "description": "Transformer configs see at https://datahubproject.io/docs/metadata-ingestion/transformers", + "properties": { + "type": { + "type": "string", + "description": "Transformer type" + }, + "config": { + "type": "object", + "description": "Transformer config" + } + }, + "required": [ + "type" + ], + "additionalProperties": false + } + }, + "sink": { + "description": "sink", + "anyOf": [ + { + "$ref": "#/definitions/datahub_kafka_sink" + }, + { + "$ref": "#/definitions/datahub_rest_sink" + }, + { + "$ref": "#/definitions/console_sink" + }, + { + "$ref": "#/definitions/file_sink" + } + ] + } + }, + "required": [ + "source" + ] +} \ No newline at end of file From 5136c46b170220f3ade148cf08621d4d6399151c Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Mon, 27 Jun 2022 11:21:21 -0400 Subject: [PATCH 2/3] display source as its own section in sidebar --- .../profile/sidebar/SidebarAboutSection.tsx | 35 +++++++++++++------ .../Documentation/components/LinkList.tsx | 30 +--------------- .../src/app/entity/shared/types.ts | 1 + 3 files changed, 27 insertions(+), 39 deletions(-) diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/SidebarAboutSection.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/SidebarAboutSection.tsx index 94cef6f327c4a..f91ec9a36aaaa 100644 --- 
a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/SidebarAboutSection.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/SidebarAboutSection.tsx @@ -14,7 +14,7 @@ const DescriptionTypography = styled(Typography.Paragraph)` `; const SidebarLinkList = styled.div` - margin-left: -15px; + margin: 0 0 10px -15px; min-width: 0; `; @@ -32,10 +32,14 @@ const LinkButton = styled(Button)` overflow: hidden; white-space: nowrap; text-overflow: ellipsis; - line-height: 1; } `; +const SourceButton = styled(LinkButton)` + padding: 0; + margin-top: -5px; +`; + interface Props { hideLinksButton?: boolean; } @@ -49,8 +53,8 @@ export const SidebarAboutSection = ({ properties }: { properties?: Props }) => { const description = entityData?.editableProperties?.description || entityData?.properties?.description; const links = entityData?.institutionalMemory?.elements || []; - console.log('entityData', entityData?.properties?.sourceUrl); const sourceUrl = entityData?.properties?.sourceUrl; + const sourceRef = entityData?.properties?.sourceRef; const isUntouched = !description && !(links?.length > 0); @@ -97,14 +101,8 @@ export const SidebarAboutSection = ({ properties }: { properties?: Props }) => { )} - {links?.length > 0 || !!sourceUrl ? ( + {links?.length > 0 ? ( - {sourceUrl && ( - - - Definition - - )} {(links || []).map((link) => ( { )} )} + {sourceRef && ( + <> + + + {sourceUrl ? 
( + + + {sourceRef} + + ) : ( + { + sourceRef, + } + )} + + + )} ); }; diff --git a/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/LinkList.tsx b/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/LinkList.tsx index 90137fb32dfbb..aa1128646801a 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/LinkList.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/LinkList.tsx @@ -1,7 +1,7 @@ import React from 'react'; import { Link } from 'react-router-dom'; import styled from 'styled-components/macro'; -import { message, Button, List, Typography, Divider } from 'antd'; +import { message, Button, List, Typography } from 'antd'; import { LinkOutlined, DeleteOutlined } from '@ant-design/icons'; import { EntityType } from '../../../../../../types.generated'; import { useEntityData } from '../../../EntityContext'; @@ -28,10 +28,6 @@ const ListOffsetIcon = styled.span` margin-right: 6px; `; -const StyledDivider = styled(Divider)` - margin: 0; -`; - type LinkListProps = { refetch?: () => Promise; }; @@ -41,7 +37,6 @@ export const LinkList = ({ refetch }: LinkListProps) => { const entityRegistry = useEntityRegistry(); const [removeLinkMutation] = useRemoveLinkMutation(); const links = entityData?.institutionalMemory?.elements || []; - const sourceUrl = entityData?.properties?.sourceUrl; const handleDeleteLink = async (linkUrl: string) => { try { @@ -60,29 +55,6 @@ export const LinkList = ({ refetch }: LinkListProps) => { return entityData ? 
( <> - {sourceUrl && ( - ( - - - - - - - Definition - - - } - /> - - )} - /> - )} - {sourceUrl && links.length > 0 && } {links.length > 0 && ( ; qualifiedName?: Maybe; sourceUrl?: Maybe; + sourceRef?: Maybe; }>; globalTags?: Maybe; glossaryTerms?: Maybe; From 553916fc922a620e5f2897f37ef89cae2e863105 Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Mon, 27 Jun 2022 11:25:42 -0400 Subject: [PATCH 3/3] revert commented out code --- .../glossaryTerm/GlossaryTermEntity.tsx | 19 - .../src/assets/datahub_ingestion_schema.json | 10055 ---------------- 2 files changed, 10074 deletions(-) delete mode 100644 datahub-web-react/src/assets/datahub_ingestion_schema.json diff --git a/datahub-web-react/src/app/entity/glossaryTerm/GlossaryTermEntity.tsx b/datahub-web-react/src/app/entity/glossaryTerm/GlossaryTermEntity.tsx index 0a3577eec910d..5c9e9fc8a92c7 100644 --- a/datahub-web-react/src/app/entity/glossaryTerm/GlossaryTermEntity.tsx +++ b/datahub-web-react/src/app/entity/glossaryTerm/GlossaryTermEntity.tsx @@ -118,31 +118,12 @@ export class GlossaryTermEntity implements Entity { }; getOverridePropertiesFromEntity = (glossaryTerm?: GlossaryTerm | null): GenericEntityProperties => { - // let institutionalMemory = glossaryTerm?.institutionalMemory; - // if (glossaryTerm?.properties?.sourceUrl) { - // const sourceInfo = { - // url: glossaryTerm.properties.sourceUrl, - // label: 'Definition', - // } as InstitutionalMemoryMetadata; - - // if (glossaryTerm.institutionalMemory) { - // const elements = glossaryTerm.institutionalMemory.elements || []; - // const updatedElements = [...elements, sourceInfo]; - // institutionalMemory = { ...glossaryTerm.institutionalMemory, elements: updatedElements }; - // } else { - // institutionalMemory = { elements: [sourceInfo], __typename: 'InstitutionalMemory' }; - // } - // } - // if dataset has subTypes filled out, pick the most specific subtype and return it return { customProperties: glossaryTerm?.properties?.customProperties, - // 
institutionalMemory, }; }; - // if - renderSearch = (result: SearchResult) => { return this.renderPreview(PreviewType.SEARCH, result.entity as GlossaryTerm); }; diff --git a/datahub-web-react/src/assets/datahub_ingestion_schema.json b/datahub-web-react/src/assets/datahub_ingestion_schema.json deleted file mode 100644 index a9819d98746fa..0000000000000 --- a/datahub-web-react/src/assets/datahub_ingestion_schema.json +++ /dev/null @@ -1,10055 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-04/schema#", - "id": "https://json.schemastore.org/datahub-ingestion", - "title": "Datahub Ingestion", - "description": "Root schema of Datahub Ingestion", - "definitions": { - "console_sink": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "console" - ] - } - }, - "required": [ - "type" - ] - }, - "file_sink": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "file" - ] - }, - "config": { - "$ref": "#/definitions/file_sink_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "file_sink_config": { - "type": "object", - "properties": { - "filename": { - "description": "Path to file to write to.", - "type": "string" - } - }, - "required": [ - "filename" - ], - "additionalProperties": false - }, - "datahub_rest_sink": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "datahub-rest" - ] - }, - "config": { - "$ref": "#/definitions/datahub_rest_sink_config" - } - }, - "required": [ - "type", - "config" - ], - "additionalProperties": false - }, - "datahub_rest_sink_config": { - "type": "object", - "properties": { - "ca_certificate_path": { - "type": "string", - "description": "Path to CA certificate for HTTPS communications." 
- }, - "max_threads": { - "type": "number", - "description": "Experimental: Max parallelism for REST API calls", - "default": 1 - }, - "retry_status_codes": { - "type": "array", - "items": { - "type": "number" - }, - "description": "Retry HTTP request also on these status codes", - "default": [ - 429, - 502, - 503, - 504 - ] - }, - "server": { - "type": "string", - "description": "URL of DataHub GMS endpoint." - }, - "timeout_sec": { - "type": "number", - "description": "Per-HTTP request timeout.", - "default": 30 - }, - "token": { - "type": "string", - "description": "Bearer token used for authentication." - }, - "extra_headers": { - "type": "string", - "description": "Extra headers which will be added to the request." - } - }, - "required": [ - "server" - ], - "additionalProperties": false - }, - "datahub_kafka_sink": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "datahub-kafka" - ] - }, - "config": { - "$ref": "#/definitions/datahub_kafka_sink_config" - } - }, - "required": [ - "type", - "config" - ], - "additionalProperties": false - }, - "datahub_kafka_sink_config": { - "type": "object", - "properties": { - "connection": { - "type": "object", - "properties": { - "bootstrap": { - "type": "string", - "description": "Kafka bootstrap URL.", - "default": "localhost:9092" - }, - "producer_config": { - "type": "object", - "description": "Passed to https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#confluent_kafka.SerializingProducer" - }, - "schema_registry_url": { - "type": "string", - "description": "URL of schema registry being used.", - "default": "http://localhost:8081" - }, - "schema_registry_config": { - "type": "object", - "description": "Passed to https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#confluent_kafka.schema_registry.SchemaRegistryClient" - } - }, - "additionalProperties": false, - "required": [ - "bootstrap", - 
"schema_registry_url" - ] - }, - "topic_routes": { - "type": "object", - "properties": { - "MetadataChangeEvent": { - "type": "string", - "description": "Overridden Kafka topic name for the MetadataChangeEvent", - "default": "MetadataChangeEvent" - }, - "MetadataChangeProposal": { - "type": "string", - "description": "Overridden Kafka topic name for the MetadataChangeProposal", - "default": "MetadataChangeProposal" - } - }, - "additionalProperties": false - } - }, - "required": [ - "connection" - ], - "additionalProperties": false - }, - "feast-legacy": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "feast-legacy" - ] - }, - "config": { - "$ref": "#/definitions/feast-legacy_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "feast-legacy_config": { - "title": "FeastConfig", - "description": "Any source that produces dataset urns in a single environment should inherit this class", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "core_url": { - "title": "Core Url", - "description": "URL of Feast Core instance.", - "default": "localhost:6565", - "type": "string" - }, - "use_local_build": { - "title": "Use Local Build", - "description": "Whether to build Feast ingestion Docker image locally.", - "default": false, - "type": "boolean" - } - }, - "additionalProperties": false - }, - "redash": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "redash" - ] - }, - "config": { - "$ref": "#/definitions/redash_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "redash_config": { - "title": "RedashConfig", - "type": "object", - "properties": { - "connect_uri": { - "title": "Connect Uri", - "description": "Redash base URL.", - "default": "http://localhost:5000", - "type": "string" - }, - "api_key": { - "title": "Api Key", - 
"description": "Redash user API key.", - "default": "REDASH_API_KEY", - "type": "string" - }, - "dashboard_patterns": { - "title": "Dashboard Patterns", - "description": "regex patterns for dashboards to filter for ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "chart_patterns": { - "title": "Chart Patterns", - "description": "regex patterns for charts to filter for ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "skip_draft": { - "title": "Skip Draft", - "description": "Only ingest published dashboards and charts.", - "default": true, - "type": "boolean" - }, - "page_size": { - "title": "Page Size", - "description": "Limit on number of items to be queried at once.", - "default": 25, - "type": "integer" - }, - "api_page_limit": { - "title": "Api Page Limit", - "description": "Limit on number of pages queried for ingesting dashboards and charts API during pagination.", - "default": 9223372036854775807, - "type": "integer" - }, - "parallelism": { - "title": "Parallelism", - "description": "Parallelism to use while processing.", - "default": 1, - "type": "integer" - }, - "parse_table_names_from_sql": { - "title": "Parse Table Names From Sql", - "description": "See note below.", - "default": false, - "type": "boolean" - }, - "sql_parser": { - "title": "Sql Parser", - "description": "custom SQL parser. 
See note below for details.", - "default": "datahub.utilities.sql_parser.DefaultSQLParser", - "type": "string" - }, - "env": { - "title": "Env", - "description": "Environment to use in namespace when constructing URNs.", - "default": "PROD", - "type": "string" - } - }, - "additionalProperties": false - }, - "iceberg": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "iceberg" - ] - }, - "config": { - "$ref": "#/definitions/iceberg_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "iceberg_config": { - "title": "IcebergSourceConfig", - "description": "Any source that is a primary producer of Dataset metadata should inherit this class", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "adls": { - "title": "Adls", - "description": "[Azure Data Lake Storage](https://docs.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction) to crawl for Iceberg tables. This is one filesystem type supported by this source and **only one can be configured**.", - "allOf": [ - { - "$ref": "#/definitions/AdlsSourceConfig" - } - ] - }, - "localfs": { - "title": "Localfs", - "description": "Local path to crawl for Iceberg tables. This is one filesystem type supported by this source and **only one can be configured**.", - "type": "string" - }, - "max_path_depth": { - "title": "Max Path Depth", - "description": "Maximum folder depth to crawl for Iceberg tables. 
Folders deeper than this value will be silently ignored.", - "default": 2, - "type": "integer" - }, - "table_pattern": { - "title": "Table Pattern", - "description": "Regex patterns for tables to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "user_ownership_property": { - "title": "User Ownership Property", - "description": "Iceberg table property to look for a `CorpUser` owner. Can only hold a single user value. If property has no value, no owner information will be emitted.", - "default": "owner", - "type": "string" - }, - "group_ownership_property": { - "title": "Group Ownership Property", - "description": "Iceberg table property to look for a `CorpGroup` owner. Can only hold a single group value. If property has no value, no owner information will be emitted.", - "type": "string" - }, - "profiling": { - "title": "Profiling", - "default": { - "enabled": false, - "include_field_null_count": true, - "include_field_min_value": true, - "include_field_max_value": true - }, - "allOf": [ - { - "$ref": "#/definitions/IcebergProfilingConfig" - } - ] - } - }, - "additionalProperties": false - }, - "pulsar": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "pulsar" - ] - }, - "config": { - "$ref": "#/definitions/pulsar_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "pulsar_config": { - "title": "PulsarSourceConfig", - "description": "Base configuration class for stateful ingestion for source configs to inherit from.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "default": "PROD", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that 
all assets produced by this recipe belong to", - "type": "string" - }, - "stateful_ingestion": { - "title": "Stateful Ingestion", - "description": "see Stateful Ingestion", - "allOf": [ - { - "$ref": "#/definitions/PulsarSourceStatefulIngestionConfig" - } - ] - }, - "web_service_url": { - "title": "Web Service Url", - "description": "The web URL for the cluster.", - "default": "http://localhost:8080", - "type": "string" - }, - "timeout": { - "title": "Timeout", - "description": "Timeout setting, how long to wait for the Pulsar rest api to send data before giving up", - "default": 5, - "type": "integer" - }, - "issuer_url": { - "title": "Issuer Url", - "description": "The complete URL for a Custom Authorization Server. Mandatory for OAuth based authentication.", - "type": "string" - }, - "client_id": { - "title": "Client Id", - "description": "The application's client ID", - "type": "string" - }, - "client_secret": { - "title": "Client Secret", - "description": "The application's client secret", - "type": "string" - }, - "token": { - "title": "Token", - "description": "The access token for the application. Mandatory for token based authentication.", - "type": "string" - }, - "verify_ssl": { - "title": "Verify Ssl", - "description": "Either a boolean, in which case it controls whether we verify the server's TLS certificate, or a string, in which case it must be a path to a CA bundle to use.", - "default": true, - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "string" - } - ] - }, - "tenant_patterns": { - "title": "Tenant Patterns", - "description": "List of regex patterns for tenants to include/exclude from ingestion. 
By default all tenants are allowed.", - "default": { - "allow": [ - ".*" - ], - "deny": [ - "pulsar" - ], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "namespace_patterns": { - "title": "Namespace Patterns", - "description": "List of regex patterns for namespaces to include/exclude from ingestion. By default the functions namespace is denied.", - "default": { - "allow": [ - ".*" - ], - "deny": [ - "public/functions" - ], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "topic_patterns": { - "title": "Topic Patterns", - "description": "List of regex patterns for topics to include/exclude from ingestion. By default the Pulsar system topics are denied.", - "default": { - "allow": [ - ".*" - ], - "deny": [ - "/__.*$" - ], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "exclude_individual_partitions": { - "title": "Exclude Individual Partitions", - "description": "Extract each individual partitioned topic. e.g. 
when turned off a topic with 100 partitions will result in 100 Datasets.", - "default": true, - "type": "boolean" - }, - "tenants": { - "title": "Tenants", - "description": "Listing all tenants requires superUser role, alternatively you can set a list of tenants you want to scrape using the tenant admin role", - "default": [], - "type": "array", - "items": { - "type": "string" - } - }, - "domain": { - "title": "Domain", - "description": "Domain patterns", - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/AllowDenyPattern" - } - }, - "oid_config": { - "title": "Oid Config", - "description": "Placeholder for OpenId discovery document", - "type": "object" - } - }, - "additionalProperties": false - }, - "looker": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "looker" - ] - }, - "config": { - "$ref": "#/definitions/looker_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "looker_config": { - "title": "LookerDashboardSourceConfig", - "description": "Any source that is a primary producer of Dataset metadata should inherit this class", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "explore_naming_pattern": { - "title": "Explore Naming Pattern", - "description": "Pattern for providing dataset names to explores. Allowed variables are {project}, {model}, {name}. 
Default is `{model}.explore.{name}`", - "default": { - "allowed_vars": [ - "platform", - "env", - "project", - "model", - "name" - ], - "pattern": "{model}.explore.{name}", - "variables": null - }, - "allOf": [ - { - "$ref": "#/definitions/NamingPattern" - } - ] - }, - "explore_browse_pattern": { - "title": "Explore Browse Pattern", - "default": { - "allowed_vars": [ - "platform", - "env", - "project", - "model", - "name" - ], - "pattern": "/{env}/{platform}/{project}/explores/{model}.{name}", - "variables": null - }, - "allOf": [ - { - "$ref": "#/definitions/NamingPattern" - } - ] - }, - "view_naming_pattern": { - "title": "View Naming Pattern", - "description": "Pattern for providing dataset names to views. Allowed variables are `{project}`, `{model}`, `{name}`", - "default": { - "allowed_vars": [ - "platform", - "env", - "project", - "model", - "name" - ], - "pattern": "{project}.view.{name}", - "variables": null - }, - "allOf": [ - { - "$ref": "#/definitions/NamingPattern" - } - ] - }, - "view_browse_pattern": { - "title": "View Browse Pattern", - "description": "Pattern for providing browse paths to views. Allowed variables are `{project}`, `{model}`, `{name}`, `{platform}` and `{env}`", - "default": { - "allowed_vars": [ - "platform", - "env", - "project", - "model", - "name" - ], - "pattern": "/{env}/{platform}/{project}/views/{name}", - "variables": null - }, - "allOf": [ - { - "$ref": "#/definitions/NamingPattern" - } - ] - }, - "tag_measures_and_dimensions": { - "title": "Tag Measures And Dimensions", - "description": "When enabled, attaches tags to measures, dimensions and dimension groups to make them more discoverable. When disabled, adds this information to the description of the column.", - "default": true, - "type": "boolean" - }, - "platform_name": { - "title": "Platform Name", - "description": "Default platform name. 
Don't change.", - "default": "looker", - "type": "string" - }, - "github_info": { - "title": "Github Info", - "description": "Reference to your github location to enable easy navigation from DataHub to your LookML files", - "allOf": [ - { - "$ref": "#/definitions/GitHubInfo" - } - ] - }, - "client_id": { - "title": "Client Id", - "description": "Looker API client id.", - "type": "string" - }, - "client_secret": { - "title": "Client Secret", - "description": "Looker API client secret.", - "type": "string" - }, - "base_url": { - "title": "Base Url", - "description": "Url to your Looker instance: `https://company.looker.com:19999` or `https://looker.company.com`, or similar. Used for making API calls to Looker and constructing clickable dashboard and chart urls.", - "type": "string" - }, - "transport_options": { - "title": "Transport Options", - "description": "Populates the [TransportOptions](https://github.com/looker-open-source/sdk-codegen/blob/94d6047a0d52912ac082eb91616c1e7c379ab262/python/looker_sdk/rtl/transport.py#L70) struct for looker client", - "allOf": [ - { - "$ref": "#/definitions/TransportOptionsConfig" - } - ] - }, - "dashboard_pattern": { - "title": "Dashboard Pattern", - "description": "Patterns for selecting dashboard ids that are to be included", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "chart_pattern": { - "title": "Chart Pattern", - "description": "Patterns for selecting chart ids that are to be included", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "include_deleted": { - "title": "Include Deleted", - "description": "Whether to include deleted dashboards.", - "default": false, - "type": "boolean" - }, - "extract_owners": { - "title": "Extract Owners", - 
"description": "When enabled, extracts ownership from Looker directly. When disabled, ownership is left empty for dashboards and charts.", - "default": true, - "type": "boolean" - }, - "actor": { - "title": "Actor", - "description": "This config is deprecated in favor of `extract_owners`. Previously, was the actor to use in ownership properties of ingested metadata.", - "type": "string" - }, - "strip_user_ids_from_email": { - "title": "Strip User Ids From Email", - "description": "When enabled, converts Looker user emails of the form name@domain.com to urn:li:corpuser:name when assigning ownership", - "default": false, - "type": "boolean" - }, - "skip_personal_folders": { - "title": "Skip Personal Folders", - "description": "Whether to skip ingestion of dashboards in personal folders. Setting this to True will only ingest dashboards in the Shared folder space.", - "default": false, - "type": "boolean" - }, - "max_threads": { - "title": "Max Threads", - "description": "Max parallelism for Looker API calls. Defaults to cpuCount or 40", - "default": 10, - "type": "integer" - }, - "external_base_url": { - "title": "External Base Url", - "description": "Optional URL to use when constructing external URLs to Looker if the `base_url` is not the correct one to use. For example, `https://looker-public.company.com`. 
If not provided, the external base URL will default to `base_url`.", - "type": "string" - } - }, - "required": [ - "client_id", - "client_secret", - "base_url" - ], - "additionalProperties": false - }, - "sqlalchemy": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "sqlalchemy" - ] - }, - "config": { - "$ref": "#/definitions/sqlalchemy_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "sqlalchemy_config": { - "title": "SQLAlchemyGenericConfig", - "description": "Base configuration class for stateful ingestion for source configs to inherit from.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "Name of platform being ingested, used in constructing URNs.", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "stateful_ingestion": { - "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" - }, - "options": { - "title": "Options", - "default": {}, - "type": "object" - }, - "schema_pattern": { - "title": "Schema Pattern", - "description": "regex patterns for schemas to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - "description": "regex patterns for tables to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "view_pattern": { - "title": "View Pattern", - "description": "regex patterns for 
views to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "profile_pattern": { - "title": "Profile Pattern", - "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "domain": { - "title": "Domain", - "description": " regex patterns for tables/schemas to decide domain_key domain key (domain_key can be any string like \"sales\".) There can be multiple domain key specified.", - "default": {}, - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/AllowDenyPattern" - } - }, - "include_views": { - "title": "Include Views", - "description": "Whether views should be ingested.", - "default": true, - "type": "boolean" - }, - "include_tables": { - "title": "Include Tables", - "description": "Whether tables should be ingested.", - "default": true, - "type": "boolean" - }, - "profiling": { - "title": "Profiling", - "default": { - "enabled": false, - "limit": null, - "offset": null, - "report_dropped_profiles": false, - "turn_off_expensive_profiling_metrics": false, - "profile_table_level_only": false, - "include_field_null_count": true, - "include_field_min_value": true, - "include_field_max_value": true, - "include_field_mean_value": true, - "include_field_median_value": true, - "include_field_stddev_value": true, - "include_field_quantiles": false, - "include_field_distinct_value_frequencies": false, - "include_field_histogram": false, - "include_field_sample_values": true, - "allow_deny_patterns": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "max_number_of_fields_to_profile": null, - 
"profile_if_updated_since_days": 1, - "max_workers": 50, - "query_combiner_enabled": true, - "catch_exceptions": true, - "partition_profiling_enabled": true, - "bigquery_temp_table_schema": null, - "partition_datetime": null - }, - "allOf": [ - { - "$ref": "#/definitions/GEProfilingConfig" - } - ] - }, - "connect_uri": { - "title": "Connect Uri", - "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls", - "type": "string" - } - }, - "required": [ - "platform", - "connect_uri" - ], - "additionalProperties": false - }, - "azure-ad": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "azure-ad" - ] - }, - "config": { - "$ref": "#/definitions/azure-ad_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "azure-ad_config": { - "title": "AzureADConfig", - "description": "Config to create a token and connect to Azure AD instance", - "type": "object", - "properties": { - "client_id": { - "title": "Client Id", - "description": "Application ID. Found in your app registration on Azure AD Portal", - "type": "string" - }, - "tenant_id": { - "title": "Tenant Id", - "description": "Directory ID. Found in your app registration on Azure AD Portal", - "type": "string" - }, - "client_secret": { - "title": "Client Secret", - "description": "Client secret. Found in your app registration on Azure AD Portal", - "type": "string" - }, - "authority": { - "title": "Authority", - "description": "The authority (https://docs.microsoft.com/en-us/azure/active-directory/develop/msal-client-application-configuration) is a URL that indicates a directory that MSAL can request tokens from.", - "type": "string" - }, - "token_url": { - "title": "Token Url", - "description": "The token URL that acquires a token from Azure AD for authorizing requests. This source will only work with v1.0 endpoint.", - "type": "string" - }, - "redirect": { - "title": "Redirect", - "description": "Redirect URI. 
Found in your app registration on Azure AD Portal.", - "default": "https://login.microsoftonline.com/common/oauth2/nativeclient", - "type": "string" - }, - "graph_url": { - "title": "Graph Url", - "description": "[Microsoft Graph API endpoint](https://docs.microsoft.com/en-us/graph/use-the-api)", - "default": "https://graph.microsoft.com/v1.0", - "type": "string" - }, - "azure_ad_response_to_username_attr": { - "title": "Azure Ad Response To Username Attr", - "description": "Which Azure AD User Response attribute to use as input to DataHub username mapping.", - "default": "userPrincipalName", - "type": "string" - }, - "azure_ad_response_to_username_regex": { - "title": "Azure Ad Response To Username Regex", - "description": "A regex used to parse the DataHub username from the attribute specified in `azure_ad_response_to_username_attr`.", - "default": "(.*)", - "type": "string" - }, - "azure_ad_response_to_groupname_attr": { - "title": "Azure Ad Response To Groupname Attr", - "description": "Which Azure AD Group Response attribute to use as input to DataHub group name mapping.", - "default": "displayName", - "type": "string" - }, - "azure_ad_response_to_groupname_regex": { - "title": "Azure Ad Response To Groupname Regex", - "description": "A regex used to parse the DataHub group name from the attribute specified in `azure_ad_response_to_groupname_attr`.", - "default": "(.*)", - "type": "string" - }, - "ingest_users": { - "title": "Ingest Users", - "description": "Whether users should be ingested into DataHub.", - "default": true, - "type": "boolean" - }, - "ingest_groups": { - "title": "Ingest Groups", - "description": "Whether groups should be ingested into DataHub.", - "default": true, - "type": "boolean" - }, - "ingest_group_membership": { - "title": "Ingest Group Membership", - "description": "Whether group membership should be ingested into DataHub. 
ingest_groups must be True if this is True.", - "default": true, - "type": "boolean" - }, - "ingest_groups_users": { - "title": "Ingest Groups Users", - "description": "This option is useful only when `ingest_users` is set to False and `ingest_group_membership` to True. As effect, only the users which belongs to the selected groups will be ingested.", - "default": true, - "type": "boolean" - }, - "users_pattern": { - "title": "Users Pattern", - "description": "regex patterns for users to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "groups_pattern": { - "title": "Groups Pattern", - "description": "regex patterns for groups to include in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "filtered_tracking": { - "title": "Filtered Tracking", - "description": "If enabled, report will contain names of filtered users and groups.", - "default": true, - "type": "boolean" - }, - "mask_group_id": { - "title": "Mask Group Id", - "description": "Whether workunit ID's for groups should be masked to avoid leaking sensitive information.", - "default": true, - "type": "boolean" - }, - "mask_user_id": { - "title": "Mask User Id", - "description": "Whether workunit ID's for users should be masked to avoid leaking sensitive information.", - "default": true, - "type": "boolean" - } - }, - "required": [ - "client_id", - "tenant_id", - "client_secret", - "authority", - "token_url" - ], - "additionalProperties": false - }, - "starburst-trino-usage": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "starburst-trino-usage" - ] - }, - "config": { - "$ref": "#/definitions/starburst-trino-usage_config" - } - }, - "required": [ - "type", - 
"config" - ] - }, - "starburst-trino-usage_config": { - "title": "TrinoUsageConfig", - "description": "Base configuration class for stateful ingestion for source configs to inherit from.", - "type": "object", - "properties": { - "bucket_duration": { - "description": "Size of the time window to aggregate usage stats.", - "default": "DAY", - "allOf": [ - { - "$ref": "#/definitions/BucketDuration" - } - ] - }, - "end_time": { - "title": "End Time", - "description": "Latest date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", - "type": "string", - "format": "date-time" - }, - "start_time": { - "title": "Start Time", - "description": "Earliest date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", - "type": "string", - "format": "date-time" - }, - "top_n_queries": { - "title": "Top N Queries", - "description": "Number of top queries to save to each table.", - "default": 10, - "exclusiveMinimum": 0, - "type": "integer" - }, - "user_email_pattern": { - "title": "User Email Pattern", - "description": "regex patterns for user emails to filter in usage.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "include_operational_stats": { - "title": "Include Operational Stats", - "description": "Whether to display operational stats.", - "default": true, - "type": "boolean" - }, - "include_read_operational_stats": { - "title": "Include Read Operational Stats", - "description": "Whether to report read operational stats. 
Experimental.", - "default": false, - "type": "boolean" - }, - "format_sql_queries": { - "title": "Format Sql Queries", - "description": "Whether to format sql queries", - "default": false, - "type": "boolean" - }, - "include_top_n_queries": { - "title": "Include Top N Queries", - "description": "Whether to ingest the top_n_queries.", - "default": true, - "type": "boolean" - }, - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "stateful_ingestion": { - "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" - }, - "options": { - "title": "Options", - "default": {}, - "type": "object" - }, - "schema_pattern": { - "title": "Schema Pattern", - "description": "regex patterns for schemas to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - "description": "regex patterns for tables to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "view_pattern": { - "title": "View Pattern", - "description": "regex patterns for views to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "profile_pattern": { - "title": "Profile Pattern", - 
"description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "domain": { - "title": "Domain", - "description": " regex patterns for tables/schemas to decide domain_key domain key (domain_key can be any string like \"sales\".) There can be multiple domain key specified.", - "default": {}, - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/AllowDenyPattern" - } - }, - "include_views": { - "title": "Include Views", - "description": "Whether views should be ingested.", - "default": true, - "type": "boolean" - }, - "include_tables": { - "title": "Include Tables", - "description": "Whether tables should be ingested.", - "default": true, - "type": "boolean" - }, - "profiling": { - "title": "Profiling", - "default": { - "enabled": false, - "limit": null, - "offset": null, - "report_dropped_profiles": false, - "turn_off_expensive_profiling_metrics": false, - "profile_table_level_only": false, - "include_field_null_count": true, - "include_field_min_value": true, - "include_field_max_value": true, - "include_field_mean_value": true, - "include_field_median_value": true, - "include_field_stddev_value": true, - "include_field_quantiles": false, - "include_field_distinct_value_frequencies": false, - "include_field_histogram": false, - "include_field_sample_values": true, - "allow_deny_patterns": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "max_number_of_fields_to_profile": null, - "profile_if_updated_since_days": 1, - "max_workers": 50, - "query_combiner_enabled": true, - "catch_exceptions": true, - "partition_profiling_enabled": true, - "bigquery_temp_table_schema": null, - "partition_datetime": null - }, - "allOf": [ - { - "$ref": "#/definitions/GEProfilingConfig" - } - ] 
- }, - "username": { - "title": "Username", - "description": "username", - "type": "string" - }, - "password": { - "title": "Password", - "description": "password", - "type": "string", - "writeOnly": true, - "format": "password" - }, - "host_port": { - "title": "Host Port", - "description": "host URL", - "type": "string" - }, - "database": { - "title": "Database", - "description": "The name of the catalog from getting the usage", - "type": "string" - }, - "database_alias": { - "title": "Database Alias", - "description": "Alias to apply to database when ingesting.", - "type": "string" - }, - "scheme": { - "title": "Scheme", - "default": "trino", - "type": "string" - }, - "sqlalchemy_uri": { - "title": "Sqlalchemy Uri", - "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", - "type": "string" - }, - "email_domain": { - "title": "Email Domain", - "description": "The email domain which will be appended to the users ", - "type": "string" - }, - "audit_catalog": { - "title": "Audit Catalog", - "description": "The catalog name where the audit table can be found ", - "type": "string" - }, - "audit_schema": { - "title": "Audit Schema", - "description": "The schema name where the audit table can be found", - "type": "string" - } - }, - "required": [ - "host_port", - "database", - "email_domain", - "audit_catalog", - "audit_schema" - ], - "additionalProperties": false - }, - "postgres": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "postgres" - ] - }, - "config": { - "$ref": "#/definitions/postgres_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "postgres_config": { - "title": "PostgresConfig", - "description": "Base configuration class for stateful ingestion for source configs to inherit from.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all 
assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "stateful_ingestion": { - "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" - }, - "options": { - "title": "Options", - "default": {}, - "type": "object" - }, - "schema_pattern": { - "title": "Schema Pattern", - "default": { - "allow": [ - ".*" - ], - "deny": [ - "information_schema" - ], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - "description": "regex patterns for tables to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "view_pattern": { - "title": "View Pattern", - "description": "regex patterns for views to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "profile_pattern": { - "title": "Profile Pattern", - "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "domain": { - "title": "Domain", - "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", - "default": {}, - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/AllowDenyPattern" - } - }, - "include_views": { - "title": "Include Views", - "description": "Whether views should be ingested.", - "default": true, - "type": "boolean" - }, - "include_tables": { - "title": "Include Tables", - "description": "Whether tables should be ingested.", - "default": true, - "type": "boolean" - }, - "profiling": { - "title": "Profiling", - "default": { - "enabled": false, - "limit": null, - "offset": null, - "report_dropped_profiles": false, - "turn_off_expensive_profiling_metrics": false, - "profile_table_level_only": false, - "include_field_null_count": true, - "include_field_min_value": true, - "include_field_max_value": true, - "include_field_mean_value": true, - "include_field_median_value": true, - "include_field_stddev_value": true, - "include_field_quantiles": false, - "include_field_distinct_value_frequencies": false, - "include_field_histogram": false, - "include_field_sample_values": true, - "allow_deny_patterns": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "max_number_of_fields_to_profile": null, - "profile_if_updated_since_days": 1, - "max_workers": 50, - "query_combiner_enabled": true, - "catch_exceptions": true, - "partition_profiling_enabled": true, - "bigquery_temp_table_schema": null, - "partition_datetime": null - }, - "allOf": [ - { - "$ref": "#/definitions/GEProfilingConfig" - } - ] - }, - "username": { - "title": "Username", - "description": "username", - "type": "string" - }, - "password": { - "title": "Password", - "description": "password", - "type": "string", - "writeOnly": true, - "format": "password" - }, - "host_port": { - "title": "Host Port", - "description": "host URL", - "type": "string" - }, - "database": { - "title": "Database", - "description": "database (catalog)", - "type": "string" - }, - "database_alias": { - 
"title": "Database Alias", - "description": "Alias to apply to database when ingesting.", - "type": "string" - }, - "scheme": { - "title": "Scheme", - "description": "database scheme", - "default": "postgresql+psycopg2", - "type": "string" - }, - "sqlalchemy_uri": { - "title": "Sqlalchemy Uri", - "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", - "type": "string" - } - }, - "required": [ - "host_port" - ], - "additionalProperties": false - }, - "tableau": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "tableau" - ] - }, - "config": { - "$ref": "#/definitions/tableau_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "tableau_config": { - "title": "TableauConfig", - "type": "object", - "properties": { - "connect_uri": { - "title": "Connect Uri", - "description": "Tableau host URL.", - "type": "string" - }, - "username": { - "title": "Username", - "description": "Tableau username, must be set if authenticating using username/password.", - "type": "string" - }, - "password": { - "title": "Password", - "description": "Tableau password, must be set if authenticating using username/password.", - "type": "string" - }, - "token_name": { - "title": "Token Name", - "description": "Tableau token name, must be set if authenticating using a personal access token.", - "type": "string" - }, - "token_value": { - "title": "Token Value", - "description": "Tableau token value, must be set if authenticating using a personal access token.", - "type": "string" - }, - "site": { - "title": "Site", - "description": "Tableau Site. Always required for Tableau Online. 
Use emptystring to connect with Default site on Tableau Server.", - "default": "", - "type": "string" - }, - "projects": { - "title": "Projects", - "description": "List of projects", - "default": [ - "default" - ], - "type": "array", - "items": { - "type": "string" - } - }, - "default_schema_map": { - "title": "Default Schema Map", - "description": "Default schema to use when schema is not found.", - "default": {}, - "type": "object" - }, - "ingest_tags": { - "title": "Ingest Tags", - "description": "Ingest Tags from source. This will override Tags entered from UI", - "default": false, - "type": "boolean" - }, - "ingest_owner": { - "title": "Ingest Owner", - "description": "Ingest Owner from source. This will override Owner info entered from UI", - "default": false, - "type": "boolean" - }, - "ingest_tables_external": { - "title": "Ingest Tables External", - "description": "Ingest details for tables external to (not embedded in) tableau as entities.", - "default": false, - "type": "boolean" - }, - "workbooks_page_size": { - "title": "Workbooks Page Size", - "description": "@deprecated(use page_size instead) Number of workbooks to query at a time using Tableau api.", - "type": "integer" - }, - "page_size": { - "title": "Page Size", - "description": "Number of metadata objects (e.g. 
CustomSQLTable, PublishedDatasource, etc) to query at a time using Tableau api.", - "default": 10, - "type": "integer" - }, - "env": { - "title": "Env", - "description": "Environment to use in namespace when constructing URNs.", - "default": "PROD", - "type": "string" - } - }, - "required": [ - "connect_uri" - ], - "additionalProperties": false - }, - "lookml": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "lookml" - ] - }, - "config": { - "$ref": "#/definitions/lookml_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "lookml_config": { - "title": "LookMLSourceConfig", - "description": "Any source that is a primary producer of Dataset metadata should inherit this class", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "explore_naming_pattern": { - "title": "Explore Naming Pattern", - "description": "Pattern for providing dataset names to explores. Allowed variables are {project}, {model}, {name}. 
Default is `{model}.explore.{name}`", - "default": { - "allowed_vars": [ - "platform", - "env", - "project", - "model", - "name" - ], - "pattern": "{model}.explore.{name}", - "variables": null - }, - "allOf": [ - { - "$ref": "#/definitions/NamingPattern" - } - ] - }, - "explore_browse_pattern": { - "title": "Explore Browse Pattern", - "default": { - "allowed_vars": [ - "platform", - "env", - "project", - "model", - "name" - ], - "pattern": "/{env}/{platform}/{project}/explores/{model}.{name}", - "variables": null - }, - "allOf": [ - { - "$ref": "#/definitions/NamingPattern" - } - ] - }, - "view_naming_pattern": { - "title": "View Naming Pattern", - "description": "Pattern for providing dataset names to views. Allowed variables are `{project}`, `{model}`, `{name}`", - "default": { - "allowed_vars": [ - "platform", - "env", - "project", - "model", - "name" - ], - "pattern": "{project}.view.{name}", - "variables": null - }, - "allOf": [ - { - "$ref": "#/definitions/NamingPattern" - } - ] - }, - "view_browse_pattern": { - "title": "View Browse Pattern", - "description": "Pattern for providing browse paths to views. Allowed variables are `{project}`, `{model}`, `{name}`, `{platform}` and `{env}`", - "default": { - "allowed_vars": [ - "platform", - "env", - "project", - "model", - "name" - ], - "pattern": "/{env}/{platform}/{project}/views/{name}", - "variables": null - }, - "allOf": [ - { - "$ref": "#/definitions/NamingPattern" - } - ] - }, - "tag_measures_and_dimensions": { - "title": "Tag Measures And Dimensions", - "description": "When enabled, attaches tags to measures, dimensions and dimension groups to make them more discoverable. When disabled, adds this information to the description of the column.", - "default": true, - "type": "boolean" - }, - "platform_name": { - "title": "Platform Name", - "description": "Default platform name. 
Don't change.", - "default": "looker", - "type": "string" - }, - "github_info": { - "title": "Github Info", - "description": "Reference to your github location to enable easy navigation from DataHub to your LookML files", - "allOf": [ - { - "$ref": "#/definitions/GitHubInfo" - } - ] - }, - "base_folder": { - "title": "Base Folder", - "description": "Local filepath where the root of the LookML repo lives. This is typically the root folder where the `*.model.lkml` and `*.view.lkml` files are stored. e.g. If you have checked out your LookML repo under `/Users/jdoe/workspace/my-lookml-repo`, then set `base_folder` to `/Users/jdoe/workspace/my-lookml-repo`.", - "format": "directory-path", - "type": "string" - }, - "connection_to_platform_map": { - "title": "Connection To Platform Map", - "description": "A mapping of [Looker connection names](https://docs.looker.com/reference/model-params/connection-for-model) to DataHub platform, database, and schema values.", - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/LookerConnectionDefinition" - } - }, - "model_pattern": { - "title": "Model Pattern", - "description": "List of regex patterns for LookML models to include in the extraction.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "view_pattern": { - "title": "View Pattern", - "description": "List of regex patterns for LookML views to include in the extraction.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "parse_table_names_from_sql": { - "title": "Parse Table Names From Sql", - "description": "See note below.", - "default": false, - "type": "boolean" - }, - "sql_parser": { - "title": "Sql Parser", - "description": "See note below.", - "default": 
"datahub.utilities.sql_parser.DefaultSQLParser", - "type": "string" - }, - "api": { - "$ref": "#/definitions/LookerAPIConfig" - }, - "project_name": { - "title": "Project Name", - "description": "Required if you don't specify the `api` section. The project name within which all the model files live. See (https://docs.looker.com/data-modeling/getting-started/how-project-works) to understand what the Looker project name should be. The simplest way to see your projects is to click on `Develop` followed by `Manage LookML Projects` in the Looker application.", - "type": "string" - }, - "transport_options": { - "title": "Transport Options", - "description": "Populates the [TransportOptions](https://github.com/looker-open-source/sdk-codegen/blob/94d6047a0d52912ac082eb91616c1e7c379ab262/python/looker_sdk/rtl/transport.py#L70) struct for looker client", - "allOf": [ - { - "$ref": "#/definitions/TransportOptionsConfig" - } - ] - }, - "max_file_snippet_length": { - "title": "Max File Snippet Length", - "description": "When extracting the view definition from a lookml file, the maximum number of characters to extract.", - "default": 512000, - "type": "integer" - } - }, - "required": [ - "base_folder" - ], - "additionalProperties": false - }, - "powerbi": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "powerbi" - ] - }, - "config": { - "$ref": "#/definitions/powerbi_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "powerbi_config": { - "title": "PowerBiDashboardSourceConfig", - "description": "Any source that produces dataset urns in a single environment should inherit this class", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "tenant_id": { - "title": "Tenant Id", - "description": "Power BI tenant identifier.", - "type": "string" - }, - "workspace_id": { - "title": 
"Workspace Id", - "description": "Power BI workspace identifier.", - "type": "string" - }, - "dataset_type_mapping": { - "title": "Dataset Type Mapping", - "description": "Mapping of Power BI datasource type to Datahub dataset.", - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "client_id": { - "title": "Client Id", - "description": "Azure AD App client identifier.", - "type": "string" - }, - "client_secret": { - "title": "Client Secret", - "description": "Azure AD App client secret.", - "type": "string" - }, - "scan_timeout": { - "title": "Scan Timeout", - "description": "time in seconds to wait for Power BI metadata scan result.", - "default": 60, - "type": "integer" - }, - "scope": { - "title": "Scope", - "default": "https://analysis.windows.net/powerbi/api/.default", - "type": "string" - }, - "base_url": { - "title": "Base Url", - "default": "https://api.powerbi.com/v1.0/myorg/groups", - "type": "string" - }, - "admin_base_url": { - "title": "Admin Base Url", - "default": "https://api.powerbi.com/v1.0/myorg/admin", - "type": "string" - }, - "authority": { - "title": "Authority", - "default": "https://login.microsoftonline.com/", - "type": "string" - }, - "platform_name": { - "title": "Platform Name", - "default": "powerbi", - "type": "string" - }, - "platform_urn": { - "title": "Platform Urn", - "default": "urn:li:dataPlatform:powerbi", - "type": "string" - }, - "dashboard_pattern": { - "title": "Dashboard Pattern", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "chart_pattern": { - "title": "Chart Pattern", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - } - }, - "required": [ - "tenant_id", - "workspace_id", - "dataset_type_mapping", - "client_id", - 
"client_secret" - ], - "additionalProperties": false - }, - "kafka-connect": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "kafka-connect" - ] - }, - "config": { - "$ref": "#/definitions/kafka-connect_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "kafka-connect_config": { - "title": "KafkaConnectSourceConfig", - "description": "Any non-Dataset source that produces lineage to Datasets should inherit this class.\ne.g. Orchestrators, Pipelines, BI Tools etc.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform_instance_map": { - "title": "Platform Instance Map", - "description": "Platform instance mapping to use when constructing URNs. e.g.`platform_instance_map: { \"hive\": \"warehouse\" }`", - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "connect_uri": { - "title": "Connect Uri", - "description": "URI to connect to.", - "default": "http://localhost:8083/", - "type": "string" - }, - "username": { - "title": "Username", - "description": "Kafka Connect username.", - "type": "string" - }, - "password": { - "title": "Password", - "description": "Kafka Connect password.", - "type": "string" - }, - "cluster_name": { - "title": "Cluster Name", - "description": "Cluster to ingest from.", - "default": "connect-cluster", - "type": "string" - }, - "construct_lineage_workunits": { - "title": "Construct Lineage Workunits", - "description": "Whether to create the input and output Dataset entities", - "default": true, - "type": "boolean" - }, - "connector_patterns": { - "title": "Connector Patterns", - "description": "regex patterns for connectors to filter for ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": 
"#/definitions/AllowDenyPattern" - } - ] - }, - "provided_configs": { - "title": "Provided Configs", - "description": "Provided Configurations", - "type": "array", - "items": { - "$ref": "#/definitions/ProvidedConfig" - } - }, - "connect_to_platform_map": { - "title": "Connect To Platform Map", - "description": "Platform instance mapping when multiple instances for a platform is available. Entry for a platform should be in either `platform_instance_map` or `connect_to_platform_map`. e.g.`connect_to_platform_map: { \"postgres-connector-finance-db\": \"postgres\": \"core_finance_instance\" }`", - "type": "object" - } - }, - "additionalProperties": false - }, - "snowflake": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "snowflake" - ] - }, - "config": { - "$ref": "#/definitions/snowflake_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "snowflake_config": { - "title": "SnowflakeConfig", - "description": "Base configuration class for stateful ingestion for source configs to inherit from.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "stateful_ingestion": { - "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" - }, - "options": { - "title": "Options", - "default": {}, - "type": "object" - }, - "schema_pattern": { - "title": "Schema Pattern", - "description": "regex patterns for schemas to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": 
"#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - "description": "regex patterns for tables to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "view_pattern": { - "title": "View Pattern", - "description": "regex patterns for views to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "profile_pattern": { - "title": "Profile Pattern", - "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "domain": { - "title": "Domain", - "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", - "default": {}, - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/AllowDenyPattern" - } - }, - "include_views": { - "title": "Include Views", - "description": "Whether views should be ingested.", - "default": true, - "type": "boolean" - }, - "include_tables": { - "title": "Include Tables", - "description": "Whether tables should be ingested.", - "default": true, - "type": "boolean" - }, - "profiling": { - "title": "Profiling", - "default": { - "enabled": false, - "limit": null, - "offset": null, - "report_dropped_profiles": false, - "turn_off_expensive_profiling_metrics": false, - "profile_table_level_only": false, - "include_field_null_count": true, - "include_field_min_value": true, - "include_field_max_value": true, - "include_field_mean_value": true, - "include_field_median_value": true, - "include_field_stddev_value": true, - "include_field_quantiles": false, - "include_field_distinct_value_frequencies": false, - "include_field_histogram": false, - "include_field_sample_values": true, - "allow_deny_patterns": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "max_number_of_fields_to_profile": null, - "profile_if_updated_since_days": 1, - "max_workers": 50, - "query_combiner_enabled": true, - "catch_exceptions": true, - "partition_profiling_enabled": true, - "bigquery_temp_table_schema": null, - "partition_datetime": null - }, - "allOf": [ - { - "$ref": "#/definitions/GEProfilingConfig" - } - ] - }, - "bucket_duration": { - "description": "Size of the time window to aggregate usage stats.", - "default": "DAY", - "allOf": [ - { - "$ref": "#/definitions/BucketDuration" - } - ] - }, - "end_time": { - "title": "End Time", - "description": "Latest date of usage to consider. 
Default: Last full day in UTC (or hour, depending on `bucket_duration`)", - "type": "string", - "format": "date-time" - }, - "start_time": { - "title": "Start Time", - "description": "Earliest date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", - "type": "string", - "format": "date-time" - }, - "scheme": { - "title": "Scheme", - "default": "snowflake", - "type": "string" - }, - "username": { - "title": "Username", - "description": "Snowflake username.", - "type": "string" - }, - "password": { - "title": "Password", - "description": "Snowflake password.", - "type": "string", - "writeOnly": true, - "format": "password" - }, - "private_key_path": { - "title": "Private Key Path", - "description": "The path to the private key if using key pair authentication. See: https://docs.snowflake.com/en/user-guide/key-pair-auth.html", - "type": "string" - }, - "private_key_password": { - "title": "Private Key Password", - "description": "Password for your private key if using key pair authentication.", - "type": "string", - "writeOnly": true, - "format": "password" - }, - "oauth_config": { - "title": "Oauth Config", - "description": "oauth configuration - https://docs.snowflake.com/en/user-guide/python-connector-example.html#connecting-with-oauth", - "allOf": [ - { - "$ref": "#/definitions/OauthConfiguration" - } - ] - }, - "authentication_type": { - "title": "Authentication Type", - "description": "The type of authenticator to use when connecting to Snowflake. Supports \"DEFAULT_AUTHENTICATOR\", \"EXTERNAL_BROWSER_AUTHENTICATOR\" and \"KEY_PAIR_AUTHENTICATOR\".", - "default": "DEFAULT_AUTHENTICATOR", - "type": "string" - }, - "host_port": { - "title": "Host Port", - "description": "DEPRECATED: Snowflake account. e.g. abc48144", - "type": "string" - }, - "account_id": { - "title": "Account Id", - "description": "Snowflake account. e.g. 
abc48144", - "type": "string" - }, - "warehouse": { - "title": "Warehouse", - "description": "Snowflake warehouse.", - "type": "string" - }, - "role": { - "title": "Role", - "description": "Snowflake role.", - "type": "string" - }, - "include_table_lineage": { - "title": "Include Table Lineage", - "description": "If enabled, populates the snowflake table-to-table and s3-to-snowflake table lineage. Requires appropriate grants given to the role.", - "default": true, - "type": "boolean" - }, - "include_view_lineage": { - "title": "Include View Lineage", - "description": "If enabled, populates the snowflake view->table and table->view lineages (no view->view lineage yet). Requires appropriate grants given to the role, and include_table_lineage to be True.", - "default": true, - "type": "boolean" - }, - "connect_args": { - "title": "Connect Args", - "description": "Connect args to pass to Snowflake SqlAlchemy driver", - "type": "object" - }, - "check_role_grants": { - "title": "Check Role Grants", - "description": "If set to True then checks role grants at the beginning of the ingestion run. To be used for debugging purposes. If you think everything is working fine then set it to False. 
In some cases this can take long depending on how many roles you might have.", - "default": false, - "type": "boolean" - }, - "database_pattern": { - "title": "Database Pattern", - "default": { - "allow": [ - ".*" - ], - "deny": [ - "^UTIL_DB$", - "^SNOWFLAKE$", - "^SNOWFLAKE_SAMPLE_DATA$" - ], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "provision_role": { - "$ref": "#/definitions/SnowflakeProvisionRoleConfig" - }, - "ignore_start_time_lineage": { - "title": "Ignore Start Time Lineage", - "default": false, - "type": "boolean" - }, - "upstream_lineage_in_report": { - "title": "Upstream Lineage In Report", - "default": false, - "type": "boolean" - } - }, - "additionalProperties": false - }, - "snowflake-usage": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "snowflake-usage" - ] - }, - "config": { - "$ref": "#/definitions/snowflake-usage_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "snowflake-usage_config": { - "title": "SnowflakeUsageConfig", - "description": "Base configuration class for stateful ingestion for source configs to inherit from.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "stateful_ingestion": { - "title": "Stateful Ingestion", - "description": "Stateful ingestion related configs", - "allOf": [ - { - "$ref": "#/definitions/SnowflakeStatefulIngestionConfig" - } - ] - }, - "bucket_duration": { - "description": "Size of the time window to aggregate 
usage stats.", - "default": "DAY", - "allOf": [ - { - "$ref": "#/definitions/BucketDuration" - } - ] - }, - "end_time": { - "title": "End Time", - "description": "Latest date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", - "type": "string", - "format": "date-time" - }, - "start_time": { - "title": "Start Time", - "description": "Earliest date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", - "type": "string", - "format": "date-time" - }, - "top_n_queries": { - "title": "Top N Queries", - "description": "Number of top queries to save to each table.", - "default": 10, - "exclusiveMinimum": 0, - "type": "integer" - }, - "user_email_pattern": { - "title": "User Email Pattern", - "description": "regex patterns for user emails to filter in usage.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "include_operational_stats": { - "title": "Include Operational Stats", - "description": "Whether to display operational stats.", - "default": true, - "type": "boolean" - }, - "include_read_operational_stats": { - "title": "Include Read Operational Stats", - "description": "Whether to report read operational stats. 
Experimental.", - "default": false, - "type": "boolean" - }, - "format_sql_queries": { - "title": "Format Sql Queries", - "description": "Whether to format sql queries", - "default": false, - "type": "boolean" - }, - "include_top_n_queries": { - "title": "Include Top N Queries", - "description": "Whether to ingest the top_n_queries.", - "default": true, - "type": "boolean" - }, - "scheme": { - "title": "Scheme", - "default": "snowflake", - "type": "string" - }, - "username": { - "title": "Username", - "description": "Snowflake username.", - "type": "string" - }, - "password": { - "title": "Password", - "description": "Snowflake password.", - "type": "string", - "writeOnly": true, - "format": "password" - }, - "private_key_path": { - "title": "Private Key Path", - "description": "The path to the private key if using key pair authentication. See: https://docs.snowflake.com/en/user-guide/key-pair-auth.html", - "type": "string" - }, - "private_key_password": { - "title": "Private Key Password", - "description": "Password for your private key if using key pair authentication.", - "type": "string", - "writeOnly": true, - "format": "password" - }, - "oauth_config": { - "title": "Oauth Config", - "description": "oauth configuration - https://docs.snowflake.com/en/user-guide/python-connector-example.html#connecting-with-oauth", - "allOf": [ - { - "$ref": "#/definitions/OauthConfiguration" - } - ] - }, - "authentication_type": { - "title": "Authentication Type", - "description": "The type of authenticator to use when connecting to Snowflake. Supports \"DEFAULT_AUTHENTICATOR\", \"EXTERNAL_BROWSER_AUTHENTICATOR\" and \"KEY_PAIR_AUTHENTICATOR\".", - "default": "DEFAULT_AUTHENTICATOR", - "type": "string" - }, - "host_port": { - "title": "Host Port", - "description": "DEPRECATED: Snowflake account. e.g. abc48144", - "type": "string" - }, - "account_id": { - "title": "Account Id", - "description": "Snowflake account. e.g. 
abc48144", - "type": "string" - }, - "warehouse": { - "title": "Warehouse", - "description": "Snowflake warehouse.", - "type": "string" - }, - "role": { - "title": "Role", - "description": "Snowflake role.", - "type": "string" - }, - "include_table_lineage": { - "title": "Include Table Lineage", - "description": "If enabled, populates the snowflake table-to-table and s3-to-snowflake table lineage. Requires appropriate grants given to the role.", - "default": true, - "type": "boolean" - }, - "include_view_lineage": { - "title": "Include View Lineage", - "description": "If enabled, populates the snowflake view->table and table->view lineages (no view->view lineage yet). Requires appropriate grants given to the role, and include_table_lineage to be True.", - "default": true, - "type": "boolean" - }, - "connect_args": { - "title": "Connect Args", - "description": "Connect args to pass to Snowflake SqlAlchemy driver", - "type": "object" - }, - "check_role_grants": { - "title": "Check Role Grants", - "description": "If set to True then checks role grants at the beginning of the ingestion run. To be used for debugging purposes. If you think everything is working fine then set it to False. In some cases this can take long depending on how many roles you might have.", - "default": false, - "type": "boolean" - }, - "options": { - "title": "Options", - "description": "Any options specified here will be passed to SQLAlchemy's create_engine as kwargs. 
See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine for details.", - "type": "object" - }, - "database_pattern": { - "title": "Database Pattern", - "description": "List of regex patterns for databases to include/exclude in usage ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [ - "^UTIL_DB$", - "^SNOWFLAKE$", - "^SNOWFLAKE_SAMPLE_DATA$" - ], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "email_domain": { - "title": "Email Domain", - "description": "Email domain of your organisation so users can be displayed on UI appropriately.", - "type": "string" - }, - "schema_pattern": { - "title": "Schema Pattern", - "description": "List of regex patterns for schemas to include/exclude in usage ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - "description": "List of regex patterns for tables to include in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "view_pattern": { - "title": "View Pattern", - "description": "List of regex patterns for views to include in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "apply_view_usage_to_tables": { - "title": "Apply View Usage To Tables", - "description": "Allow/deny patterns for views in snowflake dataset names.", - "default": false, - "type": "boolean" - } - }, - "additionalProperties": false - }, - "redshift-usage": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - 
"redshift-usage" - ] - }, - "config": { - "$ref": "#/definitions/redshift-usage_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "redshift-usage_config": { - "title": "RedshiftUsageConfig", - "description": "Base configuration class for stateful ingestion for source configs to inherit from.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "bucket_duration": { - "description": "Size of the time window to aggregate usage stats.", - "default": "DAY", - "allOf": [ - { - "$ref": "#/definitions/BucketDuration" - } - ] - }, - "end_time": { - "title": "End Time", - "description": "Latest date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", - "type": "string", - "format": "date-time" - }, - "start_time": { - "title": "Start Time", - "description": "Earliest date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", - "type": "string", - "format": "date-time" - }, - "top_n_queries": { - "title": "Top N Queries", - "description": "Number of top queries to save to each table.", - "default": 10, - "exclusiveMinimum": 0, - "type": "integer" - }, - "user_email_pattern": { - "title": "User Email Pattern", - "description": "regex patterns for user emails to filter in usage.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "include_operational_stats": { - "title": "Include Operational Stats", - "description": "Whether to display operational stats.", - "default": true, - "type": "boolean" - }, - "include_read_operational_stats": { - "title": "Include Read Operational Stats", - "description": "Whether to report read operational stats. 
Experimental.", - "default": false, - "type": "boolean" - }, - "format_sql_queries": { - "title": "Format Sql Queries", - "description": "Whether to format sql queries", - "default": false, - "type": "boolean" - }, - "include_top_n_queries": { - "title": "Include Top N Queries", - "description": "Whether to ingest the top_n_queries.", - "default": true, - "type": "boolean" - }, - "platform_instance_map": { - "title": "Platform Instance Map", - "description": "A holder for platform -> platform_instance mappings to generate correct dataset urns", - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "stateful_ingestion": { - "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" - }, - "options": { - "title": "Options", - "description": "Any options specified here will be passed to SQLAlchemy's create_engine as kwargs.See https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine for details.", - "default": {}, - "type": "object" - }, - "schema_pattern": { - "title": "Schema Pattern", - "default": { - "allow": [ - ".*" - ], - "deny": [ - "information_schema" - ], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - "description": "regex patterns for tables to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "view_pattern": { - "title": "View Pattern", - "description": "regex patterns for views to filter in ingestion.", - "default": { - 
"allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "profile_pattern": { - "title": "Profile Pattern", - "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "domain": { - "title": "Domain", - "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) There can be multiple domain key specified.", - "default": {}, - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/AllowDenyPattern" - } - }, - "include_views": { - "title": "Include Views", - "description": "Whether views should be ingested.", - "default": true, - "type": "boolean" - }, - "include_tables": { - "title": "Include Tables", - "description": "Whether tables should be ingested.", - "default": true, - "type": "boolean" - }, - "profiling": { - "title": "Profiling", - "default": { - "enabled": false, - "limit": null, - "offset": null, - "report_dropped_profiles": false, - "turn_off_expensive_profiling_metrics": false, - "profile_table_level_only": false, - "include_field_null_count": true, - "include_field_min_value": true, - "include_field_max_value": true, - "include_field_mean_value": true, - "include_field_median_value": true, - "include_field_stddev_value": true, - "include_field_quantiles": false, - "include_field_distinct_value_frequencies": false, - "include_field_histogram": false, - "include_field_sample_values": true, - "allow_deny_patterns": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "max_number_of_fields_to_profile": null, - "profile_if_updated_since_days": 1, - "max_workers": 50, - 
"query_combiner_enabled": true, - "catch_exceptions": true, - "partition_profiling_enabled": true, - "bigquery_temp_table_schema": null, - "partition_datetime": null - }, - "allOf": [ - { - "$ref": "#/definitions/GEProfilingConfig" - } - ] - }, - "username": { - "title": "Username", - "description": "username", - "type": "string" - }, - "password": { - "title": "Password", - "description": "password", - "type": "string", - "writeOnly": true, - "format": "password" - }, - "host_port": { - "title": "Host Port", - "description": "host URL", - "type": "string" - }, - "database": { - "title": "Database", - "description": "database (catalog)", - "type": "string" - }, - "database_alias": { - "title": "Database Alias", - "description": "Alias to apply to database when ingesting.", - "type": "string" - }, - "scheme": { - "title": "Scheme", - "default": "redshift+psycopg2", - "type": "string" - }, - "sqlalchemy_uri": { - "title": "Sqlalchemy Uri", - "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. 
Takes precedence over other connection parameters.", - "type": "string" - }, - "default_schema": { - "title": "Default Schema", - "description": "The default schema to use if the sql parser fails to parse the schema with `sql_based` lineage collector", - "default": "public", - "type": "string" - }, - "include_table_lineage": { - "title": "Include Table Lineage", - "description": "Whether table lineage should be ingested.", - "default": true, - "type": "boolean" - }, - "include_copy_lineage": { - "title": "Include Copy Lineage", - "description": "Whether lineage should be collected from copy commands", - "default": true, - "type": "boolean" - }, - "capture_lineage_query_parser_failures": { - "title": "Capture Lineage Query Parser Failures", - "description": "Whether to capture lineage query parser errors with dataset properties for debuggings", - "default": false, - "type": "boolean" - }, - "table_lineage_mode": { - "description": "Which table lineage collector mode to use. Available modes are: [stl_scan_based, sql_based, mixed]", - "default": "stl_scan_based", - "allOf": [ - { - "$ref": "#/definitions/LineageMode" - } - ] - }, - "email_domain": { - "title": "Email Domain", - "description": "Email domain of your organisation so users can be displayed on UI appropriately.", - "type": "string" - } - }, - "required": [ - "host_port", - "email_domain" - ], - "additionalProperties": false - }, - "hive": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "hive" - ] - }, - "config": { - "$ref": "#/definitions/hive_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "hive_config": { - "title": "HiveConfig", - "description": "Base configuration class for stateful ingestion for source configs to inherit from.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform": { - 
"title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "stateful_ingestion": { - "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" - }, - "options": { - "title": "Options", - "default": {}, - "type": "object" - }, - "schema_pattern": { - "title": "Schema Pattern", - "description": "regex patterns for schemas to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - "description": "regex patterns for tables to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "view_pattern": { - "title": "View Pattern", - "description": "regex patterns for views to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "profile_pattern": { - "title": "Profile Pattern", - "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "domain": { - "title": "Domain", - "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", - "default": {}, - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/AllowDenyPattern" - } - }, - "include_views": { - "title": "Include Views", - "description": "Hive SQLAlchemy connector returns views as tables. See https://github.com/dropbox/PyHive/blob/b21c507a24ed2f2b0cf15b0b6abb1c43f31d3ee0/pyhive/sqlalchemy_hive.py#L270-L273. Disabling views helps us prevent this duplication.", - "default": false, - "type": "boolean" - }, - "include_tables": { - "title": "Include Tables", - "description": "Whether tables should be ingested.", - "default": true, - "type": "boolean" - }, - "profiling": { - "title": "Profiling", - "default": { - "enabled": false, - "limit": null, - "offset": null, - "report_dropped_profiles": false, - "turn_off_expensive_profiling_metrics": false, - "profile_table_level_only": false, - "include_field_null_count": true, - "include_field_min_value": true, - "include_field_max_value": true, - "include_field_mean_value": true, - "include_field_median_value": true, - "include_field_stddev_value": true, - "include_field_quantiles": false, - "include_field_distinct_value_frequencies": false, - "include_field_histogram": false, - "include_field_sample_values": true, - "allow_deny_patterns": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "max_number_of_fields_to_profile": null, - "profile_if_updated_since_days": 1, - "max_workers": 50, - "query_combiner_enabled": true, - "catch_exceptions": true, - "partition_profiling_enabled": true, - "bigquery_temp_table_schema": null, - "partition_datetime": null - }, - "allOf": [ - { - "$ref": "#/definitions/GEProfilingConfig" - } - ] - }, - "username": { - "title": "Username", - "description": "username", - "type": "string" - }, - "password": { - "title": "Password", - "description": "password", - "type": "string", - "writeOnly": true, - "format": "password" - }, - "host_port": { - "title": 
"Host Port", - "description": "host URL", - "type": "string" - }, - "database": { - "title": "Database", - "description": "database (catalog)", - "type": "string" - }, - "database_alias": { - "title": "Database Alias", - "description": "Alias to apply to database when ingesting.", - "type": "string" - }, - "scheme": { - "title": "Scheme", - "default": "hive", - "type": "string" - }, - "sqlalchemy_uri": { - "title": "Sqlalchemy Uri", - "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", - "type": "string" - } - }, - "required": [ - "host_port" - ], - "additionalProperties": false - }, - "hana": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "hana" - ] - }, - "config": { - "$ref": "#/definitions/hana_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "hana_config": { - "title": "HanaConfig", - "description": "Base configuration class for stateful ingestion for source configs to inherit from.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "stateful_ingestion": { - "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" - }, - "options": { - "title": "Options", - "default": {}, - "type": "object" - }, - "schema_pattern": { - "title": "Schema Pattern", - "description": "regex patterns for schemas to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ 
- { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - "description": "regex patterns for tables to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "view_pattern": { - "title": "View Pattern", - "description": "regex patterns for views to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "profile_pattern": { - "title": "Profile Pattern", - "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "domain": { - "title": "Domain", - "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", - "default": {}, - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/AllowDenyPattern" - } - }, - "include_views": { - "title": "Include Views", - "description": "Whether views should be ingested.", - "default": true, - "type": "boolean" - }, - "include_tables": { - "title": "Include Tables", - "description": "Whether tables should be ingested.", - "default": true, - "type": "boolean" - }, - "profiling": { - "title": "Profiling", - "default": { - "enabled": false, - "limit": null, - "offset": null, - "report_dropped_profiles": false, - "turn_off_expensive_profiling_metrics": false, - "profile_table_level_only": false, - "include_field_null_count": true, - "include_field_min_value": true, - "include_field_max_value": true, - "include_field_mean_value": true, - "include_field_median_value": true, - "include_field_stddev_value": true, - "include_field_quantiles": false, - "include_field_distinct_value_frequencies": false, - "include_field_histogram": false, - "include_field_sample_values": true, - "allow_deny_patterns": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "max_number_of_fields_to_profile": null, - "profile_if_updated_since_days": 1, - "max_workers": 50, - "query_combiner_enabled": true, - "catch_exceptions": true, - "partition_profiling_enabled": true, - "bigquery_temp_table_schema": null, - "partition_datetime": null - }, - "allOf": [ - { - "$ref": "#/definitions/GEProfilingConfig" - } - ] - }, - "username": { - "title": "Username", - "description": "username", - "type": "string" - }, - "password": { - "title": "Password", - "description": "password", - "type": "string", - "writeOnly": true, - "format": "password" - }, - "host_port": { - "title": "Host Port", - "default": "localhost:39041", - "type": "string" - }, - "database": { - "title": "Database", - "description": "database (catalog)", - "type": "string" - }, - "database_alias": { 
- "title": "Database Alias", - "description": "Alias to apply to database when ingesting.", - "type": "string" - }, - "scheme": { - "title": "Scheme", - "default": "hana+hdbcli", - "type": "string" - }, - "sqlalchemy_uri": { - "title": "Sqlalchemy Uri", - "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", - "type": "string" - } - }, - "additionalProperties": false - }, - "sagemaker": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "sagemaker" - ] - }, - "config": { - "$ref": "#/definitions/sagemaker_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "sagemaker_config": { - "title": "SagemakerSourceConfig", - "description": "Common AWS credentials config.\n\nCurrently used by:\n - Glue source\n - SageMaker source", - "type": "object", - "properties": { - "aws_access_key_id": { - "title": "Aws Access Key Id", - "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", - "type": "string" - }, - "aws_secret_access_key": { - "title": "Aws Secret Access Key", - "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", - "type": "string" - }, - "aws_session_token": { - "title": "Aws Session Token", - "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", - "type": "string" - }, - "aws_role": { - "title": "Aws Role", - "description": "Autodetected. 
See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "aws_profile": { - "title": "Aws Profile", - "description": "Named AWS profile to use, if not set the default will be used", - "type": "string" - }, - "aws_region": { - "title": "Aws Region", - "description": "AWS region code.", - "type": "string" - }, - "aws_endpoint_url": { - "title": "Aws Endpoint Url", - "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html", - "type": "string" - }, - "aws_proxy": { - "title": "Aws Proxy", - "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html", - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "database_pattern": { - "title": "Database Pattern", - "description": "regex patterns for databases to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - "description": "regex patterns for tables to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "extract_feature_groups": { - "title": "Extract Feature Groups", - "description": "Whether to extract feature groups.", - "default": true, - "type": "boolean" - }, - "extract_models": { - "title": "Extract Models", - "description": "Whether to extract models.", - "default": true, - "type": "boolean" - }, - 
"extract_jobs": { - "title": "Extract Jobs", - "description": "Whether to extract AutoML jobs.", - "default": true, - "anyOf": [ - { - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - { - "type": "boolean" - } - ] - } - }, - "required": [ - "aws_region" - ], - "additionalProperties": false - }, - "glue": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "glue" - ] - }, - "config": { - "$ref": "#/definitions/glue_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "glue_config": { - "title": "GlueSourceConfig", - "description": "Common AWS credentials config.\n\nCurrently used by:\n - Glue source\n - SageMaker source", - "type": "object", - "properties": { - "row_count": { - "title": "Row Count", - "description": "The parameter name for row count in glue table.", - "type": "string" - }, - "column_count": { - "title": "Column Count", - "description": "The parameter name for column count in glue table.", - "type": "string" - }, - "unique_count": { - "title": "Unique Count", - "description": "The parameter name for the count of unique value in a column.", - "type": "string" - }, - "unique_proportion": { - "title": "Unique Proportion", - "description": "The parameter name for the proportion of unique values in a column.", - "type": "string" - }, - "null_count": { - "title": "Null Count", - "description": "The parameter name for the count of null values in a column.", - "type": "integer" - }, - "null_proportion": { - "title": "Null Proportion", - "description": "The parameter name for the proportion of null values in a column.", - "type": "string" - }, - "min": { - "title": "Min", - "description": "The parameter name for the min value of a column.", - "type": "string" - }, - "max": { - "title": "Max", - "description": "The parameter name for the max value of a column.", - "type": "string" - }, - "mean": { - "title": "Mean", - "description": "The parameter name for the mean value of a column.", - 
"type": "string" - }, - "median": { - "title": "Median", - "description": "The parameter name for the median value of a column.", - "type": "string" - }, - "stdev": { - "title": "Stdev", - "description": "The parameter name for the standard deviation of a column.", - "type": "string" - }, - "partition_patterns": { - "title": "Partition Patterns", - "description": "Regex patterns for filtering partitions for profile. The pattern should be a string like: \"{'key':'value'}\".", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "aws_access_key_id": { - "title": "Aws Access Key Id", - "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", - "type": "string" - }, - "aws_secret_access_key": { - "title": "Aws Secret Access Key", - "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", - "type": "string" - }, - "aws_session_token": { - "title": "Aws Session Token", - "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", - "type": "string" - }, - "aws_role": { - "title": "Aws Role", - "description": "Autodetected. 
See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "aws_profile": { - "title": "Aws Profile", - "description": "Named AWS profile to use, if not set the default will be used", - "type": "string" - }, - "aws_region": { - "title": "Aws Region", - "description": "AWS region code.", - "type": "string" - }, - "aws_endpoint_url": { - "title": "Aws Endpoint Url", - "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html", - "type": "string" - }, - "aws_proxy": { - "title": "Aws Proxy", - "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html", - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "database_pattern": { - "title": "Database Pattern", - "description": "regex patterns for databases to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - "description": "regex patterns for tables to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "extract_owners": { - "title": "Extract Owners", - "description": "When enabled, extracts ownership from Glue directly and overwrites existing owners. 
When disabled, ownership is left empty for datasets.", - "default": true, - "type": "boolean" - }, - "extract_transforms": { - "title": "Extract Transforms", - "description": "Whether to extract Glue transform jobs.", - "default": true, - "type": "boolean" - }, - "underlying_platform": { - "title": "Underlying Platform", - "description": "@deprecated(Use `platform`) Override for platform name. Allowed values - `glue`, `athena`", - "type": "string" - }, - "ignore_unsupported_connectors": { - "title": "Ignore Unsupported Connectors", - "description": "Whether to ignore unsupported connectors. If disabled, an error will be raised.", - "default": true, - "type": "boolean" - }, - "emit_s3_lineage": { - "title": "Emit S3 Lineage", - "description": " Whether to emit S3-to-Glue lineage.", - "default": false, - "type": "boolean" - }, - "glue_s3_lineage_direction": { - "title": "Glue S3 Lineage Direction", - "description": "If `upstream`, S3 is upstream to Glue. If `downstream` S3 is downstream to Glue.", - "default": "upstream", - "type": "string" - }, - "domain": { - "title": "Domain", - "description": "regex patterns for tables to filter to assign domain_key. ", - "default": {}, - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/AllowDenyPattern" - } - }, - "catalog_id": { - "title": "Catalog Id", - "description": "The aws account id where the target glue catalog lives. If None, datahub will ingest glue in aws caller's account.", - "type": "string" - }, - "use_s3_bucket_tags": { - "title": "Use S3 Bucket Tags", - "description": "If an S3 Buckets Tags should be created for the Tables ingested by Glue. 
Please Note that this will not apply tags to any folders ingested, only the files.", - "default": false, - "type": "boolean" - }, - "use_s3_object_tags": { - "title": "Use S3 Object Tags", - "description": "If an S3 Objects Tags should be created for the Tables ingested by Glue.", - "default": false, - "type": "boolean" - }, - "profiling": { - "title": "Profiling", - "description": "Configs to ingest data profiles from glue table", - "allOf": [ - { - "$ref": "#/definitions/GlueProfilingConfig" - } - ] - } - }, - "required": [ - "aws_region" - ], - "additionalProperties": false - }, - "oracle": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "oracle" - ] - }, - "config": { - "$ref": "#/definitions/oracle_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "oracle_config": { - "title": "OracleConfig", - "description": "Base configuration class for stateful ingestion for source configs to inherit from.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "stateful_ingestion": { - "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" - }, - "options": { - "title": "Options", - "default": {}, - "type": "object" - }, - "schema_pattern": { - "title": "Schema Pattern", - "description": "regex patterns for schemas to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - 
"description": "regex patterns for tables to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "view_pattern": { - "title": "View Pattern", - "description": "regex patterns for views to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "profile_pattern": { - "title": "Profile Pattern", - "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "domain": { - "title": "Domain", - "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", - "default": {}, - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/AllowDenyPattern" - } - }, - "include_views": { - "title": "Include Views", - "description": "Whether views should be ingested.", - "default": true, - "type": "boolean" - }, - "include_tables": { - "title": "Include Tables", - "description": "Whether tables should be ingested.", - "default": true, - "type": "boolean" - }, - "profiling": { - "title": "Profiling", - "default": { - "enabled": false, - "limit": null, - "offset": null, - "report_dropped_profiles": false, - "turn_off_expensive_profiling_metrics": false, - "profile_table_level_only": false, - "include_field_null_count": true, - "include_field_min_value": true, - "include_field_max_value": true, - "include_field_mean_value": true, - "include_field_median_value": true, - "include_field_stddev_value": true, - "include_field_quantiles": false, - "include_field_distinct_value_frequencies": false, - "include_field_histogram": false, - "include_field_sample_values": true, - "allow_deny_patterns": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "max_number_of_fields_to_profile": null, - "profile_if_updated_since_days": 1, - "max_workers": 50, - "query_combiner_enabled": true, - "catch_exceptions": true, - "partition_profiling_enabled": true, - "bigquery_temp_table_schema": null, - "partition_datetime": null - }, - "allOf": [ - { - "$ref": "#/definitions/GEProfilingConfig" - } - ] - }, - "username": { - "title": "Username", - "description": "username", - "type": "string" - }, - "password": { - "title": "Password", - "description": "password", - "type": "string", - "writeOnly": true, - "format": "password" - }, - "host_port": { - "title": "Host Port", - "description": "host URL", - "type": "string" - }, - "database": { - "title": "Database", - "description": "If using, omit `service_name`.", - "type": "string" - }, - 
"database_alias": { - "title": "Database Alias", - "description": "Alias to apply to database when ingesting.", - "type": "string" - }, - "scheme": { - "title": "Scheme", - "description": "Will be set automatically to default value.", - "default": "oracle+cx_oracle", - "type": "string" - }, - "sqlalchemy_uri": { - "title": "Sqlalchemy Uri", - "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", - "type": "string" - }, - "service_name": { - "title": "Service Name", - "description": "Oracle service name. If using, omit `database`.", - "type": "string" - } - }, - "required": [ - "host_port" - ], - "additionalProperties": false - }, - "druid": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "druid" - ] - }, - "config": { - "$ref": "#/definitions/druid_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "druid_config": { - "title": "DruidConfig", - "description": "Base configuration class for stateful ingestion for source configs to inherit from.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "stateful_ingestion": { - "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" - }, - "options": { - "title": "Options", - "default": {}, - "type": "object" - }, - "schema_pattern": { - "title": "Schema Pattern", - "description": "regex patterns for schemas to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [ - "^(lookup|sys).*" - 
], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - "description": "regex patterns for tables to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "view_pattern": { - "title": "View Pattern", - "description": "regex patterns for views to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "profile_pattern": { - "title": "Profile Pattern", - "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "domain": { - "title": "Domain", - "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", - "default": {}, - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/AllowDenyPattern" - } - }, - "include_views": { - "title": "Include Views", - "description": "Whether views should be ingested.", - "default": true, - "type": "boolean" - }, - "include_tables": { - "title": "Include Tables", - "description": "Whether tables should be ingested.", - "default": true, - "type": "boolean" - }, - "profiling": { - "title": "Profiling", - "default": { - "enabled": false, - "limit": null, - "offset": null, - "report_dropped_profiles": false, - "turn_off_expensive_profiling_metrics": false, - "profile_table_level_only": false, - "include_field_null_count": true, - "include_field_min_value": true, - "include_field_max_value": true, - "include_field_mean_value": true, - "include_field_median_value": true, - "include_field_stddev_value": true, - "include_field_quantiles": false, - "include_field_distinct_value_frequencies": false, - "include_field_histogram": false, - "include_field_sample_values": true, - "allow_deny_patterns": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "max_number_of_fields_to_profile": null, - "profile_if_updated_since_days": 1, - "max_workers": 50, - "query_combiner_enabled": true, - "catch_exceptions": true, - "partition_profiling_enabled": true, - "bigquery_temp_table_schema": null, - "partition_datetime": null - }, - "allOf": [ - { - "$ref": "#/definitions/GEProfilingConfig" - } - ] - }, - "username": { - "title": "Username", - "description": "username", - "type": "string" - }, - "password": { - "title": "Password", - "description": "password", - "type": "string", - "writeOnly": true, - "format": "password" - }, - "host_port": { - "title": "Host Port", - "description": "host URL", - "type": "string" - }, - "database": { - "title": "Database", - "description": "database (catalog)", - "type": "string" - }, - "database_alias": { - 
"title": "Database Alias", - "description": "Alias to apply to database when ingesting.", - "type": "string" - }, - "scheme": { - "title": "Scheme", - "default": "druid", - "type": "string" - }, - "sqlalchemy_uri": { - "title": "Sqlalchemy Uri", - "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", - "type": "string" - } - }, - "required": [ - "host_port" - ], - "additionalProperties": false - }, - "mode": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "mode" - ] - }, - "config": { - "$ref": "#/definitions/mode_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "mode_config": { - "title": "ModeConfig", - "description": "Any non-Dataset source that produces lineage to Datasets should inherit this class.\ne.g. Orchestrators, Pipelines, BI Tools etc.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform_instance_map": { - "title": "Platform Instance Map", - "description": "A holder for platform -> platform_instance mappings to generate correct dataset urns", - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "connect_uri": { - "title": "Connect Uri", - "description": "Mode host URL.", - "default": "https://app.mode.com", - "type": "string" - }, - "token": { - "title": "Token", - "description": "Mode user token.", - "type": "string" - }, - "password": { - "title": "Password", - "description": "Mode password for authentication.", - "type": "string" - }, - "workspace": { - "title": "Workspace", - "type": "string" - }, - "default_schema": { - "title": "Default Schema", - "description": "Default schema to use when schema is not provided in an SQL query", - "default": "public", - "type": "string" - }, - 
"owner_username_instead_of_email": { - "title": "Owner Username Instead Of Email", - "description": "Use username for owner URN instead of Email", - "default": true, - "type": "boolean" - }, - "api_options": { - "title": "Api Options", - "description": "Retry/Wait settings for Mode API to avoid \"Too many Requests\" error. See Mode API Options below", - "default": { - "retry_backoff_multiplier": 2, - "max_retry_interval": 10, - "max_attempts": 5 - }, - "allOf": [ - { - "$ref": "#/definitions/ModeAPIConfig" - } - ] - } - }, - "additionalProperties": false - }, - "file": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "file" - ] - }, - "config": { - "$ref": "#/definitions/file_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "file_config": { - "title": "FileSourceConfig", - "type": "object", - "properties": { - "filename": { - "title": "Filename", - "description": "Path to file to ingest.", - "type": "string" - } - }, - "required": [ - "filename" - ], - "additionalProperties": false - }, - "mssql": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "mssql" - ] - }, - "config": { - "$ref": "#/definitions/mssql_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "mssql_config": { - "title": "SQLServerConfig", - "description": "Base configuration class for stateful ingestion for source configs to inherit from.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "stateful_ingestion": { - "$ref": 
"#/definitions/SQLAlchemyStatefulIngestionConfig" - }, - "options": { - "title": "Options", - "default": {}, - "type": "object" - }, - "schema_pattern": { - "title": "Schema Pattern", - "description": "regex patterns for schemas to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - "description": "regex patterns for tables to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "view_pattern": { - "title": "View Pattern", - "description": "regex patterns for views to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "profile_pattern": { - "title": "Profile Pattern", - "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "domain": { - "title": "Domain", - "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", - "default": {}, - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/AllowDenyPattern" - } - }, - "include_views": { - "title": "Include Views", - "description": "Whether views should be ingested.", - "default": true, - "type": "boolean" - }, - "include_tables": { - "title": "Include Tables", - "description": "Whether tables should be ingested.", - "default": true, - "type": "boolean" - }, - "profiling": { - "title": "Profiling", - "default": { - "enabled": false, - "limit": null, - "offset": null, - "report_dropped_profiles": false, - "turn_off_expensive_profiling_metrics": false, - "profile_table_level_only": false, - "include_field_null_count": true, - "include_field_min_value": true, - "include_field_max_value": true, - "include_field_mean_value": true, - "include_field_median_value": true, - "include_field_stddev_value": true, - "include_field_quantiles": false, - "include_field_distinct_value_frequencies": false, - "include_field_histogram": false, - "include_field_sample_values": true, - "allow_deny_patterns": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "max_number_of_fields_to_profile": null, - "profile_if_updated_since_days": 1, - "max_workers": 50, - "query_combiner_enabled": true, - "catch_exceptions": true, - "partition_profiling_enabled": true, - "bigquery_temp_table_schema": null, - "partition_datetime": null - }, - "allOf": [ - { - "$ref": "#/definitions/GEProfilingConfig" - } - ] - }, - "username": { - "title": "Username", - "description": "username", - "type": "string" - }, - "password": { - "title": "Password", - "description": "password", - "type": "string", - "writeOnly": true, - "format": "password" - }, - "host_port": { - "title": "Host Port", - "description": "MSSQL host URL.", - "default": "localhost:1433", - "type": "string" - }, - "database": { - "title": "Database", - "description": "database (catalog)", - "type": 
"string" - }, - "database_alias": { - "title": "Database Alias", - "description": "Alias to apply to database when ingesting.", - "type": "string" - }, - "scheme": { - "title": "Scheme", - "default": "mssql+pytds", - "type": "string" - }, - "sqlalchemy_uri": { - "title": "Sqlalchemy Uri", - "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", - "type": "string" - }, - "use_odbc": { - "title": "Use Odbc", - "description": "See https://docs.sqlalchemy.org/en/14/dialects/mssql.html#module-sqlalchemy.dialects.mssql.pyodbc.", - "default": false, - "type": "boolean" - }, - "uri_args": { - "title": "Uri Args", - "description": "Arguments to URL-encode when connecting. See https://docs.microsoft.com/en-us/sql/connect/odbc/dsn-connection-string-attribute?view=sql-server-ver15.", - "default": {}, - "type": "object", - "additionalProperties": { - "type": "string" - } - } - }, - "additionalProperties": false - }, - "data-lake": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "data-lake" - ] - }, - "config": { - "$ref": "#/definitions/data-lake_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "data-lake_config": { - "title": "DataLakeSourceConfig", - "description": "Any source that produces dataset urns in a single environment should inherit this class", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "base_path": { - "title": "Base Path", - "description": "Path of the base folder to crawl. Unless `schema_patterns` and `profile_patterns` are set, the connector will ingest all files in this folder.", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "Autodetected. Platform to use in namespace when constructing URNs. 
If left blank, local paths will correspond to `file` and S3 paths will correspond to `s3`.", - "default": "", - "type": "string" - }, - "use_relative_path": { - "title": "Use Relative Path", - "description": "Whether to use the relative path when constructing URNs. Has no effect when a `path_spec` is provided.", - "default": false, - "type": "boolean" - }, - "ignore_dotfiles": { - "title": "Ignore Dotfiles", - "description": "Whether to ignore files that start with `.`. For instance, `.DS_Store`, `.bash_profile`, etc.", - "default": true, - "type": "boolean" - }, - "aws_config": { - "title": "Aws Config", - "description": "AWS details", - "allOf": [ - { - "$ref": "#/definitions/AwsSourceConfig" - } - ] - }, - "schema_patterns": { - "title": "Schema Patterns", - "description": "regex patterns for tables to filter for ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "profile_patterns": { - "title": "Profile Patterns", - "description": "regex patterns for tables to profile ", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "path_spec": { - "title": "Path Spec", - "description": "Format string for constructing table identifiers from the relative path. 
See the above setup section for details.", - "type": "string" - }, - "profiling": { - "title": "Profiling", - "description": "Profiling configurations", - "default": { - "enabled": false, - "spark_cluster_manager": null, - "profile_table_level_only": false, - "allow_deny_patterns": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "max_number_of_fields_to_profile": null, - "include_field_null_count": true, - "include_field_min_value": true, - "include_field_max_value": true, - "include_field_mean_value": true, - "include_field_median_value": true, - "include_field_stddev_value": true, - "include_field_quantiles": true, - "include_field_distinct_value_frequencies": true, - "include_field_histogram": true, - "include_field_sample_values": true - }, - "allOf": [ - { - "$ref": "#/definitions/DataLakeProfilerConfig" - } - ] - }, - "spark_driver_memory": { - "title": "Spark Driver Memory", - "description": "Max amount of memory to grant Spark.", - "default": "4g", - "type": "string" - }, - "max_rows": { - "title": "Max Rows", - "description": "Maximum number of rows to use when inferring schemas for TSV and CSV files.", - "default": 100, - "type": "integer" - } - }, - "required": [ - "base_path" - ], - "additionalProperties": false - }, - "presto-on-hive": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "presto-on-hive" - ] - }, - "config": { - "$ref": "#/definitions/presto-on-hive_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "presto-on-hive_config": { - "title": "PrestoOnHiveConfig", - "description": "Base configuration class for stateful ingestion for source configs to inherit from.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "The platform that this 
source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "stateful_ingestion": { - "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" - }, - "options": { - "title": "Options", - "default": {}, - "type": "object" - }, - "schema_pattern": { - "title": "Schema Pattern", - "description": "regex patterns for schemas to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - "description": "regex patterns for tables to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "view_pattern": { - "title": "View Pattern", - "description": "regex patterns for views to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "profile_pattern": { - "title": "Profile Pattern", - "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "domain": { - "title": "Domain", - "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", - "default": {}, - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/AllowDenyPattern" - } - }, - "include_views": { - "title": "Include Views", - "description": "Whether views should be ingested.", - "default": true, - "type": "boolean" - }, - "include_tables": { - "title": "Include Tables", - "description": "Whether tables should be ingested.", - "default": true, - "type": "boolean" - }, - "profiling": { - "title": "Profiling", - "default": { - "enabled": false, - "limit": null, - "offset": null, - "report_dropped_profiles": false, - "turn_off_expensive_profiling_metrics": false, - "profile_table_level_only": false, - "include_field_null_count": true, - "include_field_min_value": true, - "include_field_max_value": true, - "include_field_mean_value": true, - "include_field_median_value": true, - "include_field_stddev_value": true, - "include_field_quantiles": false, - "include_field_distinct_value_frequencies": false, - "include_field_histogram": false, - "include_field_sample_values": true, - "allow_deny_patterns": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "max_number_of_fields_to_profile": null, - "profile_if_updated_since_days": 1, - "max_workers": 50, - "query_combiner_enabled": true, - "catch_exceptions": true, - "partition_profiling_enabled": true, - "bigquery_temp_table_schema": null, - "partition_datetime": null - }, - "allOf": [ - { - "$ref": "#/definitions/GEProfilingConfig" - } - ] - }, - "username": { - "title": "Username", - "description": "username", - "type": "string" - }, - "password": { - "title": "Password", - "description": "password", - "type": "string", - "writeOnly": true, - "format": "password" - }, - "host_port": { - "title": "Host Port", - "description": "Host URL and port to connect to. 
Example: localhost:3306", - "default": "localhost:3306", - "type": "string" - }, - "database": { - "title": "Database", - "description": "database (catalog)", - "type": "string" - }, - "database_alias": { - "title": "Database Alias", - "description": "Alias to apply to database when ingesting.", - "type": "string" - }, - "scheme": { - "title": "Scheme", - "default": "mysql+pymysql", - "type": "string" - }, - "sqlalchemy_uri": { - "title": "Sqlalchemy Uri", - "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", - "type": "string" - }, - "views_where_clause_suffix": { - "title": "Views Where Clause Suffix", - "description": "Where clause to specify what Presto views should be ingested.", - "default": "", - "type": "string" - }, - "tables_where_clause_suffix": { - "title": "Tables Where Clause Suffix", - "description": "Where clause to specify what Hive tables should be ingested.", - "default": "", - "type": "string" - }, - "schemas_where_clause_suffix": { - "title": "Schemas Where Clause Suffix", - "description": "Where clause to specify what Hive schemas should be ingested.", - "default": "", - "type": "string" - } - }, - "additionalProperties": false - }, - "vertica": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "vertica" - ] - }, - "config": { - "$ref": "#/definitions/vertica_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "vertica_config": { - "title": "VerticaConfig", - "description": "Base configuration class for stateful ingestion for source configs to inherit from.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, 
- "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "stateful_ingestion": { - "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" - }, - "options": { - "title": "Options", - "default": {}, - "type": "object" - }, - "schema_pattern": { - "title": "Schema Pattern", - "description": "regex patterns for schemas to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - "description": "regex patterns for tables to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "view_pattern": { - "title": "View Pattern", - "description": "regex patterns for views to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "profile_pattern": { - "title": "Profile Pattern", - "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "domain": { - "title": "Domain", - "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", - "default": {}, - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/AllowDenyPattern" - } - }, - "include_views": { - "title": "Include Views", - "description": "Whether views should be ingested.", - "default": true, - "type": "boolean" - }, - "include_tables": { - "title": "Include Tables", - "description": "Whether tables should be ingested.", - "default": true, - "type": "boolean" - }, - "profiling": { - "title": "Profiling", - "default": { - "enabled": false, - "limit": null, - "offset": null, - "report_dropped_profiles": false, - "turn_off_expensive_profiling_metrics": false, - "profile_table_level_only": false, - "include_field_null_count": true, - "include_field_min_value": true, - "include_field_max_value": true, - "include_field_mean_value": true, - "include_field_median_value": true, - "include_field_stddev_value": true, - "include_field_quantiles": false, - "include_field_distinct_value_frequencies": false, - "include_field_histogram": false, - "include_field_sample_values": true, - "allow_deny_patterns": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "max_number_of_fields_to_profile": null, - "profile_if_updated_since_days": 1, - "max_workers": 50, - "query_combiner_enabled": true, - "catch_exceptions": true, - "partition_profiling_enabled": true, - "bigquery_temp_table_schema": null, - "partition_datetime": null - }, - "allOf": [ - { - "$ref": "#/definitions/GEProfilingConfig" - } - ] - }, - "username": { - "title": "Username", - "description": "username", - "type": "string" - }, - "password": { - "title": "Password", - "description": "password", - "type": "string", - "writeOnly": true, - "format": "password" - }, - "host_port": { - "title": "Host Port", - "description": "host URL", - "type": "string" - }, - "database": { - "title": "Database", - "description": "database (catalog)", - "type": "string" - }, - "database_alias": { - 
"title": "Database Alias", - "description": "Alias to apply to database when ingesting.", - "type": "string" - }, - "scheme": { - "title": "Scheme", - "default": "vertica+vertica_python", - "type": "string" - }, - "sqlalchemy_uri": { - "title": "Sqlalchemy Uri", - "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", - "type": "string" - } - }, - "required": [ - "host_port" - ], - "additionalProperties": false - }, - "csv-enricher": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "csv-enricher" - ] - }, - "config": { - "$ref": "#/definitions/csv-enricher_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "csv-enricher_config": { - "title": "CSVEnricherConfig", - "type": "object", - "properties": { - "filename": { - "title": "Filename", - "description": "Path to ingestion CSV file", - "type": "string" - }, - "should_overwrite": { - "title": "Should Overwrite", - "description": "Whether the ingestion should overwrite. 
Otherwise, we will append data.", - "default": false, - "type": "boolean" - }, - "delimiter": { - "title": "Delimiter", - "description": "Delimiter to use when parsing CSV", - "default": ",", - "type": "string" - }, - "array_delimiter": { - "title": "Array Delimiter", - "description": "Delimiter to use when parsing array fields (tags, terms, owners)", - "default": "|", - "type": "string" - } - }, - "required": [ - "filename" - ], - "additionalProperties": false - }, - "mariadb": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "mariadb" - ] - }, - "config": { - "$ref": "#/definitions/mariadb_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "mariadb_config": { - "title": "MySQLConfig", - "description": "Base configuration class for stateful ingestion for source configs to inherit from.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "stateful_ingestion": { - "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" - }, - "options": { - "title": "Options", - "default": {}, - "type": "object" - }, - "schema_pattern": { - "title": "Schema Pattern", - "description": "regex patterns for schemas to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - "description": "regex patterns for tables to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - 
"deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "view_pattern": { - "title": "View Pattern", - "description": "regex patterns for views to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "profile_pattern": { - "title": "Profile Pattern", - "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "domain": { - "title": "Domain", - "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) There can be multiple domain key specified.", - "default": {}, - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/AllowDenyPattern" - } - }, - "include_views": { - "title": "Include Views", - "description": "Whether views should be ingested.", - "default": true, - "type": "boolean" - }, - "include_tables": { - "title": "Include Tables", - "description": "Whether tables should be ingested.", - "default": true, - "type": "boolean" - }, - "profiling": { - "title": "Profiling", - "default": { - "enabled": false, - "limit": null, - "offset": null, - "report_dropped_profiles": false, - "turn_off_expensive_profiling_metrics": false, - "profile_table_level_only": false, - "include_field_null_count": true, - "include_field_min_value": true, - "include_field_max_value": true, - "include_field_mean_value": true, - "include_field_median_value": true, - "include_field_stddev_value": true, - "include_field_quantiles": false, - "include_field_distinct_value_frequencies": false, - "include_field_histogram": false, - 
"include_field_sample_values": true, - "allow_deny_patterns": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "max_number_of_fields_to_profile": null, - "profile_if_updated_since_days": 1, - "max_workers": 50, - "query_combiner_enabled": true, - "catch_exceptions": true, - "partition_profiling_enabled": true, - "bigquery_temp_table_schema": null, - "partition_datetime": null - }, - "allOf": [ - { - "$ref": "#/definitions/GEProfilingConfig" - } - ] - }, - "username": { - "title": "Username", - "description": "username", - "type": "string" - }, - "password": { - "title": "Password", - "description": "password", - "type": "string", - "writeOnly": true, - "format": "password" - }, - "host_port": { - "title": "Host Port", - "description": "MySQL host URL.", - "default": "localhost:3306", - "type": "string" - }, - "database": { - "title": "Database", - "description": "database (catalog)", - "type": "string" - }, - "database_alias": { - "title": "Database Alias", - "description": "Alias to apply to database when ingesting.", - "type": "string" - }, - "scheme": { - "title": "Scheme", - "default": "mysql+pymysql", - "type": "string" - }, - "sqlalchemy_uri": { - "title": "Sqlalchemy Uri", - "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. 
Takes precedence over other connection parameters.", - "type": "string" - } - }, - "additionalProperties": false - }, - "feast": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "feast" - ] - }, - "config": { - "$ref": "#/definitions/feast_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "feast_config": { - "title": "FeastRepositorySourceConfig", - "type": "object", - "properties": { - "path": { - "title": "Path", - "description": "Path to Feast repository", - "type": "string" - }, - "environment": { - "title": "Environment", - "description": "Environment to use when constructing URNs", - "default": "PROD", - "type": "string" - } - }, - "required": [ - "path" - ], - "additionalProperties": false - }, - "ldap": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "ldap" - ] - }, - "config": { - "$ref": "#/definitions/ldap_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "ldap_config": { - "title": "LDAPSourceConfig", - "description": "Config used by the LDAP Source.", - "type": "object", - "properties": { - "ldap_server": { - "title": "Ldap Server", - "description": "LDAP server URL.", - "type": "string" - }, - "ldap_user": { - "title": "Ldap User", - "description": "LDAP user.", - "type": "string" - }, - "ldap_password": { - "title": "Ldap Password", - "description": "LDAP password.", - "type": "string" - }, - "base_dn": { - "title": "Base Dn", - "description": "LDAP DN.", - "type": "string" - }, - "filter": { - "title": "Filter", - "description": "LDAP extractor filter.", - "default": "(objectClass=*)", - "type": "string" - }, - "drop_missing_first_last_name": { - "title": "Drop Missing First Last Name", - "description": "If set to true, any users without first and last names will be dropped.", - "default": true, - "type": "boolean" - }, - "page_size": { - "title": "Page Size", - "description": "Size of each page to fetch when extracting metadata.", - "default": 
20, - "type": "integer" - }, - "user_attrs_map": { - "title": "User Attrs Map", - "default": {}, - "type": "object" - }, - "group_attrs_map": { - "title": "Group Attrs Map", - "default": {}, - "type": "object" - } - }, - "required": [ - "ldap_server", - "ldap_user", - "ldap_password", - "base_dn" - ], - "additionalProperties": false - }, - "elasticsearch": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "elasticsearch" - ] - }, - "config": { - "$ref": "#/definitions/elasticsearch_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "elasticsearch_config": { - "title": "ElasticsearchSourceConfig", - "description": "Any source that is a primary producer of Dataset metadata should inherit this class", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "host": { - "title": "Host", - "description": "The elastic search host URI.", - "default": "localhost:9200", - "type": "string" - }, - "username": { - "title": "Username", - "description": "The username credential.", - "type": "string" - }, - "password": { - "title": "Password", - "description": "The password credential.", - "type": "string" - }, - "use_ssl": { - "title": "Use Ssl", - "description": "Whether to use SSL for the connection or not.", - "default": false, - "type": "boolean" - }, - "verify_certs": { - "title": "Verify Certs", - "description": "Whether to verify SSL certificates.", - "default": false, - "type": "boolean" - }, - "ca_certs": { - "title": "Ca Certs", - "description": "Path to a certificate authority 
(CA) certificate.", - "type": "string" - }, - "client_cert": { - "title": "Client Cert", - "description": "Path to the file containing the private key and the certificate, or cert only if using client_key.", - "type": "string" - }, - "client_key": { - "title": "Client Key", - "description": "Path to the file containing the private key if using separate cert and key files.", - "type": "string" - }, - "ssl_assert_hostname": { - "title": "Ssl Assert Hostname", - "description": "Use hostname verification if not False.", - "default": false, - "type": "boolean" - }, - "ssl_assert_fingerprint": { - "title": "Ssl Assert Fingerprint", - "description": "Verify the supplied certificate fingerprint if not None.", - "type": "string" - }, - "url_prefix": { - "title": "Url Prefix", - "description": "There are cases where an enterprise would have multiple elastic search clusters. One way for them to manage is to have a single endpoint for all the elastic search clusters and use url_prefix for routing requests to different clusters.", - "default": "", - "type": "string" - }, - "index_pattern": { - "title": "Index Pattern", - "description": "regex patterns for indexes to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [ - "^_.*", - "^ilm-history.*" - ], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - } - }, - "additionalProperties": false - }, - "superset": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "superset" - ] - }, - "config": { - "$ref": "#/definitions/superset_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "superset_config": { - "title": "SupersetConfig", - "type": "object", - "properties": { - "connect_uri": { - "title": "Connect Uri", - "description": "Superset host URL.", - "default": "localhost:8088", - "type": "string" - }, - "username": { - "title": "Username", - "description": "Superset username.", - 
"type": "string" - }, - "password": { - "title": "Password", - "description": "Superset password.", - "type": "string" - }, - "provider": { - "title": "Provider", - "description": "Superset provider.", - "default": "db", - "type": "string" - }, - "options": { - "title": "Options", - "default": {}, - "type": "object" - }, - "env": { - "title": "Env", - "description": "Environment to use in namespace when constructing URNs", - "default": "PROD", - "type": "string" - }, - "database_alias": { - "title": "Database Alias", - "description": "Can be used to change mapping for database names in superset to what you have in datahub", - "default": {}, - "type": "object", - "additionalProperties": { - "type": "string" - } - } - }, - "additionalProperties": false - }, - "datahub-lineage-file": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "datahub-lineage-file" - ] - }, - "config": { - "$ref": "#/definitions/datahub-lineage-file_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "datahub-lineage-file_config": { - "title": "LineageFileSourceConfig", - "type": "object", - "properties": { - "file": { - "title": "File", - "description": "Path to lineage file to ingest.", - "type": "string" - }, - "preserve_upstream": { - "title": "Preserve Upstream", - "description": "Whether we want to query datahub-gms for upstream data. False means it will hard replace upstream data for a given entity. 
True means it will query the backend for existing upstreams and include it in the ingestion run", - "default": true, - "type": "boolean" - } - }, - "required": [ - "file" - ], - "additionalProperties": false - }, - "s3": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "s3" - ] - }, - "config": { - "$ref": "#/definitions/s3_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "s3_config": { - "title": "DataLakeSourceConfig", - "description": "Any source that connects to a platform should inherit this class", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "default": "", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "path_specs": { - "title": "Path Specs", - "description": "List of PathSpec. 
See below the details about PathSpec", - "type": "array", - "items": { - "$ref": "#/definitions/PathSpec" - } - }, - "path_spec": { - "title": "Path Spec", - "description": "Path spec will be deprecated in favour of path_specs option.", - "allOf": [ - { - "$ref": "#/definitions/PathSpec" - } - ] - }, - "aws_config": { - "title": "Aws Config", - "description": "AWS configuration", - "allOf": [ - { - "$ref": "#/definitions/AwsSourceConfig" - } - ] - }, - "use_s3_bucket_tags": { - "title": "Use S3 Bucket Tags", - "description": "Whether or not to create tags in datahub from the s3 bucket", - "type": "boolean" - }, - "use_s3_object_tags": { - "title": "Use S3 Object Tags", - "description": "# Whether or not to create tags in datahub from the s3 object", - "type": "boolean" - }, - "profile_patterns": { - "title": "Profile Patterns", - "description": "regex patterns for tables to profile ", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "profiling": { - "title": "Profiling", - "description": "Data profiling configuration", - "default": { - "enabled": false, - "profile_table_level_only": false, - "allow_deny_patterns": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "max_number_of_fields_to_profile": null, - "include_field_null_count": true, - "include_field_min_value": true, - "include_field_max_value": true, - "include_field_mean_value": true, - "include_field_median_value": true, - "include_field_stddev_value": true, - "include_field_quantiles": true, - "include_field_distinct_value_frequencies": true, - "include_field_histogram": true, - "include_field_sample_values": true - }, - "allOf": [ - { - "$ref": "#/definitions/DataLakeProfilerConfig" - } - ] - }, - "spark_driver_memory": { - "title": "Spark Driver Memory", - "description": "Max amount of memory to grant Spark.", - 
"default": "4g", - "type": "string" - }, - "max_rows": { - "title": "Max Rows", - "description": "Maximum number of rows to use when inferring schemas for TSV and CSV files.", - "default": 100, - "type": "integer" - } - }, - "additionalProperties": false - }, - "athena": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "athena" - ] - }, - "config": { - "$ref": "#/definitions/athena_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "athena_config": { - "title": "AthenaConfig", - "description": "Base configuration class for stateful ingestion for source configs to inherit from.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "stateful_ingestion": { - "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" - }, - "options": { - "title": "Options", - "default": {}, - "type": "object" - }, - "schema_pattern": { - "title": "Schema Pattern", - "description": "regex patterns for schemas to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - "description": "regex patterns for tables to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "view_pattern": { - "title": "View Pattern", - "description": "regex 
patterns for views to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "profile_pattern": { - "title": "Profile Pattern", - "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "domain": { - "title": "Domain", - "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) There can be multiple domain key specified.", - "default": {}, - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/AllowDenyPattern" - } - }, - "include_views": { - "title": "Include Views", - "default": false, - "type": "boolean" - }, - "include_tables": { - "title": "Include Tables", - "description": "Whether tables should be ingested.", - "default": true, - "type": "boolean" - }, - "profiling": { - "title": "Profiling", - "default": { - "enabled": false, - "limit": null, - "offset": null, - "report_dropped_profiles": false, - "turn_off_expensive_profiling_metrics": false, - "profile_table_level_only": false, - "include_field_null_count": true, - "include_field_min_value": true, - "include_field_max_value": true, - "include_field_mean_value": true, - "include_field_median_value": true, - "include_field_stddev_value": true, - "include_field_quantiles": false, - "include_field_distinct_value_frequencies": false, - "include_field_histogram": false, - "include_field_sample_values": true, - "allow_deny_patterns": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "max_number_of_fields_to_profile": null, - "profile_if_updated_since_days": 1, - "max_workers": 50, - 
"query_combiner_enabled": true, - "catch_exceptions": true, - "partition_profiling_enabled": true, - "bigquery_temp_table_schema": null, - "partition_datetime": null - }, - "allOf": [ - { - "$ref": "#/definitions/GEProfilingConfig" - } - ] - }, - "scheme": { - "title": "Scheme", - "default": "awsathena+rest", - "type": "string" - }, - "username": { - "title": "Username", - "description": "Username credential. If not specified, detected with boto3 rules. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", - "type": "string" - }, - "password": { - "title": "Password", - "description": "Same detection scheme as username", - "type": "string" - }, - "database": { - "title": "Database", - "description": "The athena database to ingest from. If not set it will be autodetected", - "type": "string" - }, - "aws_region": { - "title": "Aws Region", - "description": "Aws region where your Athena database is located", - "type": "string" - }, - "s3_staging_dir": { - "title": "S3 Staging Dir", - "description": "Staging s3 location where the Athena query results will be stored", - "type": "string" - }, - "work_group": { - "title": "Work Group", - "description": "The name of your Amazon Athena Workgroups", - "type": "string" - } - }, - "required": [ - "aws_region", - "s3_staging_dir", - "work_group" - ], - "additionalProperties": false - }, - "redshift": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "redshift" - ] - }, - "config": { - "$ref": "#/definitions/redshift_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "redshift_config": { - "title": "RedshiftConfig", - "description": "Base configuration class for stateful ingestion for source configs to inherit from.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform_instance_map": { - 
"title": "Platform Instance Map", - "description": "A holder for platform -> platform_instance mappings to generate correct dataset urns", - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "bucket_duration": { - "description": "Size of the time window to aggregate usage stats.", - "default": "DAY", - "allOf": [ - { - "$ref": "#/definitions/BucketDuration" - } - ] - }, - "end_time": { - "title": "End Time", - "description": "Latest date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", - "type": "string", - "format": "date-time" - }, - "start_time": { - "title": "Start Time", - "description": "Earliest date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", - "type": "string", - "format": "date-time" - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "stateful_ingestion": { - "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" - }, - "options": { - "title": "Options", - "default": {}, - "type": "object" - }, - "schema_pattern": { - "title": "Schema Pattern", - "default": { - "allow": [ - ".*" - ], - "deny": [ - "information_schema" - ], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - "description": "regex patterns for tables to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "view_pattern": { - "title": "View Pattern", - "description": "regex patterns for views to filter in ingestion.", - 
"default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "profile_pattern": { - "title": "Profile Pattern", - "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "domain": { - "title": "Domain", - "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) There can be multiple domain key specified.", - "default": {}, - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/AllowDenyPattern" - } - }, - "include_views": { - "title": "Include Views", - "description": "Whether views should be ingested.", - "default": true, - "type": "boolean" - }, - "include_tables": { - "title": "Include Tables", - "description": "Whether tables should be ingested.", - "default": true, - "type": "boolean" - }, - "profiling": { - "title": "Profiling", - "default": { - "enabled": false, - "limit": null, - "offset": null, - "report_dropped_profiles": false, - "turn_off_expensive_profiling_metrics": false, - "profile_table_level_only": false, - "include_field_null_count": true, - "include_field_min_value": true, - "include_field_max_value": true, - "include_field_mean_value": true, - "include_field_median_value": true, - "include_field_stddev_value": true, - "include_field_quantiles": false, - "include_field_distinct_value_frequencies": false, - "include_field_histogram": false, - "include_field_sample_values": true, - "allow_deny_patterns": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "max_number_of_fields_to_profile": null, - "profile_if_updated_since_days": 1, - "max_workers": 50, - 
"query_combiner_enabled": true, - "catch_exceptions": true, - "partition_profiling_enabled": true, - "bigquery_temp_table_schema": null, - "partition_datetime": null - }, - "allOf": [ - { - "$ref": "#/definitions/GEProfilingConfig" - } - ] - }, - "username": { - "title": "Username", - "description": "username", - "type": "string" - }, - "password": { - "title": "Password", - "description": "password", - "type": "string", - "writeOnly": true, - "format": "password" - }, - "host_port": { - "title": "Host Port", - "description": "host URL", - "type": "string" - }, - "database": { - "title": "Database", - "description": "database (catalog)", - "type": "string" - }, - "database_alias": { - "title": "Database Alias", - "description": "Alias to apply to database when ingesting.", - "type": "string" - }, - "scheme": { - "title": "Scheme", - "default": "redshift+psycopg2", - "type": "string" - }, - "sqlalchemy_uri": { - "title": "Sqlalchemy Uri", - "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. 
Takes precedence over other connection parameters.", - "type": "string" - }, - "default_schema": { - "title": "Default Schema", - "description": "The default schema to use if the sql parser fails to parse the schema with `sql_based` lineage collector", - "default": "public", - "type": "string" - }, - "include_table_lineage": { - "title": "Include Table Lineage", - "description": "Whether table lineage should be ingested.", - "default": true, - "type": "boolean" - }, - "include_copy_lineage": { - "title": "Include Copy Lineage", - "description": "Whether lineage should be collected from copy commands", - "default": true, - "type": "boolean" - }, - "capture_lineage_query_parser_failures": { - "title": "Capture Lineage Query Parser Failures", - "description": "Whether to capture lineage query parser errors with dataset properties for debuggings", - "default": false, - "type": "boolean" - }, - "table_lineage_mode": { - "description": "Which table lineage collector mode to use. Available modes are: [stl_scan_based, sql_based, mixed]", - "default": "stl_scan_based", - "allOf": [ - { - "$ref": "#/definitions/LineageMode" - } - ] - } - }, - "required": [ - "host_port" - ], - "additionalProperties": false - }, - "openapi": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "openapi" - ] - }, - "config": { - "$ref": "#/definitions/openapi_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "openapi_config": { - "title": "OpenApiConfig", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - }, - "url": { - "title": "Url", - "type": "string" - }, - "swagger_file": { - "title": "Swagger File", - "type": "string" - }, - "ignore_endpoints": { - "title": "Ignore Endpoints", - "default": [], - "type": "array", - "items": {} - }, - "username": { - "title": "Username", - "default": "", - "type": "string" - }, - "password": { - "title": "Password", - "default": "", - "type": "string" - }, - 
"forced_examples": { - "title": "Forced Examples", - "default": {}, - "type": "object" - }, - "token": { - "title": "Token", - "type": "string" - }, - "get_token": { - "title": "Get Token", - "default": {}, - "type": "object" - } - }, - "required": [ - "name", - "url", - "swagger_file" - ], - "additionalProperties": false - }, - "metabase": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "metabase" - ] - }, - "config": { - "$ref": "#/definitions/metabase_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "metabase_config": { - "title": "MetabaseConfig", - "description": "Any non-Dataset source that produces lineage to Datasets should inherit this class.\ne.g. Orchestrators, Pipelines, BI Tools etc.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform_instance_map": { - "title": "Platform Instance Map", - "description": "A holder for platform -> platform_instance mappings to generate correct dataset urns", - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "connect_uri": { - "title": "Connect Uri", - "description": "Metabase host URL.", - "default": "localhost:3000", - "type": "string" - }, - "username": { - "title": "Username", - "description": "Metabase username.", - "type": "string" - }, - "password": { - "title": "Password", - "description": "Metabase password.", - "type": "string" - }, - "database_alias_map": { - "title": "Database Alias Map", - "description": "Database name map to use when constructing dataset URN.", - "type": "object" - }, - "engine_platform_map": { - "title": "Engine Platform Map", - "description": "Custom mappings between metabase database engines and DataHub platforms", - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "default_schema": { - "title": "Default Schema", - 
"description": "Default schema name to use when schema is not provided in an SQL query", - "default": "public", - "type": "string" - } - }, - "additionalProperties": false - }, - "datahub-business-glossary": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "datahub-business-glossary" - ] - }, - "config": { - "$ref": "#/definitions/datahub-business-glossary_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "datahub-business-glossary_config": { - "title": "BusinessGlossarySourceConfig", - "type": "object", - "properties": { - "file": { - "title": "File", - "description": "Path to business glossary file to ingest.", - "type": "string" - } - }, - "required": [ - "file" - ], - "additionalProperties": false - }, - "clickhouse-usage": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "clickhouse-usage" - ] - }, - "config": { - "$ref": "#/definitions/clickhouse-usage_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "clickhouse-usage_config": { - "title": "ClickHouseUsageConfig", - "description": "Base configuration class for stateful ingestion for source configs to inherit from.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "bucket_duration": { - "description": "Size of the time window to aggregate usage stats.", - "default": "DAY", - "allOf": [ - { - "$ref": "#/definitions/BucketDuration" - } - ] - }, - "end_time": { - "title": "End Time", - "description": "Latest date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", - "type": "string", - "format": "date-time" - }, - "start_time": { - "title": "Start Time", - "description": "Earliest date of usage to consider. 
Default: Last full day in UTC (or hour, depending on `bucket_duration`)", - "type": "string", - "format": "date-time" - }, - "top_n_queries": { - "title": "Top N Queries", - "description": "Number of top queries to save to each table.", - "default": 10, - "exclusiveMinimum": 0, - "type": "integer" - }, - "user_email_pattern": { - "title": "User Email Pattern", - "description": "regex patterns for user emails to filter in usage.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "include_operational_stats": { - "title": "Include Operational Stats", - "description": "Whether to display operational stats.", - "default": true, - "type": "boolean" - }, - "include_read_operational_stats": { - "title": "Include Read Operational Stats", - "description": "Whether to report read operational stats. Experimental.", - "default": false, - "type": "boolean" - }, - "format_sql_queries": { - "title": "Format Sql Queries", - "description": "Whether to format sql queries", - "default": false, - "type": "boolean" - }, - "include_top_n_queries": { - "title": "Include Top N Queries", - "description": "Whether to ingest the top_n_queries.", - "default": true, - "type": "boolean" - }, - "platform_instance_map": { - "title": "Platform Instance Map", - "description": "A holder for platform -> platform_instance mappings to generate correct dataset urns", - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "stateful_ingestion": { - "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" - }, - "options": { - "title": "Options", - 
"default": {}, - "type": "object" - }, - "schema_pattern": { - "title": "Schema Pattern", - "description": "regex patterns for schemas to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - "description": "regex patterns for tables to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "view_pattern": { - "title": "View Pattern", - "description": "regex patterns for views to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "profile_pattern": { - "title": "Profile Pattern", - "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "domain": { - "title": "Domain", - "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", - "default": {}, - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/AllowDenyPattern" - } - }, - "include_views": { - "title": "Include Views", - "description": "Whether views should be ingested.", - "default": true, - "type": "boolean" - }, - "include_tables": { - "title": "Include Tables", - "description": "Whether tables should be ingested.", - "default": true, - "type": "boolean" - }, - "profiling": { - "title": "Profiling", - "default": { - "enabled": false, - "limit": null, - "offset": null, - "report_dropped_profiles": false, - "turn_off_expensive_profiling_metrics": false, - "profile_table_level_only": false, - "include_field_null_count": true, - "include_field_min_value": true, - "include_field_max_value": true, - "include_field_mean_value": true, - "include_field_median_value": true, - "include_field_stddev_value": true, - "include_field_quantiles": false, - "include_field_distinct_value_frequencies": false, - "include_field_histogram": false, - "include_field_sample_values": true, - "allow_deny_patterns": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "max_number_of_fields_to_profile": null, - "profile_if_updated_since_days": 1, - "max_workers": 50, - "query_combiner_enabled": true, - "catch_exceptions": true, - "partition_profiling_enabled": true, - "bigquery_temp_table_schema": null, - "partition_datetime": null - }, - "allOf": [ - { - "$ref": "#/definitions/GEProfilingConfig" - } - ] - }, - "username": { - "title": "Username", - "description": "username", - "type": "string" - }, - "password": { - "title": "Password", - "description": "password", - "type": "string", - "writeOnly": true, - "format": "password" - }, - "host_port": { - "title": "Host Port", - "description": "ClickHouse host URL.", - "default": "localhost:8123", - "type": "string" - }, - "database": { - "title": "Database", - "description": "database (catalog)", - 
"type": "string" - }, - "database_alias": { - "title": "Database Alias", - "description": "Alias to apply to database when ingesting.", - "type": "string" - }, - "scheme": { - "title": "Scheme", - "default": "clickhouse", - "type": "string" - }, - "sqlalchemy_uri": { - "title": "Sqlalchemy Uri", - "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", - "type": "string" - }, - "secure": { - "title": "Secure", - "type": "boolean" - }, - "protocol": { - "title": "Protocol", - "type": "string" - }, - "include_table_lineage": { - "title": "Include Table Lineage", - "description": "Whether table lineage should be ingested.", - "default": true, - "type": "boolean" - }, - "include_materialized_views": { - "title": "Include Materialized Views", - "default": true, - "type": "boolean" - }, - "email_domain": { - "title": "Email Domain", - "type": "string" - }, - "query_log_table": { - "title": "Query Log Table", - "default": "system.query_log", - "type": "string" - } - }, - "required": [ - "email_domain" - ], - "additionalProperties": false - }, - "mongodb": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "mongodb" - ] - }, - "config": { - "$ref": "#/definitions/mongodb_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "mongodb_config": { - "title": "MongoDBConfig", - "description": "Any source that produces dataset urns in a single environment should inherit this class", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "connect_uri": { - "title": "Connect Uri", - "description": "MongoDB connection URI.", - "default": "mongodb://localhost", - "type": "string" - }, - "username": { - "title": "Username", - "description": "MongoDB username.", - "type": "string" 
- }, - "password": { - "title": "Password", - "description": "MongoDB password.", - "type": "string" - }, - "authMechanism": { - "title": "Authmechanism", - "description": "MongoDB authentication mechanism.", - "type": "string" - }, - "options": { - "title": "Options", - "description": "Additional options to pass to `pymongo.MongoClient()`.", - "default": {}, - "type": "object" - }, - "enableSchemaInference": { - "title": "Enableschemainference", - "description": "Whether to infer schemas. ", - "default": true, - "type": "boolean" - }, - "schemaSamplingSize": { - "title": "Schemasamplingsize", - "description": "Number of documents to use when inferring schema size. If set to `0`, all documents will be scanned.", - "default": 1000, - "exclusiveMinimum": 0, - "type": "integer" - }, - "useRandomSampling": { - "title": "Userandomsampling", - "description": "If documents for schema inference should be randomly selected. If `False`, documents will be selected from start.", - "default": true, - "type": "boolean" - }, - "maxSchemaSize": { - "title": "Maxschemasize", - "description": "Maximum number of fields to include in the schema.", - "default": 300, - "exclusiveMinimum": 0, - "type": "integer" - }, - "maxDocumentSize": { - "title": "Maxdocumentsize", - "default": 16793600, - "exclusiveMinimum": 0, - "type": "integer" - }, - "database_pattern": { - "title": "Database Pattern", - "description": "regex patterns for databases to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "collection_pattern": { - "title": "Collection Pattern", - "description": "regex patterns for collections to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - } - }, - 
"additionalProperties": false - }, - "nifi": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "nifi" - ] - }, - "config": { - "$ref": "#/definitions/nifi_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "nifi_config": { - "title": "NifiSourceConfig", - "description": "Any source that produces dataset urns in a single environment should inherit this class", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "site_url": { - "title": "Site Url", - "description": "URI to connect", - "type": "string" - }, - "auth": { - "description": "Nifi authentication. must be one of : NO_AUTH, SINGLE_USER, CLIENT_CERT", - "default": "NO_AUTH", - "allOf": [ - { - "$ref": "#/definitions/NifiAuthType" - } - ] - }, - "provenance_days": { - "title": "Provenance Days", - "description": "time window to analyze provenance events for external datasets", - "default": 7, - "type": "integer" - }, - "process_group_pattern": { - "title": "Process Group Pattern", - "description": "regex patterns for filtering process groups", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "site_name": { - "title": "Site Name", - "description": "Site name to identify this site with, useful when using input and output ports receiving remote connections", - "default": "default", - "type": "string" - }, - "site_url_to_site_name": { - "title": "Site Url To Site Name", - "description": "Lookup to find site_name for site_url, required if using remote process groups in nifi flow", - "default": {}, - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "username": { - "title": "Username", - "description": "Nifi username, must be set for auth = \"SINGLE_USER\"", 
- "type": "string" - }, - "password": { - "title": "Password", - "description": "Nifi password, must be set for auth = \"SINGLE_USER\"", - "type": "string" - }, - "client_cert_file": { - "title": "Client Cert File", - "description": "Path to PEM file containing the public certificates for the user/client identity, must be set for auth = \"CLIENT_CERT\"", - "type": "string" - }, - "client_key_file": { - "title": "Client Key File", - "description": "Path to PEM file containing the client\u2019s secret key", - "type": "string" - }, - "client_key_password": { - "title": "Client Key Password", - "description": "The password to decrypt the client_key_file", - "type": "string" - }, - "ca_file": { - "title": "Ca File", - "description": "Path to PEM file containing certs for the root CA(s) for the NiFi", - "type": "string" - } - }, - "required": [ - "site_url" - ], - "additionalProperties": false - }, - "clickhouse": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "clickhouse" - ] - }, - "config": { - "$ref": "#/definitions/clickhouse_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "clickhouse_config": { - "title": "ClickHouseConfig", - "description": "Base configuration class for stateful ingestion for source configs to inherit from.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform_instance_map": { - "title": "Platform Instance Map", - "description": "A holder for platform -> platform_instance mappings to generate correct dataset urns", - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "bucket_duration": { - "description": "Size of the time window to aggregate usage stats.", - "default": "DAY", - "allOf": [ - { - "$ref": "#/definitions/BucketDuration" - } - ] - }, - "end_time": { - "title": "End Time", - "description": "Latest 
date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", - "type": "string", - "format": "date-time" - }, - "start_time": { - "title": "Start Time", - "description": "Earliest date of usage to consider. Default: Last full day in UTC (or hour, depending on `bucket_duration`)", - "type": "string", - "format": "date-time" - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "stateful_ingestion": { - "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" - }, - "options": { - "title": "Options", - "default": {}, - "type": "object" - }, - "schema_pattern": { - "title": "Schema Pattern", - "description": "regex patterns for schemas to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - "description": "regex patterns for tables to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "view_pattern": { - "title": "View Pattern", - "description": "regex patterns for views to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "profile_pattern": { - "title": "Profile Pattern", - "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - 
"alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "domain": { - "title": "Domain", - "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) There can be multiple domain key specified.", - "default": {}, - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/AllowDenyPattern" - } - }, - "include_views": { - "title": "Include Views", - "description": "Whether views should be ingested.", - "default": true, - "type": "boolean" - }, - "include_tables": { - "title": "Include Tables", - "description": "Whether tables should be ingested.", - "default": true, - "type": "boolean" - }, - "profiling": { - "title": "Profiling", - "default": { - "enabled": false, - "limit": null, - "offset": null, - "report_dropped_profiles": false, - "turn_off_expensive_profiling_metrics": false, - "profile_table_level_only": false, - "include_field_null_count": true, - "include_field_min_value": true, - "include_field_max_value": true, - "include_field_mean_value": true, - "include_field_median_value": true, - "include_field_stddev_value": true, - "include_field_quantiles": false, - "include_field_distinct_value_frequencies": false, - "include_field_histogram": false, - "include_field_sample_values": true, - "allow_deny_patterns": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "max_number_of_fields_to_profile": null, - "profile_if_updated_since_days": 1, - "max_workers": 50, - "query_combiner_enabled": true, - "catch_exceptions": true, - "partition_profiling_enabled": true, - "bigquery_temp_table_schema": null, - "partition_datetime": null - }, - "allOf": [ - { - "$ref": "#/definitions/GEProfilingConfig" - } - ] - }, - "username": { - "title": "Username", - "description": "username", - "type": "string" - }, - "password": { - "title": "Password", - "description": "password", - "type": 
"string", - "writeOnly": true, - "format": "password" - }, - "host_port": { - "title": "Host Port", - "description": "ClickHouse host URL.", - "default": "localhost:8123", - "type": "string" - }, - "database": { - "title": "Database", - "description": "database (catalog)", - "type": "string" - }, - "database_alias": { - "title": "Database Alias", - "description": "Alias to apply to database when ingesting.", - "type": "string" - }, - "scheme": { - "title": "Scheme", - "default": "clickhouse", - "type": "string" - }, - "sqlalchemy_uri": { - "title": "Sqlalchemy Uri", - "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", - "type": "string" - }, - "secure": { - "title": "Secure", - "type": "boolean" - }, - "protocol": { - "title": "Protocol", - "type": "string" - }, - "include_table_lineage": { - "title": "Include Table Lineage", - "description": "Whether table lineage should be ingested.", - "default": true, - "type": "boolean" - }, - "include_materialized_views": { - "title": "Include Materialized Views", - "default": true, - "type": "boolean" - } - }, - "additionalProperties": false - }, - "dbt": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "dbt" - ] - }, - "config": { - "$ref": "#/definitions/dbt_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "dbt_config": { - "title": "DBTConfig", - "description": "Base configuration class for stateful ingestion for source configs to inherit from.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "Environment to use in namespace when constructing URNs.", - "default": "PROD", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the 
platform that all assets produced by this recipe belong to", - "type": "string" - }, - "stateful_ingestion": { - "$ref": "#/definitions/DBTStatefulIngestionConfig" - }, - "manifest_path": { - "title": "Manifest Path", - "description": "Path to dbt manifest JSON. See https://docs.getdbt.com/reference/artifacts/manifest-json Note this can be a local file or a URI.", - "type": "string" - }, - "catalog_path": { - "title": "Catalog Path", - "description": "Path to dbt catalog JSON. See https://docs.getdbt.com/reference/artifacts/catalog-json Note this can be a local file or a URI.", - "type": "string" - }, - "sources_path": { - "title": "Sources Path", - "description": "Path to dbt sources JSON. See https://docs.getdbt.com/reference/artifacts/sources-json. If not specified, last-modified fields will not be populated. Note this can be a local file or a URI.", - "type": "string" - }, - "test_results_path": { - "title": "Test Results Path", - "description": "Path to output of dbt test run as run_results file in JSON format. See https://docs.getdbt.com/reference/artifacts/run-results-json. If not specified, test execution results will not be populated in DataHub.", - "type": "string" - }, - "target_platform": { - "title": "Target Platform", - "description": "The platform that dbt is loading onto. (e.g. bigquery / redshift / postgres etc.)", - "type": "string" - }, - "target_platform_instance": { - "title": "Target Platform Instance", - "description": "The platform instance for the platform that dbt is operating on. Use this if you have multiple instances of the same platform (e.g. redshift) and need to distinguish between them.", - "type": "string" - }, - "load_schemas": { - "title": "Load Schemas", - "description": "This flag is only consulted when disable_dbt_node_creation is set to True. Load schemas for target_platform entities from dbt catalog file, not necessary when you are already ingesting this metadata from the data platform directly. 
If set to False, table schema details (e.g. columns) will not be ingested.", - "default": true, - "type": "boolean" - }, - "use_identifiers": { - "title": "Use Identifiers", - "description": "Use model identifier instead of model name if defined (if not, default to model name).", - "default": false, - "type": "boolean" - }, - "node_type_pattern": { - "title": "Node Type Pattern", - "description": "regex patterns for dbt nodes to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "tag_prefix": { - "title": "Tag Prefix", - "description": "Prefix added to tags during ingestion.", - "default": "dbt:", - "type": "string" - }, - "node_name_pattern": { - "title": "Node Name Pattern", - "description": "regex patterns for dbt model names to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "meta_mapping": { - "title": "Meta Mapping", - "description": "mapping rules that will be executed against dbt meta properties. Refer to the section below on dbt meta automated mappings.", - "default": {}, - "type": "object" - }, - "query_tag_mapping": { - "title": "Query Tag Mapping", - "description": "mapping rules that will be executed against dbt query_tag meta properties. Refer to the section below on dbt meta automated mappings.", - "default": {}, - "type": "object" - }, - "write_semantics": { - "title": "Write Semantics", - "description": "Whether the new tags, terms and owners to be added will override the existing ones added only by this source or not. 
Value for this config can be \"PATCH\" or \"OVERRIDE\"", - "default": "PATCH", - "type": "string" - }, - "strip_user_ids_from_email": { - "title": "Strip User Ids From Email", - "description": "Whether or not to strip email id while adding owners using dbt meta actions.", - "default": false, - "type": "boolean" - }, - "owner_extraction_pattern": { - "title": "Owner Extraction Pattern", - "description": "Regex string to extract owner from the dbt node using the `(?P...) syntax` of the [match object](https://docs.python.org/3/library/re.html#match-objects), where the group name must be `owner`. Examples: (1)`r\"(?P(.*)): (\\w+) (\\w+)\"` will extract `jdoe` as the owner from `\"jdoe: John Doe\"` (2) `r\"@(?P(.*))\"` will extract `alice` as the owner from `\"@alice\"`.", - "type": "string" - }, - "aws_connection": { - "title": "Aws Connection", - "description": "When fetching manifest files from s3, configuration for aws connection details", - "allOf": [ - { - "$ref": "#/definitions/AwsConnectionConfig" - } - ] - }, - "delete_tests_as_datasets": { - "title": "Delete Tests As Datasets", - "description": "Prior to version 0.8.38, dbt tests were represented as datasets. If you ingested dbt tests before, set this flag to True (just needed once) to soft-delete tests that were generated as datasets by previous ingestion.", - "default": false, - "type": "boolean" - }, - "disable_dbt_node_creation": { - "title": "Disable Dbt Node Creation", - "description": "Whether to suppress dbt dataset metadata creation. When set to True, this flag applies the dbt metadata to the target_platform entities (e.g. 
populating schema and column descriptions from dbt into the postgres / bigquery table metadata in DataHub) and generates lineage between the platform entities.", - "default": false, - "type": "boolean" - }, - "enable_meta_mapping": { - "title": "Enable Meta Mapping", - "description": "When enabled, applies the mappings that are defined through the meta_mapping directives.", - "default": true, - "type": "boolean" - }, - "enable_query_tag_mapping": { - "title": "Enable Query Tag Mapping", - "description": "When enabled, applies the mappings that are defined through the `query_tag_mapping` directives.", - "default": true, - "type": "boolean" - } - }, - "required": [ - "manifest_path", - "catalog_path", - "target_platform" - ], - "additionalProperties": false - }, - "mysql": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "mysql" - ] - }, - "config": { - "$ref": "#/definitions/mysql_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "mysql_config": { - "title": "MySQLConfig", - "description": "Base configuration class for stateful ingestion for source configs to inherit from.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "stateful_ingestion": { - "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" - }, - "options": { - "title": "Options", - "default": {}, - "type": "object" - }, - "schema_pattern": { - "title": "Schema Pattern", - "description": "regex patterns for schemas to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - 
"ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - "description": "regex patterns for tables to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "view_pattern": { - "title": "View Pattern", - "description": "regex patterns for views to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "profile_pattern": { - "title": "Profile Pattern", - "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "domain": { - "title": "Domain", - "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", - "default": {}, - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/AllowDenyPattern" - } - }, - "include_views": { - "title": "Include Views", - "description": "Whether views should be ingested.", - "default": true, - "type": "boolean" - }, - "include_tables": { - "title": "Include Tables", - "description": "Whether tables should be ingested.", - "default": true, - "type": "boolean" - }, - "profiling": { - "title": "Profiling", - "default": { - "enabled": false, - "limit": null, - "offset": null, - "report_dropped_profiles": false, - "turn_off_expensive_profiling_metrics": false, - "profile_table_level_only": false, - "include_field_null_count": true, - "include_field_min_value": true, - "include_field_max_value": true, - "include_field_mean_value": true, - "include_field_median_value": true, - "include_field_stddev_value": true, - "include_field_quantiles": false, - "include_field_distinct_value_frequencies": false, - "include_field_histogram": false, - "include_field_sample_values": true, - "allow_deny_patterns": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "max_number_of_fields_to_profile": null, - "profile_if_updated_since_days": 1, - "max_workers": 50, - "query_combiner_enabled": true, - "catch_exceptions": true, - "partition_profiling_enabled": true, - "bigquery_temp_table_schema": null, - "partition_datetime": null - }, - "allOf": [ - { - "$ref": "#/definitions/GEProfilingConfig" - } - ] - }, - "username": { - "title": "Username", - "description": "username", - "type": "string" - }, - "password": { - "title": "Password", - "description": "password", - "type": "string", - "writeOnly": true, - "format": "password" - }, - "host_port": { - "title": "Host Port", - "description": "MySQL host URL.", - "default": "localhost:3306", - "type": "string" - }, - "database": { - "title": "Database", - "description": "database (catalog)", - "type": 
"string" - }, - "database_alias": { - "title": "Database Alias", - "description": "Alias to apply to database when ingesting.", - "type": "string" - }, - "scheme": { - "title": "Scheme", - "default": "mysql+pymysql", - "type": "string" - }, - "sqlalchemy_uri": { - "title": "Sqlalchemy Uri", - "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", - "type": "string" - } - }, - "additionalProperties": false - }, - "trino": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "trino" - ] - }, - "config": { - "$ref": "#/definitions/trino_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "trino_config": { - "title": "TrinoConfig", - "description": "Base configuration class for stateful ingestion for source configs to inherit from.", - "type": "object", - "properties": { - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "platform": { - "title": "Platform", - "description": "The platform that this source connects to", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "description": "The instance of the platform that all assets produced by this recipe belong to", - "type": "string" - }, - "stateful_ingestion": { - "$ref": "#/definitions/SQLAlchemyStatefulIngestionConfig" - }, - "options": { - "title": "Options", - "default": {}, - "type": "object" - }, - "schema_pattern": { - "title": "Schema Pattern", - "description": "regex patterns for schemas to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - "description": "regex patterns for tables to filter in 
ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "view_pattern": { - "title": "View Pattern", - "description": "regex patterns for views to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "profile_pattern": { - "title": "Profile Pattern", - "description": "regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "domain": { - "title": "Domain", - "description": " regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like \"sales\".) 
There can be multiple domain key specified.", - "default": {}, - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/AllowDenyPattern" - } - }, - "include_views": { - "title": "Include Views", - "description": "Whether views should be ingested.", - "default": true, - "type": "boolean" - }, - "include_tables": { - "title": "Include Tables", - "description": "Whether tables should be ingested.", - "default": true, - "type": "boolean" - }, - "profiling": { - "title": "Profiling", - "default": { - "enabled": false, - "limit": null, - "offset": null, - "report_dropped_profiles": false, - "turn_off_expensive_profiling_metrics": false, - "profile_table_level_only": false, - "include_field_null_count": true, - "include_field_min_value": true, - "include_field_max_value": true, - "include_field_mean_value": true, - "include_field_median_value": true, - "include_field_stddev_value": true, - "include_field_quantiles": false, - "include_field_distinct_value_frequencies": false, - "include_field_histogram": false, - "include_field_sample_values": true, - "allow_deny_patterns": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "max_number_of_fields_to_profile": null, - "profile_if_updated_since_days": 1, - "max_workers": 50, - "query_combiner_enabled": true, - "catch_exceptions": true, - "partition_profiling_enabled": true, - "bigquery_temp_table_schema": null, - "partition_datetime": null - }, - "allOf": [ - { - "$ref": "#/definitions/GEProfilingConfig" - } - ] - }, - "username": { - "title": "Username", - "description": "username", - "type": "string" - }, - "password": { - "title": "Password", - "description": "password", - "type": "string", - "writeOnly": true, - "format": "password" - }, - "host_port": { - "title": "Host Port", - "description": "host URL", - "type": "string" - }, - "database": { - "title": "Database", - "description": "database (catalog)", - "type": "string" - }, - "database_alias": { - 
"title": "Database Alias", - "description": "Alias to apply to database when ingesting.", - "type": "string" - }, - "scheme": { - "title": "Scheme", - "default": "trino", - "type": "string" - }, - "sqlalchemy_uri": { - "title": "Sqlalchemy Uri", - "description": "URI of database to connect to. See https://docs.sqlalchemy.org/en/14/core/engines.html#database-urls. Takes precedence over other connection parameters.", - "type": "string" - } - }, - "required": [ - "host_port" - ], - "additionalProperties": false - }, - "okta": { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "okta" - ] - }, - "config": { - "$ref": "#/definitions/okta_config" - } - }, - "required": [ - "type", - "config" - ] - }, - "okta_config": { - "title": "OktaConfig", - "type": "object", - "properties": { - "okta_domain": { - "title": "Okta Domain", - "description": "The location of your Okta Domain, without a protocol. Can be found in Okta Developer console.", - "default": "dev-33231928.okta.com", - "type": "string" - }, - "okta_api_token": { - "title": "Okta Api Token", - "description": "An API token generated for the DataHub application inside your Okta Developer Console.", - "default": "00be4R_M2MzDqXawbWgfKGpKee0kuEOfX1RCQSRx00", - "type": "string" - }, - "ingest_users": { - "title": "Ingest Users", - "description": "Whether users should be ingested into DataHub.", - "default": true, - "type": "boolean" - }, - "ingest_groups": { - "title": "Ingest Groups", - "description": "Whether groups should be ingested into DataHub.", - "default": true, - "type": "boolean" - }, - "ingest_group_membership": { - "title": "Ingest Group Membership", - "description": "Whether group membership should be ingested into DataHub. 
ingest_groups must be True if this is True.", - "default": true, - "type": "boolean" - }, - "okta_profile_to_username_attr": { - "title": "Okta Profile To Username Attr", - "description": "Which Okta User Profile attribute to use as input to DataHub username mapping.", - "default": "login", - "type": "string" - }, - "okta_profile_to_username_regex": { - "title": "Okta Profile To Username Regex", - "description": "A regex used to parse the DataHub username from the attribute specified in `okta_profile_to_username_attr`.", - "default": "([^@]+)", - "type": "string" - }, - "okta_profile_to_group_name_attr": { - "title": "Okta Profile To Group Name Attr", - "description": "Which Okta Group Profile attribute to use as input to DataHub group name mapping.", - "default": "name", - "type": "string" - }, - "okta_profile_to_group_name_regex": { - "title": "Okta Profile To Group Name Regex", - "description": "A regex used to parse the DataHub group name from the attribute specified in `okta_profile_to_group_name_attr`.", - "default": "(.*)", - "type": "string" - }, - "include_deprovisioned_users": { - "title": "Include Deprovisioned Users", - "description": "Whether to ingest users in the DEPROVISIONED state from Okta.", - "default": false, - "type": "boolean" - }, - "include_suspended_users": { - "title": "Include Suspended Users", - "description": "Whether to ingest users in the SUSPENDED state from Okta.", - "default": false, - "type": "boolean" - }, - "page_size": { - "title": "Page Size", - "description": "The number of entities requested from Okta's REST APIs in one request.", - "default": 100, - "type": "integer" - }, - "delay_seconds": { - "title": "Delay Seconds", - "description": "Number of seconds to wait between calls to Okta's REST APIs. (Okta rate limits). 
Defaults to 10ms.", - "default": 0.01, - "anyOf": [ - { - "type": "number" - }, - { - "type": "integer" - } - ] - }, - "okta_users_filter": { - "title": "Okta Users Filter", - "description": "Okta filter expression (not regex) for ingesting users. Only one of `okta_users_filter` and `okta_users_search` can be set. See (https://developer.okta.com/docs/reference/api/users/#list-users-with-a-filter) for more info.", - "type": "string" - }, - "okta_users_search": { - "title": "Okta Users Search", - "description": "Okta search expression (not regex) for ingesting users. Only one of `okta_users_filter` and `okta_users_search` can be set. See (https://developer.okta.com/docs/reference/api/users/#list-users-with-search) for more info.", - "type": "string" - }, - "okta_groups_filter": { - "title": "Okta Groups Filter", - "description": "Okta filter expression (not regex) for ingesting groups. Only one of `okta_groups_filter` and `okta_groups_search` can be set. See (https://developer.okta.com/docs/reference/api/groups/#filters) for more info.", - "type": "string" - }, - "okta_groups_search": { - "title": "Okta Groups Search", - "description": "Okta search expression (not regex) for ingesting groups. Only one of `okta_groups_filter` and `okta_groups_search` can be set. 
See (https://developer.okta.com/docs/reference/api/groups/#list-groups-with-search) for more info.", - "type": "string" - }, - "mask_group_id": { - "title": "Mask Group Id", - "default": true, - "type": "boolean" - }, - "mask_user_id": { - "title": "Mask User Id", - "default": true, - "type": "boolean" - } - }, - "additionalProperties": false - }, - "AllowDenyPattern": { - "title": "AllowDenyPattern", - "description": "A class to store allow deny regexes", - "type": "object", - "properties": { - "allow": { - "title": "Allow", - "description": "List of regex patterns for process groups to include in ingestion", - "default": [ - ".*" - ], - "type": "array", - "items": { - "type": "string" - } - }, - "deny": { - "title": "Deny", - "description": "List of regex patterns for process groups to exclude from ingestion.", - "default": [], - "type": "array", - "items": { - "type": "string" - } - }, - "ignoreCase": { - "title": "Ignorecase", - "description": "Whether to ignore case sensitivity during pattern matching.", - "default": true, - "type": "boolean" - }, - "alphabet": { - "title": "Alphabet", - "description": "Allowed alphabets pattern", - "default": "[A-Za-z0-9 _.-]", - "type": "string" - } - }, - "additionalProperties": false - }, - "AdlsSourceConfig": { - "title": "AdlsSourceConfig", - "description": "Common Azure credentials config.\n\nhttps://docs.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-directory-file-acl-python", - "type": "object", - "properties": { - "base_path": { - "title": "Base Path", - "description": "Base folder in hierarchical namespaces to start from.", - "default": "/", - "type": "string" - }, - "container_name": { - "title": "Container Name", - "description": "Azure storage account container name.", - "type": "string" - }, - "account_name": { - "title": "Account Name", - "description": "Name of the Azure storage account. 
See [Microsoft official documentation on how to create a storage account.](https://docs.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account)", - "type": "string" - }, - "account_key": { - "title": "Account Key", - "description": "Azure storage account access key that can be used as a credential. **An account key, a SAS token or a client secret is required for authentication.**", - "type": "string" - }, - "sas_token": { - "title": "Sas Token", - "description": "Azure storage account Shared Access Signature (SAS) token that can be used as a credential. **An account key, a SAS token or a client secret is required for authentication.**", - "type": "string" - }, - "client_secret": { - "title": "Client Secret", - "description": "Azure client secret that can be used as a credential. **An account key, a SAS token or a client secret is required for authentication.**", - "type": "string" - }, - "client_id": { - "title": "Client Id", - "description": "Azure client (Application) ID required when a `client_secret` is used as a credential.", - "type": "string" - }, - "tenant_id": { - "title": "Tenant Id", - "description": "Azure tenant (Directory) ID required when a `client_secret` is used as a credential.", - "type": "string" - } - }, - "required": [ - "container_name", - "account_name" - ], - "additionalProperties": false - }, - "IcebergProfilingConfig": { - "title": "IcebergProfilingConfig", - "type": "object", - "properties": { - "enabled": { - "title": "Enabled", - "description": "Whether profiling should be done.", - "default": false, - "type": "boolean" - }, - "include_field_null_count": { - "title": "Include Field Null Count", - "description": "Whether to profile for the number of nulls for each column.", - "default": true, - "type": "boolean" - }, - "include_field_min_value": { - "title": "Include Field Min Value", - "description": "Whether to profile for the min value of numeric columns.", - "default": true, - "type": "boolean" - }, - 
"include_field_max_value": { - "title": "Include Field Max Value", - "description": "Whether to profile for the max value of numeric columns.", - "default": true, - "type": "boolean" - } - }, - "additionalProperties": false - }, - "DynamicTypedStateProviderConfig": { - "title": "DynamicTypedStateProviderConfig", - "type": "object", - "properties": { - "type": { - "title": "Type", - "description": "The type of the state provider to use. For DataHub use `datahub`", - "type": "string" - }, - "config": { - "title": "Config", - "description": "The configuration required for initializing the state provider. Default: The datahub_api config if set at pipeline level. Otherwise, the default DatahubClientConfig. See the defaults (https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/graph/client.py#L19)." - } - }, - "required": [ - "type" - ], - "additionalProperties": false - }, - "PulsarSourceStatefulIngestionConfig": { - "title": "PulsarSourceStatefulIngestionConfig", - "description": "Specialization of the basic StatefulIngestionConfig to add custom config.\nThis will be used to override the stateful_ingestion config param of StatefulIngestionConfigBase\nin the PulsarSourceConfig.", - "type": "object", - "properties": { - "enabled": { - "title": "Enabled", - "description": "The type of the ingestion state provider registered with datahub.", - "default": false, - "type": "boolean" - }, - "max_checkpoint_state_size": { - "title": "Max Checkpoint State Size", - "description": "The maximum size of the checkpoint state in bytes. 
Default is 16MB", - "default": 16777216, - "exclusiveMinimum": 0, - "type": "integer" - }, - "state_provider": { - "title": "State Provider", - "description": "The ingestion state provider configuration.", - "allOf": [ - { - "$ref": "#/definitions/DynamicTypedStateProviderConfig" - } - ] - }, - "ignore_old_state": { - "title": "Ignore Old State", - "description": "If set to True, ignores the previous checkpoint state.", - "default": false, - "type": "boolean" - }, - "ignore_new_state": { - "title": "Ignore New State", - "description": "If set to True, ignores the current checkpoint state.", - "default": false, - "type": "boolean" - }, - "remove_stale_metadata": { - "title": "Remove Stale Metadata", - "default": true, - "type": "boolean" - } - }, - "additionalProperties": false - }, - "NamingPattern": { - "title": "NamingPattern", - "type": "object", - "properties": { - "allowed_vars": { - "title": "Allowed Vars", - "type": "array", - "items": { - "type": "string" - } - }, - "pattern": { - "title": "Pattern", - "type": "string" - }, - "variables": { - "title": "Variables", - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "allowed_vars", - "pattern" - ] - }, - "GitHubInfo": { - "title": "GitHubInfo", - "type": "object", - "properties": { - "repo": { - "title": "Repo", - "description": "Name of your github repo. e.g. repo for https://github.com/datahub-project/datahub is `datahub-project/datahub`.", - "type": "string" - }, - "branch": { - "title": "Branch", - "description": "Branch on which your files live by default. 
Typically main or master.", - "default": "main", - "type": "string" - }, - "base_url": { - "title": "Base Url", - "description": "Base url for Github", - "default": "https://github.com", - "type": "string" - } - }, - "required": [ - "repo" - ], - "additionalProperties": false - }, - "TransportOptionsConfig": { - "title": "TransportOptionsConfig", - "type": "object", - "properties": { - "timeout": { - "title": "Timeout", - "type": "integer" - }, - "headers": { - "title": "Headers", - "type": "object", - "additionalProperties": { - "type": "string" - } - } - }, - "required": [ - "timeout", - "headers" - ], - "additionalProperties": false - }, - "SQLAlchemyStatefulIngestionConfig": { - "title": "SQLAlchemyStatefulIngestionConfig", - "description": "Specialization of basic StatefulIngestionConfig to adding custom config.\nThis will be used to override the stateful_ingestion config param of StatefulIngestionConfigBase\nin the SQLAlchemyConfig.", - "type": "object", - "properties": { - "enabled": { - "title": "Enabled", - "description": "The type of the ingestion state provider registered with datahub.", - "default": false, - "type": "boolean" - }, - "max_checkpoint_state_size": { - "title": "Max Checkpoint State Size", - "description": "The maximum size of the checkpoint state in bytes. 
Default is 16MB", - "default": 16777216, - "exclusiveMinimum": 0, - "type": "integer" - }, - "state_provider": { - "title": "State Provider", - "description": "The ingestion state provider configuration.", - "allOf": [ - { - "$ref": "#/definitions/DynamicTypedStateProviderConfig" - } - ] - }, - "ignore_old_state": { - "title": "Ignore Old State", - "description": "If set to True, ignores the previous checkpoint state.", - "default": false, - "type": "boolean" - }, - "ignore_new_state": { - "title": "Ignore New State", - "description": "If set to True, ignores the current checkpoint state.", - "default": false, - "type": "boolean" - }, - "remove_stale_metadata": { - "title": "Remove Stale Metadata", - "description": "Soft-deletes the tables and views that were found in the last successful run but missing in the current run with stateful_ingestion enabled.", - "default": true, - "type": "boolean" - } - }, - "additionalProperties": false - }, - "GEProfilingConfig": { - "title": "GEProfilingConfig", - "type": "object", - "properties": { - "enabled": { - "title": "Enabled", - "description": "Whether profiling should be done.", - "default": false, - "type": "boolean" - }, - "limit": { - "title": "Limit", - "description": "Max number of documents to profile. By default, profiles all documents.", - "type": "integer" - }, - "offset": { - "title": "Offset", - "description": "Offset in documents to profile. By default, uses no offset.", - "type": "integer" - }, - "report_dropped_profiles": { - "title": "Report Dropped Profiles", - "description": "If datasets which were not profiled are reported in source report or not. Set to `True` for debugging purposes.", - "default": false, - "type": "boolean" - }, - "turn_off_expensive_profiling_metrics": { - "title": "Turn Off Expensive Profiling Metrics", - "description": "Whether to turn off expensive profiling or not. This turns off profiling for quantiles, distinct_value_frequencies, histogram & sample_values. 
This also limits maximum number of fields being profiled to 10.", - "default": false, - "type": "boolean" - }, - "profile_table_level_only": { - "title": "Profile Table Level Only", - "description": "Whether to perform profiling at table-level only, or include column-level profiling as well.", - "default": false, - "type": "boolean" - }, - "include_field_null_count": { - "title": "Include Field Null Count", - "description": "Whether to profile for the number of nulls for each column.", - "default": true, - "type": "boolean" - }, - "include_field_min_value": { - "title": "Include Field Min Value", - "description": "Whether to profile for the min value of numeric columns.", - "default": true, - "type": "boolean" - }, - "include_field_max_value": { - "title": "Include Field Max Value", - "description": "Whether to profile for the max value of numeric columns.", - "default": true, - "type": "boolean" - }, - "include_field_mean_value": { - "title": "Include Field Mean Value", - "description": "Whether to profile for the mean value of numeric columns.", - "default": true, - "type": "boolean" - }, - "include_field_median_value": { - "title": "Include Field Median Value", - "description": "Whether to profile for the median value of numeric columns.", - "default": true, - "type": "boolean" - }, - "include_field_stddev_value": { - "title": "Include Field Stddev Value", - "description": "Whether to profile for the standard deviation of numeric columns.", - "default": true, - "type": "boolean" - }, - "include_field_quantiles": { - "title": "Include Field Quantiles", - "description": "Whether to profile for the quantiles of numeric columns.", - "default": false, - "type": "boolean" - }, - "include_field_distinct_value_frequencies": { - "title": "Include Field Distinct Value Frequencies", - "description": "Whether to profile for distinct value frequencies.", - "default": false, - "type": "boolean" - }, - "include_field_histogram": { - "title": "Include Field Histogram", - 
"description": "Whether to profile for the histogram for numeric fields.", - "default": false, - "type": "boolean" - }, - "include_field_sample_values": { - "title": "Include Field Sample Values", - "description": "Whether to profile for the sample values for all columns.", - "default": true, - "type": "boolean" - }, - "allow_deny_patterns": { - "title": "Allow Deny Patterns", - "description": "regex patterns for filtering of tables or table columns to profile.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "max_number_of_fields_to_profile": { - "title": "Max Number Of Fields To Profile", - "description": "A positive integer that specifies the maximum number of columns to profile for any table. `None` implies all columns. The cost of profiling goes up significantly as the number of columns to profile goes up.", - "exclusiveMinimum": 0, - "type": "integer" - }, - "profile_if_updated_since_days": { - "title": "Profile If Updated Since Days", - "description": "Profile table only if it has been updated since these many number of days. `None` implies profile all tables. Only Snowflake supports this.", - "default": 1, - "exclusiveMinimum": 0, - "type": "number" - }, - "max_workers": { - "title": "Max Workers", - "description": "Number of worker threads to use for profiling. 
Set to 1 to disable.", - "default": 50, - "type": "integer" - }, - "query_combiner_enabled": { - "title": "Query Combiner Enabled", - "description": "*This feature is still experimental and can be disabled if it causes issues.* Reduces the total number of queries issued and speeds up profiling by dynamically combining SQL queries where possible.", - "default": true, - "type": "boolean" - }, - "catch_exceptions": { - "title": "Catch Exceptions", - "default": true, - "type": "boolean" - }, - "partition_profiling_enabled": { - "title": "Partition Profiling Enabled", - "default": true, - "type": "boolean" - }, - "bigquery_temp_table_schema": { - "title": "Bigquery Temp Table Schema", - "description": "On bigquery for profiling partitioned tables needs to create temporary views. You have to define a schema where these will be created. Views will be cleaned up after profiler runs. (Great expectation tech details about this (https://legacy.docs.greatexpectations.io/en/0.9.0/reference/integrations/bigquery.html#custom-queries-with-sql-datasource).", - "type": "string" - }, - "partition_datetime": { - "title": "Partition Datetime", - "description": "For partitioned datasets profile only the partition which matches the datetime or profile the latest one if not set. 
Only Bigquery supports this.", - "type": "string", - "format": "date-time" - } - }, - "additionalProperties": false - }, - "BucketDuration": { - "title": "BucketDuration", - "description": "An enumeration.", - "enum": [ - "DAY", - "HOUR" - ], - "type": "string" - }, - "LookerConnectionDefinition": { - "title": "LookerConnectionDefinition", - "type": "object", - "properties": { - "platform": { - "title": "Platform", - "type": "string" - }, - "default_db": { - "title": "Default Db", - "type": "string" - }, - "default_schema": { - "title": "Default Schema", - "type": "string" - }, - "platform_instance": { - "title": "Platform Instance", - "type": "string" - }, - "platform_env": { - "title": "Platform Env", - "description": "The environment that the platform is located in. Leaving this empty will inherit defaults from the top level Looker configuration", - "type": "string" - } - }, - "required": [ - "platform", - "default_db" - ], - "additionalProperties": false - }, - "LookerAPIConfig": { - "title": "LookerAPIConfig", - "type": "object", - "properties": { - "client_id": { - "title": "Client Id", - "description": "Looker API client id.", - "type": "string" - }, - "client_secret": { - "title": "Client Secret", - "description": "Looker API client secret.", - "type": "string" - }, - "base_url": { - "title": "Base Url", - "description": "Url to your Looker instance: `https://company.looker.com:19999` or `https://looker.company.com`, or similar. 
Used for making API calls to Looker and constructing clickable dashboard and chart urls.", - "type": "string" - }, - "transport_options": { - "title": "Transport Options", - "description": "Populates the [TransportOptions](https://github.com/looker-open-source/sdk-codegen/blob/94d6047a0d52912ac082eb91616c1e7c379ab262/python/looker_sdk/rtl/transport.py#L70) struct for looker client", - "allOf": [ - { - "$ref": "#/definitions/TransportOptionsConfig" - } - ] - } - }, - "required": [ - "client_id", - "client_secret", - "base_url" - ], - "additionalProperties": false - }, - "ProvidedConfig": { - "title": "ProvidedConfig", - "type": "object", - "properties": { - "provider": { - "title": "Provider", - "type": "string" - }, - "path_key": { - "title": "Path Key", - "type": "string" - }, - "value": { - "title": "Value", - "type": "string" - } - }, - "required": [ - "provider", - "path_key", - "value" - ], - "additionalProperties": false - }, - "OauthConfiguration": { - "title": "OauthConfiguration", - "type": "object", - "properties": { - "provider": { - "title": "Provider", - "description": "Identity provider for oauth, e.g- microsoft", - "type": "string" - }, - "client_id": { - "title": "Client Id", - "description": "client id of your registered application", - "type": "string" - }, - "scopes": { - "title": "Scopes", - "description": "scopes required to connect to snowflake", - "type": "array", - "items": { - "type": "string" - } - }, - "use_certificate": { - "title": "Use Certificate", - "description": "Do you want to use certificate and private key to authenticate using oauth", - "default": false, - "type": "string" - }, - "client_secret": { - "title": "Client Secret", - "description": "client secret of the application if use_certificate = false", - "type": "string" - }, - "authority_url": { - "title": "Authority Url", - "description": "Authority url of your identity provider", - "type": "string" - }, - "encoded_oauth_public_key": { - "title": "Encoded Oauth Public Key", 
- "description": "base64 encoded certificate content if use_certificate = true", - "type": "string" - }, - "encoded_oauth_private_key": { - "title": "Encoded Oauth Private Key", - "description": "base64 encoded private key content if use_certificate = true", - "type": "string" - } - }, - "additionalProperties": false - }, - "SnowflakeProvisionRoleConfig": { - "title": "SnowflakeProvisionRoleConfig", - "type": "object", - "properties": { - "enabled": { - "title": "Enabled", - "description": "Whether provisioning of Snowflake role (used for ingestion) is enabled or not.", - "default": false, - "type": "boolean" - }, - "dry_run": { - "title": "Dry Run", - "description": "If provision_role is enabled, whether to dry run the sql commands for system admins to see what sql grant commands would be run without actually running the grant commands.", - "default": false, - "type": "boolean" - }, - "drop_role_if_exists": { - "title": "Drop Role If Exists", - "description": "Useful during testing to ensure you have a clean slate role. Not recommended for production use cases.", - "default": false, - "type": "boolean" - }, - "run_ingestion": { - "title": "Run Ingestion", - "description": "If system admins wish to skip actual ingestion of metadata during testing of the provisioning of role.", - "default": false, - "type": "boolean" - }, - "admin_role": { - "title": "Admin Role", - "description": "The Snowflake role of admin user used for provisioning of the role specified by role config. 
System admins can audit the open source code and decide to use a different role.", - "default": "accountadmin", - "type": "string" - }, - "admin_username": { - "title": "Admin Username", - "description": "The username to be used for provisioning of role.", - "type": "string" - }, - "admin_password": { - "title": "Admin Password", - "description": "The password to be used for provisioning of role.", - "type": "string", - "writeOnly": true, - "format": "password" - } - }, - "required": [ - "admin_username" - ], - "additionalProperties": false - }, - "SnowflakeStatefulIngestionConfig": { - "title": "SnowflakeStatefulIngestionConfig", - "description": "Specialization of basic StatefulIngestionConfig to adding custom config.\nThis will be used to override the stateful_ingestion config param of StatefulIngestionConfigBase\nin the SnowflakeUsageConfig.", - "type": "object", - "properties": { - "enabled": { - "title": "Enabled", - "description": "The type of the ingestion state provider registered with datahub.", - "default": false, - "type": "boolean" - }, - "max_checkpoint_state_size": { - "title": "Max Checkpoint State Size", - "description": "The maximum size of the checkpoint state in bytes. 
Default is 16MB", - "default": 16777216, - "exclusiveMinimum": 0, - "type": "integer" - }, - "state_provider": { - "title": "State Provider", - "description": "The ingestion state provider configuration.", - "allOf": [ - { - "$ref": "#/definitions/DynamicTypedStateProviderConfig" - } - ] - }, - "force_rerun": { - "title": "Force Rerun", - "default": false, - "type": "boolean" - }, - "ignore_new_state": { - "title": "Ignore New State", - "description": "If set to True, ignores the current checkpoint state.", - "default": false, - "type": "boolean" - } - }, - "additionalProperties": false - }, - "LineageMode": { - "title": "LineageMode", - "description": "An enumeration.", - "enum": [ - "sql_based", - "stl_scan_based", - "mixed" - ] - }, - "GlueProfilingConfig": { - "title": "GlueProfilingConfig", - "type": "object", - "properties": { - "row_count": { - "title": "Row Count", - "description": "The parameter name for row count in glue table.", - "type": "string" - }, - "column_count": { - "title": "Column Count", - "description": "The parameter name for column count in glue table.", - "type": "string" - }, - "unique_count": { - "title": "Unique Count", - "description": "The parameter name for the count of unique value in a column.", - "type": "string" - }, - "unique_proportion": { - "title": "Unique Proportion", - "description": "The parameter name for the proportion of unique values in a column.", - "type": "string" - }, - "null_count": { - "title": "Null Count", - "description": "The parameter name for the count of null values in a column.", - "type": "integer" - }, - "null_proportion": { - "title": "Null Proportion", - "description": "The parameter name for the proportion of null values in a column.", - "type": "string" - }, - "min": { - "title": "Min", - "description": "The parameter name for the min value of a column.", - "type": "string" - }, - "max": { - "title": "Max", - "description": "The parameter name for the max value of a column.", - "type": "string" - }, 
- "mean": { - "title": "Mean", - "description": "The parameter name for the mean value of a column.", - "type": "string" - }, - "median": { - "title": "Median", - "description": "The parameter name for the median value of a column.", - "type": "string" - }, - "stdev": { - "title": "Stdev", - "description": "The parameter name for the standard deviation of a column.", - "type": "string" - }, - "partition_patterns": { - "title": "Partition Patterns", - "description": "Regex patterns for filtering partitions for profile. The pattern should be a string like: \"{'key':'value'}\".", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - } - }, - "additionalProperties": false - }, - "ModeAPIConfig": { - "title": "ModeAPIConfig", - "type": "object", - "properties": { - "retry_backoff_multiplier": { - "title": "Retry Backoff Multiplier", - "description": "Multiplier for exponential backoff when waiting to retry", - "default": 2, - "anyOf": [ - { - "type": "integer" - }, - { - "type": "number" - } - ] - }, - "max_retry_interval": { - "title": "Max Retry Interval", - "description": "Maximum interval to wait when retrying", - "default": 10, - "anyOf": [ - { - "type": "integer" - }, - { - "type": "number" - } - ] - }, - "max_attempts": { - "title": "Max Attempts", - "description": "Maximum number of attempts to retry before failing", - "default": 5, - "type": "integer" - } - }, - "additionalProperties": false - }, - "AwsSourceConfig": { - "title": "AwsSourceConfig", - "description": "Common AWS credentials config.\n\nCurrently used by:\n - Glue source\n - SageMaker source", - "type": "object", - "properties": { - "aws_access_key_id": { - "title": "Aws Access Key Id", - "description": "Autodetected. 
See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", - "type": "string" - }, - "aws_secret_access_key": { - "title": "Aws Secret Access Key", - "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", - "type": "string" - }, - "aws_session_token": { - "title": "Aws Session Token", - "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", - "type": "string" - }, - "aws_role": { - "title": "Aws Role", - "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "aws_profile": { - "title": "Aws Profile", - "description": "Named AWS profile to use, if not set the default will be used", - "type": "string" - }, - "aws_region": { - "title": "Aws Region", - "description": "AWS region code.", - "type": "string" - }, - "aws_endpoint_url": { - "title": "Aws Endpoint Url", - "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html", - "type": "string" - }, - "aws_proxy": { - "title": "Aws Proxy", - "description": "Autodetected. 
See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html", - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "env": { - "title": "Env", - "description": "The environment that all assets produced by this connector belong to", - "default": "PROD", - "type": "string" - }, - "database_pattern": { - "title": "Database Pattern", - "description": "regex patterns for databases to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "table_pattern": { - "title": "Table Pattern", - "description": "regex patterns for tables to filter in ingestion.", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - } - }, - "required": [ - "aws_region" - ], - "additionalProperties": false - }, - "DataLakeProfilerConfig": { - "title": "DataLakeProfilerConfig", - "type": "object", - "properties": { - "enabled": { - "title": "Enabled", - "description": "Whether profiling should be done.", - "default": false, - "type": "boolean" - }, - "profile_table_level_only": { - "title": "Profile Table Level Only", - "description": "Whether to perform profiling at table-level only or include column-level profiling as well.", - "default": false, - "type": "boolean" - }, - "allow_deny_patterns": { - "title": "Allow Deny Patterns", - "default": { - "allow": [ - ".*" - ], - "deny": [], - "ignoreCase": true, - "alphabet": "[A-Za-z0-9 _.-]" - }, - "allOf": [ - { - "$ref": "#/definitions/AllowDenyPattern" - } - ] - }, - "max_number_of_fields_to_profile": { - "title": "Max Number Of Fields To Profile", - "description": "A positive integer that specifies the maximum number of columns to profile for any table. `None` implies all columns. 
The cost of profiling goes up significantly as the number of columns to profile goes up.", - "exclusiveMinimum": 0, - "type": "integer" - }, - "include_field_null_count": { - "title": "Include Field Null Count", - "description": "Whether to profile for the number of nulls for each column.", - "default": true, - "type": "boolean" - }, - "include_field_min_value": { - "title": "Include Field Min Value", - "description": "Whether to profile for the min value of numeric columns.", - "default": true, - "type": "boolean" - }, - "include_field_max_value": { - "title": "Include Field Max Value", - "description": "Whether to profile for the max value of numeric columns.", - "default": true, - "type": "boolean" - }, - "include_field_mean_value": { - "title": "Include Field Mean Value", - "description": "Whether to profile for the mean value of numeric columns.", - "default": true, - "type": "boolean" - }, - "include_field_median_value": { - "title": "Include Field Median Value", - "description": "Whether to profile for the median value of numeric columns.", - "default": true, - "type": "boolean" - }, - "include_field_stddev_value": { - "title": "Include Field Stddev Value", - "description": "Whether to profile for the standard deviation of numeric columns.", - "default": true, - "type": "boolean" - }, - "include_field_quantiles": { - "title": "Include Field Quantiles", - "description": "Whether to profile for the quantiles of numeric columns.", - "default": true, - "type": "boolean" - }, - "include_field_distinct_value_frequencies": { - "title": "Include Field Distinct Value Frequencies", - "description": "Whether to profile for distinct value frequencies.", - "default": true, - "type": "boolean" - }, - "include_field_histogram": { - "title": "Include Field Histogram", - "description": "Whether to profile for the histogram for numeric fields.", - "default": true, - "type": "boolean" - }, - "include_field_sample_values": { - "title": "Include Field Sample Values", - 
"description": "Whether to profile for the sample values for all columns.", - "default": true, - "type": "boolean" - } - }, - "additionalProperties": false - }, - "PathSpec": { - "title": "PathSpec", - "type": "object", - "properties": { - "include": { - "title": "Include", - "description": "Path to table (s3 or local file system). Name variable {table} is used to mark the folder with dataset. In absence of {table}, file level dataset will be created. Check below examples for more details.", - "type": "string" - }, - "exclude": { - "title": "Exclude", - "description": "list of paths in glob pattern which will be excluded while scanning for the datasets", - "type": "array", - "items": { - "type": "string" - } - }, - "file_types": { - "title": "File Types", - "description": "Files with extenstions specified here (subset of default value) only will be scanned to create dataset. Other files will be omitted.", - "default": [ - "csv", - "tsv", - "json", - "parquet", - "avro" - ], - "type": "array", - "items": { - "type": "string" - } - }, - "default_extension": { - "title": "Default Extension", - "description": "For files without extension it will assume the specified file type. If it is not set the files without extensions will be skipped.", - "type": "string" - }, - "table_name": { - "title": "Table Name", - "description": "Display name of the dataset.Combination of named variableds from include path and strings", - "type": "string" - }, - "enable_compression": { - "title": "Enable Compression", - "description": "Enable or disable processing compressed files. Currenly .gz and .bz files are supported.", - "default": true, - "type": "boolean" - }, - "sample_files": { - "title": "Sample Files", - "description": "Not listing all the files but only taking a handful amount of sample file to infer the schema. File count and file size calculation will be disabled. 
This can affect performance significantly if enabled", - "default": true, - "type": "boolean" - } - }, - "required": [ - "include" - ], - "additionalProperties": false - }, - "NifiAuthType": { - "title": "NifiAuthType", - "description": "An enumeration.", - "enum": [ - "NO_AUTH", - "SINGLE_USER", - "CLIENT_CERT" - ] - }, - "DBTStatefulIngestionConfig": { - "title": "DBTStatefulIngestionConfig", - "description": "Specialization of basic StatefulIngestionConfig to adding custom config.\nThis will be used to override the stateful_ingestion config param of StatefulIngestionConfigBase\nin the SQLAlchemyConfig.", - "type": "object", - "properties": { - "enabled": { - "title": "Enabled", - "description": "The type of the ingestion state provider registered with datahub.", - "default": false, - "type": "boolean" - }, - "max_checkpoint_state_size": { - "title": "Max Checkpoint State Size", - "description": "The maximum size of the checkpoint state in bytes. Default is 16MB", - "default": 16777216, - "exclusiveMinimum": 0, - "type": "integer" - }, - "state_provider": { - "title": "State Provider", - "description": "The ingestion state provider configuration.", - "allOf": [ - { - "$ref": "#/definitions/DynamicTypedStateProviderConfig" - } - ] - }, - "ignore_old_state": { - "title": "Ignore Old State", - "description": "If set to True, ignores the previous checkpoint state.", - "default": false, - "type": "boolean" - }, - "ignore_new_state": { - "title": "Ignore New State", - "description": "If set to True, ignores the current checkpoint state.", - "default": false, - "type": "boolean" - }, - "remove_stale_metadata": { - "title": "Remove Stale Metadata", - "default": true, - "type": "boolean" - } - }, - "additionalProperties": false - }, - "AwsConnectionConfig": { - "title": "AwsConnectionConfig", - "description": "Common AWS credentials config.\n\nCurrently used by:\n - Glue source\n - SageMaker source\n - dbt source", - "type": "object", - "properties": { - 
"aws_access_key_id": { - "title": "Aws Access Key Id", - "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", - "type": "string" - }, - "aws_secret_access_key": { - "title": "Aws Secret Access Key", - "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", - "type": "string" - }, - "aws_session_token": { - "title": "Aws Session Token", - "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", - "type": "string" - }, - "aws_role": { - "title": "Aws Role", - "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html", - "anyOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "aws_profile": { - "title": "Aws Profile", - "description": "Named AWS profile to use, if not set the default will be used", - "type": "string" - }, - "aws_region": { - "title": "Aws Region", - "description": "AWS region code.", - "type": "string" - }, - "aws_endpoint_url": { - "title": "Aws Endpoint Url", - "description": "Autodetected. See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html", - "type": "string" - }, - "aws_proxy": { - "title": "Aws Proxy", - "description": "Autodetected. 
See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html", - "type": "object", - "additionalProperties": { - "type": "string" - } - } - }, - "required": [ - "aws_region" - ], - "additionalProperties": false - } - }, - "type": "object", - "properties": { - "source": { - "anyOf": [ - { - "$ref": "#/definitions/feast-legacy" - }, - { - "$ref": "#/definitions/redash" - }, - { - "$ref": "#/definitions/iceberg" - }, - { - "$ref": "#/definitions/pulsar" - }, - { - "$ref": "#/definitions/looker" - }, - { - "$ref": "#/definitions/sqlalchemy" - }, - { - "$ref": "#/definitions/azure-ad" - }, - { - "$ref": "#/definitions/starburst-trino-usage" - }, - { - "$ref": "#/definitions/postgres" - }, - { - "$ref": "#/definitions/tableau" - }, - { - "$ref": "#/definitions/lookml" - }, - { - "$ref": "#/definitions/powerbi" - }, - { - "$ref": "#/definitions/kafka-connect" - }, - { - "$ref": "#/definitions/snowflake" - }, - { - "$ref": "#/definitions/snowflake-usage" - }, - { - "$ref": "#/definitions/redshift-usage" - }, - { - "$ref": "#/definitions/hive" - }, - { - "$ref": "#/definitions/hana" - }, - { - "$ref": "#/definitions/sagemaker" - }, - { - "$ref": "#/definitions/glue" - }, - { - "$ref": "#/definitions/oracle" - }, - { - "$ref": "#/definitions/druid" - }, - { - "$ref": "#/definitions/mode" - }, - { - "$ref": "#/definitions/file" - }, - { - "$ref": "#/definitions/mssql" - }, - { - "$ref": "#/definitions/data-lake" - }, - { - "$ref": "#/definitions/presto-on-hive" - }, - { - "$ref": "#/definitions/vertica" - }, - { - "$ref": "#/definitions/csv-enricher" - }, - { - "$ref": "#/definitions/mariadb" - }, - { - "$ref": "#/definitions/feast" - }, - { - "$ref": "#/definitions/ldap" - }, - { - "$ref": "#/definitions/elasticsearch" - }, - { - "$ref": "#/definitions/superset" - }, - { - "$ref": "#/definitions/datahub-lineage-file" - }, - { - "$ref": "#/definitions/s3" - }, - { - "$ref": "#/definitions/athena" - }, - { - "$ref": "#/definitions/redshift" - }, - { 
- "$ref": "#/definitions/openapi" - }, - { - "$ref": "#/definitions/metabase" - }, - { - "$ref": "#/definitions/datahub-business-glossary" - }, - { - "$ref": "#/definitions/clickhouse-usage" - }, - { - "$ref": "#/definitions/mongodb" - }, - { - "$ref": "#/definitions/nifi" - }, - { - "$ref": "#/definitions/clickhouse" - }, - { - "$ref": "#/definitions/dbt" - }, - { - "$ref": "#/definitions/mysql" - }, - { - "$ref": "#/definitions/trino" - }, - { - "$ref": "#/definitions/okta" - } - ] - }, - "transformers": { - "type": "array", - "items": { - "type": "object", - "description": "Transformer configs see at https://datahubproject.io/docs/metadata-ingestion/transformers", - "properties": { - "type": { - "type": "string", - "description": "Transformer type" - }, - "config": { - "type": "object", - "description": "Transformer config" - } - }, - "required": [ - "type" - ], - "additionalProperties": false - } - }, - "sink": { - "description": "sink", - "anyOf": [ - { - "$ref": "#/definitions/datahub_kafka_sink" - }, - { - "$ref": "#/definitions/datahub_rest_sink" - }, - { - "$ref": "#/definitions/console_sink" - }, - { - "$ref": "#/definitions/file_sink" - } - ] - } - }, - "required": [ - "source" - ] -} \ No newline at end of file