diff --git a/.github/actions/docker-custom-build-and-push/action.yml b/.github/actions/docker-custom-build-and-push/action.yml index 4695cb5f65218..0cb553ca8cf03 100644 --- a/.github/actions/docker-custom-build-and-push/action.yml +++ b/.github/actions/docker-custom-build-and-push/action.yml @@ -68,13 +68,13 @@ runs: # Code for building multi-platform images and pushing to Docker Hub. - name: Set up QEMU - uses: docker/setup-qemu-action@v1 + uses: docker/setup-qemu-action@v2 if: ${{ inputs.publish == 'true' }} - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v2 if: ${{ inputs.publish == 'true' }} - name: Login to DockerHub - uses: docker/login-action@v1 + uses: docker/login-action@v2 if: ${{ inputs.publish == 'true' }} with: username: ${{ inputs.username }} diff --git a/.github/workflows/docker-feast-source.yml b/.github/workflows/docker-feast-source.yml deleted file mode 100644 index e4dbbd37bedba..0000000000000 --- a/.github/workflows/docker-feast-source.yml +++ /dev/null @@ -1,76 +0,0 @@ -name: docker-feast-source docker -on: - push: - branches: - - master - paths: - - "metadata-ingestion/src/datahub/ingestion/source/feast_image/**" - - ".github/workflows/docker-feast-source.yml" - pull_request: - branches: - - master - paths: - - "metadata-ingestion/src/datahub/ingestion/source/feast_image/**" - - ".github/workflows/docker-feast-source.yml" - release: - types: [published, edited] - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -jobs: - setup: - runs-on: ubuntu-latest - outputs: - tag: ${{ steps.tag.outputs.tag }} - publish: ${{ steps.publish.outputs.publish }} - steps: - - name: Checkout - uses: actions/checkout@v3 - - name: Compute Tag - id: tag - run: | - echo "GITHUB_REF: $GITHUB_REF" - SHORT_SHA=$(git rev-parse --short "$GITHUB_SHA") - echo "SHORT_SHA: $SHORT_SHA" - TAG=$(echo ${GITHUB_REF} | sed -e "s,refs/heads/master,latest\,${SHORT_SHA},g" -e 's,refs/tags/,,g' -e 's,refs/pull/\([0-9]*\).*,pr\1,g') - echo "tag=$TAG" >> $GITHUB_OUTPUT - - name: Check whether publishing enabled - id: publish - env: - ENABLE_PUBLISH: ${{ secrets.ORG_DOCKER_PASSWORD }} - run: | - echo "Enable publish: ${{ env.ENABLE_PUBLISH != '' }}" - echo "publish=${{ env.ENABLE_PUBLISH != '' }}" >> $GITHUB_OUTPUT - push_to_registries: - name: Build and Push Docker Image to Docker Hub - runs-on: ubuntu-latest - if: ${{ needs.setup.outputs.publish == 'true' }} - needs: setup - steps: - - name: Check out the repo - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - name: Docker meta - id: docker_meta - uses: crazy-max/ghaction-docker-meta@v1 - with: - # list of Docker images to use as base name for tags - images: | - acryldata/datahub-ingestion-feast-wrapper - # add git short SHA as Docker tag - tag-custom: ${{ needs.setup.outputs.tag }} - tag-custom-only: true - - name: Login to DockerHub - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.ORG_DOCKER_PASSWORD }} - - name: Build and Push image - uses: docker/build-push-action@v3 - with: - context: ./metadata-ingestion/src/datahub/ingestion/source/feast_image - tags: ${{ steps.docker_meta.outputs.tags }} - push: ${{ needs.setup.outputs.publish == 'true' }} diff --git a/.github/workflows/docker-ingestion-base.yml b/.github/workflows/docker-ingestion-base.yml index 62a3e3b19160b..8a642d79e8067 100644 --- a/.github/workflows/docker-ingestion-base.yml +++ 
b/.github/workflows/docker-ingestion-base.yml @@ -24,11 +24,11 @@ jobs: with: fetch-depth: 0 - name: Set up QEMU - uses: docker/setup-qemu-action@v1 + uses: docker/setup-qemu-action@v2 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v2 - name: Login to DockerHub - uses: docker/login-action@v1 + uses: docker/login-action@v2 with: username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} diff --git a/.github/workflows/docker-ingestion.yml b/.github/workflows/docker-ingestion.yml index 61bfb64169871..e5bc725f69a6b 100644 --- a/.github/workflows/docker-ingestion.yml +++ b/.github/workflows/docker-ingestion.yml @@ -72,12 +72,12 @@ jobs: tag-custom: ${{ needs.setup.outputs.tag }} tag-custom-only: true - name: Set up QEMU - uses: docker/setup-qemu-action@v1 + uses: docker/setup-qemu-action@v2 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v2 - name: Login to DockerHub if: ${{ needs.setup.outputs.publish == 'true' }} - uses: docker/login-action@v1 + uses: docker/login-action@v2 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} diff --git a/.github/workflows/docker-postgres-setup.yml b/.github/workflows/docker-postgres-setup.yml index 60635155052b0..ad0a49dd7718c 100644 --- a/.github/workflows/docker-postgres-setup.yml +++ b/.github/workflows/docker-postgres-setup.yml @@ -64,7 +64,7 @@ jobs: tag-custom-only: true - name: Login to DockerHub if: ${{ needs.setup.outputs.publish == 'true' }} - uses: docker/login-action@v1 + uses: docker/login-action@v2 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.ORG_DOCKER_PASSWORD }} diff --git a/.github/workflows/publish-datahub-jars.yml b/.github/workflows/publish-datahub-jars.yml index 88565fa5d127c..867663c05ad48 100644 --- a/.github/workflows/publish-datahub-jars.yml +++ b/.github/workflows/publish-datahub-jars.yml @@ -1,4 +1,4 @@ -name: Publish Datahub Java Jars (Client, Spark Lineage, Protobuf, Ranger) +name: Publish Datahub Java Jars (Client, Spark Lineage, Protobuf, Auth API) on: push: @@ -142,7 +142,7 @@ jobs: echo signingKey=$SIGNING_KEY >> gradle.properties ./gradlew -PreleaseVersion=${{ needs.setup.outputs.tag }} :metadata-integration:java:datahub-protobuf:publish ./gradlew :metadata-integration:java:datahub-protobuf:closeAndReleaseRepository --info - - name: publish datahub-ranger-plugin snapshot jar + - name: publish datahub-auth-api snapshot jar if: ${{ github.event_name != 'release' }} env: RELEASE_USERNAME: ${{ secrets.RELEASE_USERNAME }} @@ -153,9 +153,9 @@ jobs: NEXUS_PASSWORD: ${{ secrets.NEXUS_PASSWORD }} run: | echo signingKey=$SIGNING_KEY >> gradle.properties - ./gradlew :datahub-ranger-plugin:printVersion - ./gradlew :datahub-ranger-plugin:publish - - name: release datahub-ranger-plugin jar + ./gradlew :metadata-auth:auth-api:printVersion + ./gradlew :metadata-auth:auth-api:publish + - name: release datahub-auth-api jar if: ${{ github.event_name == 'release' }} env: RELEASE_USERNAME: ${{ secrets.RELEASE_USERNAME }} @@ -166,5 +166,5 @@ jobs: NEXUS_PASSWORD: ${{ secrets.NEXUS_PASSWORD }} run: | echo signingKey=$SIGNING_KEY >> gradle.properties - ./gradlew -PreleaseVersion=${{ needs.setup.outputs.tag }} :datahub-ranger-plugin:publish - ./gradlew :datahub-ranger-plugin:closeAndReleaseRepository --info + ./gradlew -PreleaseVersion=${{ needs.setup.outputs.tag }} :metadata-auth:auth-api:publish + ./gradlew 
:metadata-auth:auth-api:closeAndReleaseRepository --info diff --git a/.gitignore b/.gitignore index 0b435f9ab0d9a..0ce69dc04772b 100644 --- a/.gitignore +++ b/.gitignore @@ -75,3 +75,5 @@ datahub-frontend/public/** datahub-frontend/test/resources/public/** .remote* +# Ignore runtime generated authenticator/authorizer jar files +metadata-service/plugin/src/test/resources/sample-plugins/** diff --git a/build.gradle b/build.gradle index 29177b4c4432c..22217f2149c5e 100644 --- a/build.gradle +++ b/build.gradle @@ -60,6 +60,8 @@ project.ext.externalDependency = [ 'awsGlueSchemaRegistrySerde': 'software.amazon.glue:schema-registry-serde:1.1.10', 'awsMskIamAuth': 'software.amazon.msk:aws-msk-iam-auth:1.1.1', 'awsSecretsManagerJdbc': 'com.amazonaws.secretsmanager:aws-secretsmanager-jdbc:1.0.8', + 'awsPostgresIamAuth': 'software.amazon.jdbc:aws-advanced-jdbc-wrapper:1.0.0', + 'awsRds':'software.amazon.awssdk:rds:2.18.24', 'cacheApi' : 'javax.cache:cache-api:1.1.0', 'commonsCli': 'commons-cli:commons-cli:1.5.0', 'commonsIo': 'commons-io:commons-io:2.4', @@ -92,6 +94,8 @@ project.ext.externalDependency = [ 'httpClient': 'org.apache.httpcomponents:httpclient:4.5.9', 'httpAsyncClient': 'org.apache.httpcomponents:httpasyncclient:4.1.5', 'iStackCommons': 'com.sun.istack:istack-commons-runtime:4.0.1', + 'jacksonJDK8': "com.fasterxml.jackson.datatype:jackson-datatype-jdk8:$jacksonVersion", + 'jacksonDataPropertyFormat': "com.fasterxml.jackson.dataformat:jackson-dataformat-properties:$jacksonVersion", 'jacksonCore': "com.fasterxml.jackson.core:jackson-core:$jacksonVersion", 'jacksonDataBind': "com.fasterxml.jackson.core:jackson-databind:$jacksonVersion.2", 'jacksonDataFormatYaml': "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:$jacksonVersion", @@ -196,7 +200,7 @@ allprojects { apply plugin: 'checkstyle' } -configure(subprojects.findAll {! it.name.startsWith('spark-lineage') }) { +configure(subprojects.findAll {!
it.name.startsWith('spark-lineage')}) { configurations.all { exclude group: "io.netty", module: "netty" diff --git a/datahub-graphql-core/build.gradle b/datahub-graphql-core/build.gradle index 528054833bb9a..c07106be514d1 100644 --- a/datahub-graphql-core/build.gradle +++ b/datahub-graphql-core/build.gradle @@ -6,6 +6,7 @@ apply plugin: 'java' dependencies { compile project(':metadata-service:restli-client') compile project(':metadata-service:auth-impl') + compile project(':metadata-service:auth-api') compile project(':metadata-io') compile project(':metadata-utils') @@ -34,6 +35,7 @@ graphqlCodegen { "$projectDir/src/main/resources/auth.graphql".toString(), "$projectDir/src/main/resources/timeline.graphql".toString(), "$projectDir/src/main/resources/tests.graphql".toString(), + "$projectDir/src/main/resources/step.graphql".toString(), ] outputDir = new File("$projectDir/src/mainGeneratedGraphQL/java") packageName = "com.linkedin.datahub.graphql.generated" diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java index 1bd59ed39f0e5..0e66982b11557 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java @@ -18,6 +18,7 @@ public class Constants { public static final String INGESTION_SCHEMA_FILE = "ingestion.graphql"; public static final String TIMELINE_SCHEMA_FILE = "timeline.graphql"; public static final String TESTS_SCHEMA_FILE = "tests.graphql"; + public static final String STEPS_SCHEMA_FILE = "step.graphql"; public static final String BROWSE_PATH_DELIMITER = "/"; public static final String VERSION_STAMP_FIELD_NAME = "versionStamp"; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 705789ec4753e..be4cc7b68e274 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -3,11 +3,11 @@ import com.datahub.authentication.AuthenticationConfiguration; import com.datahub.authentication.group.GroupService; import com.datahub.authentication.invite.InviteTokenService; -import com.datahub.authentication.post.PostService; import com.datahub.authentication.token.StatefulTokenService; import com.datahub.authentication.user.NativeUserService; import com.datahub.authorization.AuthorizationConfiguration; import com.datahub.authorization.role.RoleService; +import com.datahub.authentication.post.PostService; import com.google.common.collect.ImmutableList; import com.linkedin.common.VersionedUrn; import com.linkedin.common.urn.Urn; @@ -192,6 +192,8 @@ import com.linkedin.datahub.graphql.resolvers.search.SearchAcrossEntitiesResolver; import com.linkedin.datahub.graphql.resolvers.search.SearchAcrossLineageResolver; import com.linkedin.datahub.graphql.resolvers.search.SearchResolver; +import com.linkedin.datahub.graphql.resolvers.step.BatchGetStepStatesResolver; +import com.linkedin.datahub.graphql.resolvers.step.BatchUpdateStepStatesResolver; import com.linkedin.datahub.graphql.resolvers.tag.CreateTagResolver; import com.linkedin.datahub.graphql.resolvers.tag.DeleteTagResolver; import com.linkedin.datahub.graphql.resolvers.tag.SetTagColorResolver; @@ -249,7 +251,7 @@ import 
com.linkedin.datahub.graphql.types.tag.TagType; import com.linkedin.datahub.graphql.types.test.TestType; import com.linkedin.entity.client.EntityClient; -import com.linkedin.metadata.config.DatahubConfiguration; +import com.linkedin.metadata.config.DataHubConfiguration; import com.linkedin.metadata.config.IngestionConfiguration; import com.linkedin.metadata.config.TestsConfiguration; import com.linkedin.metadata.config.VisualConfiguration; @@ -327,7 +329,7 @@ public class GmsGraphQLEngine { private final VisualConfiguration visualConfiguration; private final TelemetryConfiguration telemetryConfiguration; private final TestsConfiguration testsConfiguration; - private final DatahubConfiguration datahubConfiguration; + private final DataHubConfiguration datahubConfiguration; private final DatasetType datasetType; private final CorpUserType corpUserType; @@ -393,7 +395,7 @@ public GmsGraphQLEngine(final EntityClient entityClient, final GraphClient graph final AuthorizationConfiguration authorizationConfiguration, final GitVersion gitVersion, final TimelineService timelineService, final boolean supportsImpactAnalysis, final VisualConfiguration visualConfiguration, final TelemetryConfiguration telemetryConfiguration, - final TestsConfiguration testsConfiguration, final DatahubConfiguration datahubConfiguration, + final TestsConfiguration testsConfiguration, final DataHubConfiguration datahubConfiguration, final SiblingGraphService siblingGraphService, final GroupService groupService, final RoleService roleService, final InviteTokenService inviteTokenService, final PostService postService, final FeatureFlags featureFlags) { @@ -557,6 +559,7 @@ public GraphQLEngine.Builder builder() { .addSchema(fileBasedSchema(INGESTION_SCHEMA_FILE)) .addSchema(fileBasedSchema(TIMELINE_SCHEMA_FILE)) .addSchema(fileBasedSchema(TESTS_SCHEMA_FILE)) + .addSchema(fileBasedSchema(STEPS_SCHEMA_FILE)) .addDataLoaders(loaderSuppliers(loadableTypes)) .addDataLoader("Aspect", context -> createDataLoader(aspectType, context)) .configureRuntimeWiring(this::configureRuntimeWiring); @@ -691,6 +694,7 @@ private void configureQueryResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("listRoles", new ListRolesResolver(this.entityClient)) .dataFetcher("getInviteToken", new GetInviteTokenResolver(this.inviteTokenService)) .dataFetcher("listPosts", new ListPostsResolver(this.entityClient)) + .dataFetcher("batchGetStepStates", new BatchGetStepStatesResolver(this.entityClient)) ); } @@ -814,6 +818,7 @@ private void configureMutationResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("createInviteToken", new CreateInviteTokenResolver(this.inviteTokenService)) .dataFetcher("acceptRole", new AcceptRoleResolver(this.roleService, this.inviteTokenService)) .dataFetcher("createPost", new CreatePostResolver(this.postService)) + .dataFetcher("batchUpdateStepStates", new BatchUpdateStepStatesResolver(this.entityClient)) ); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/QueryContext.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/QueryContext.java index 9173e7c712dc9..4803ef08fdddc 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/QueryContext.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/QueryContext.java @@ -2,7 +2,7 @@ import com.datahub.authentication.Actor; import com.datahub.authentication.Authentication; -import com.datahub.authorization.Authorizer; +import com.datahub.plugins.auth.authorization.Authorizer; /** 
@@ -16,7 +16,7 @@ public interface QueryContext { boolean isAuthenticated(); /** - * Returns the {@link com.datahub.authentication.Authentication} associated with the current query context. + * Returns the {@link Authentication} associated with the current query context. */ Authentication getAuthentication(); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java index 183501b192886..bffd98f0d271e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java @@ -2,7 +2,7 @@ import com.datahub.authorization.AuthorizationRequest; import com.datahub.authorization.AuthorizationResult; -import com.datahub.authorization.Authorizer; +import com.datahub.plugins.auth.authorization.Authorizer; import com.datahub.authorization.ResourceSpec; import com.google.common.collect.ImmutableList; import com.linkedin.common.AuditStamp; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/AuthUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/AuthUtils.java index 081a56bebb229..08cb6735862e9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/AuthUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/AuthUtils.java @@ -7,7 +7,7 @@ import java.util.Optional; import com.datahub.authorization.AuthorizationRequest; import com.datahub.authorization.AuthorizationResult; -import com.datahub.authorization.Authorizer; +import com.datahub.plugins.auth.authorization.Authorizer; public class AuthUtils { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/MeResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/MeResolver.java index c42dae129aa34..01293f1cae254 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/MeResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/MeResolver.java @@ -2,7 +2,7 @@ import com.datahub.authorization.AuthorizationRequest; import com.datahub.authorization.AuthorizationResult; -import com.datahub.authorization.Authorizer; +import com.datahub.plugins.auth.authorization.Authorizer; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java index ef9cd62018e0b..913f32eb83fea 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java @@ -16,7 +16,7 @@ import com.linkedin.datahub.graphql.generated.TelemetryConfig; import com.linkedin.datahub.graphql.generated.TestsConfig; import com.linkedin.datahub.graphql.generated.VisualConfig; -import com.linkedin.metadata.config.DatahubConfiguration; +import com.linkedin.metadata.config.DataHubConfiguration; import com.linkedin.metadata.config.IngestionConfiguration; import 
com.linkedin.metadata.config.TestsConfiguration; import com.linkedin.metadata.telemetry.TelemetryConfiguration; @@ -42,7 +42,7 @@ public class AppConfigResolver implements DataFetcher> { + private final EntityClient _entityClient; + + @Override + public CompletableFuture get(@Nonnull final DataFetchingEnvironment environment) + throws Exception { + final QueryContext context = environment.getContext(); + final Authentication authentication = context.getAuthentication(); + final BatchGetStepStatesInput input = + bindArgument(environment.getArgument("input"), BatchGetStepStatesInput.class); + + return CompletableFuture.supplyAsync(() -> { + Map urnsToIdsMap; + Set urns; + Map entityResponseMap; + + try { + urnsToIdsMap = buildUrnToIdMap(input.getIds(), authentication); + urns = urnsToIdsMap.keySet(); + entityResponseMap = _entityClient.batchGetV2(DATAHUB_STEP_STATE_ENTITY_NAME, urns, + ImmutableSet.of(DATAHUB_STEP_STATE_PROPERTIES_ASPECT_NAME), authentication); + } catch (Exception e) { + throw new RuntimeException(e); + } + + final Map stepStatePropertiesMap = new HashMap<>(); + for (Map.Entry entry : entityResponseMap.entrySet()) { + final Urn urn = entry.getKey(); + final DataHubStepStateProperties stepStateProperties = getStepStateProperties(urn, entry.getValue()); + if (stepStateProperties != null) { + stepStatePropertiesMap.put(urn, stepStateProperties); + } + } + + final List results = stepStatePropertiesMap.entrySet() + .stream() + .map(entry -> buildStepStateResult(urnsToIdsMap.get(entry.getKey()), entry.getValue())) + .collect(Collectors.toList()); + final BatchGetStepStatesResult result = new BatchGetStepStatesResult(); + result.setResults(results); + return result; + }); + } + + @Nonnull + private Map buildUrnToIdMap(@Nonnull final List ids, @Nonnull final Authentication authentication) + throws RemoteInvocationException { + final Map urnToIdMap = new HashMap<>(); + for (final String id : ids) { + final Urn urn = getStepStateUrn(id); + if (_entityClient.exists(urn, authentication)) { + urnToIdMap.put(urn, id); + } + } + + return urnToIdMap; + } + + @Nonnull + private Urn getStepStateUrn(@Nonnull final String id) { + final DataHubStepStateKey stepStateKey = new DataHubStepStateKey().setId(id); + return convertEntityKeyToUrn(stepStateKey, DATAHUB_STEP_STATE_ENTITY_NAME); + } + + @Nullable + private DataHubStepStateProperties getStepStateProperties(@Nonnull final Urn urn, + @Nonnull final EntityResponse entityResponse) { + final EnvelopedAspectMap aspectMap = entityResponse.getAspects(); + // If aspect is not present, log the error and return null. 
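As a point of reference (this sketch is not part of the patch), the new step-state resolvers hinge on mapping a plain step id to a dataHubStepState URN before calling batchGetV2. The URN form below is inferred from the test constants that appear later in this diff ("urn:li:dataHubStepState:1" for id "1"); UrnUtils.getUrn is used only to make that form explicit.

```java
import com.linkedin.common.urn.Urn;
import com.linkedin.common.urn.UrnUtils;

// Hypothetical illustration of the id-to-URN mapping used by BatchGetStepStatesResolver.
// The URN shape is taken from the test constants later in this diff.
public final class StepStateUrnSketch {
  public static Urn toStepStateUrn(final String id) {
    // The resolver derives the same URN from new DataHubStepStateKey().setId(id)
    // and an entity-key-to-URN helper, then only queries batchGetV2 for the URNs
    // that already exist.
    return UrnUtils.getUrn("urn:li:dataHubStepState:" + id);
  }

  private StepStateUrnSketch() { }
}
```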
+ if (!aspectMap.containsKey(DATAHUB_STEP_STATE_PROPERTIES_ASPECT_NAME)) { + log.error("Failed to find step state properties for urn: " + urn); + return null; + } + return new DataHubStepStateProperties(aspectMap.get(DATAHUB_STEP_STATE_PROPERTIES_ASPECT_NAME).getValue().data()); + } + + @Nonnull + private StepStateResult buildStepStateResult(@Nonnull final String id, + @Nonnull final DataHubStepStateProperties stepStateProperties) { + final StepStateResult result = new StepStateResult(); + result.setId(id); + final List mappedProperties = stepStateProperties + .getProperties() + .entrySet() + .stream() + .map(entry -> buildStringMapEntry(entry.getKey(), entry.getValue())) + .collect(Collectors.toList()); + result.setProperties(mappedProperties); + return result; + } + + @Nonnull + private StringMapEntry buildStringMapEntry(@Nonnull final String key, @Nonnull final String value) { + final StringMapEntry entry = new StringMapEntry(); + entry.setKey(key); + entry.setValue(value); + return entry; + } +} \ No newline at end of file diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/step/BatchUpdateStepStatesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/step/BatchUpdateStepStatesResolver.java new file mode 100644 index 0000000000000..e4c21207ddd34 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/step/BatchUpdateStepStatesResolver.java @@ -0,0 +1,93 @@ +package com.linkedin.datahub.graphql.resolvers.step; + +import com.datahub.authentication.Authentication; +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.StringMap; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.BatchUpdateStepStatesInput; +import com.linkedin.datahub.graphql.generated.BatchUpdateStepStatesResult; +import com.linkedin.datahub.graphql.generated.StepStateInput; +import com.linkedin.datahub.graphql.generated.StringMapEntryInput; +import com.linkedin.datahub.graphql.generated.UpdateStepStateResult; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.key.DataHubStepStateKey; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.step.DataHubStepStateProperties; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; +import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.entity.AspectUtils.*; + + +@Slf4j +@RequiredArgsConstructor +public class BatchUpdateStepStatesResolver implements DataFetcher> { + private final EntityClient _entityClient; + + @Override + public CompletableFuture get(@Nonnull final DataFetchingEnvironment environment) + throws Exception { + final QueryContext context = environment.getContext(); + final Authentication authentication = context.getAuthentication(); + + final BatchUpdateStepStatesInput input = + bindArgument(environment.getArgument("input"), BatchUpdateStepStatesInput.class); + final List states = input.getStates(); + final String actorUrnStr = authentication.getActor().toUrnStr(); + + return 
CompletableFuture.supplyAsync(() -> { + final Urn actorUrn = UrnUtils.getUrn(actorUrnStr); + final AuditStamp auditStamp = new AuditStamp().setActor(actorUrn).setTime(System.currentTimeMillis()); + final List results = states + .stream() + .map(state -> buildUpdateStepStateResult(state, auditStamp, authentication)) + .collect(Collectors.toList()); + final BatchUpdateStepStatesResult result = new BatchUpdateStepStatesResult(); + result.setResults(results); + return result; + }); + } + + private UpdateStepStateResult buildUpdateStepStateResult(@Nonnull final StepStateInput state, + @Nonnull final AuditStamp auditStamp, + @Nonnull final Authentication authentication) { + final String id = state.getId(); + final UpdateStepStateResult updateStepStateResult = new UpdateStepStateResult(); + updateStepStateResult.setId(id); + final boolean success = updateStepState(id, state.getProperties(), auditStamp, authentication); + updateStepStateResult.setSucceeded(success); + return updateStepStateResult; + } + + private boolean updateStepState(@Nonnull final String id, + @Nonnull final List inputProperties, @Nonnull final AuditStamp auditStamp, + @Nonnull final Authentication authentication) { + final Map properties = + inputProperties.stream().collect(Collectors.toMap(StringMapEntryInput::getKey, StringMapEntryInput::getValue)); + try { + final DataHubStepStateKey stepStateKey = new DataHubStepStateKey().setId(id); + final DataHubStepStateProperties stepStateProperties = + new DataHubStepStateProperties().setProperties(new StringMap(properties)).setLastModified(auditStamp); + + final MetadataChangeProposal proposal = + buildMetadataChangeProposal(DATAHUB_STEP_STATE_ENTITY_NAME, stepStateKey, + DATAHUB_STEP_STATE_PROPERTIES_ASPECT_NAME, stepStateProperties); + _entityClient.ingestProposal(proposal, authentication, false); + return true; + } catch (Exception e) { + log.error("Could not update step state for id {}", id, e); + return false; + } + } +} \ No newline at end of file diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/TestUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/TestUtils.java index 49f1b6243bcd7..248da3e58d8ae 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/TestUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/TestUtils.java @@ -1,11 +1,17 @@ package com.linkedin.datahub.graphql.resolvers.test; +import com.linkedin.data.template.RecordTemplate; import com.linkedin.data.template.SetMode; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.TestDefinitionInput; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.test.TestDefinition; import com.linkedin.test.TestDefinitionType; +import java.util.Map; import java.util.Optional; import javax.annotation.Nonnull; @@ -28,5 +34,15 @@ public static TestDefinition mapDefinition(final TestDefinitionInput testDefInpu return result; } + public static EntityResponse buildEntityResponse(Map aspects) { + final EntityResponse entityResponse = new EntityResponse(); + final EnvelopedAspectMap aspectMap = new EnvelopedAspectMap(); + for (Map.Entry entry : aspects.entrySet()) { + aspectMap.put(entry.getKey(), new EnvelopedAspect().setValue(new 
Aspect(entry.getValue().data()))); + } + entityResponse.setAspects(aspectMap); + return entityResponse; + } + private TestUtils() { } } diff --git a/datahub-graphql-core/src/main/resources/step.graphql b/datahub-graphql-core/src/main/resources/step.graphql new file mode 100644 index 0000000000000..829cc27d9d87d --- /dev/null +++ b/datahub-graphql-core/src/main/resources/step.graphql @@ -0,0 +1,103 @@ +extend type Query { + """ + Batch fetch the state for a set of steps. + """ + batchGetStepStates(input: BatchGetStepStatesInput!): BatchGetStepStatesResult! +} + + +extend type Mutation { + """ + Batch update the state for a set of steps. + """ + batchUpdateStepStates(input: BatchUpdateStepStatesInput!): BatchUpdateStepStatesResult! +} + + +""" +Input arguments required for fetching step states +""" +input BatchGetStepStatesInput { + """ + The unique ids for the steps to retrieve + """ + ids: [String!]! +} + + +""" +Input arguments required for updating step states +""" +input BatchUpdateStepStatesInput { + """ + Set of step states. If the id does not exist, it will be created. + """ + states: [StepStateInput!]! +} + + +""" +The input required to update the state of a step +""" +input StepStateInput { + """ + The globally unique id for the step + """ + id: String! + + """ + The new properties for the step + """ + properties: [StringMapEntryInput]! +} + +""" +Result returned when fetching step state +""" +type BatchGetStepStatesResult { + """ + The step states + """ + results: [StepStateResult!]! +} + +""" +A single step state +""" +type StepStateResult { + """ + Unique id of the step + """ + id: String! + + """ + The properties for the step state + """ + properties: [StringMapEntry!]! +} + + +""" +Result returned when fetching step state +""" +type BatchUpdateStepStatesResult { + """ + Results for each step + """ + results: [UpdateStepStateResult!]! +} + +""" +Result returned when fetching step state +""" +type UpdateStepStateResult { + """ + Id of the step + """ + id: String! + + """ + Whether the update succeeded. + """ + succeeded: Boolean! 
+} \ No newline at end of file diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java index e93f48336e8a6..e9b12937ddf81 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java @@ -2,7 +2,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authorization.AuthorizationResult; -import com.datahub.authorization.Authorizer; +import com.datahub.plugins.auth.authorization.Authorizer; import com.linkedin.common.AuditStamp; import com.linkedin.metadata.entity.EntityService; import com.linkedin.mxe.MetadataChangeProposal; diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GlossaryUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GlossaryUtilsTest.java index acd8b9dc3a69f..c9a4058547e54 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GlossaryUtilsTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GlossaryUtilsTest.java @@ -2,7 +2,7 @@ import com.datahub.authorization.AuthorizationRequest; import com.datahub.authorization.AuthorizationResult; -import com.datahub.authorization.Authorizer; +import com.datahub.plugins.auth.authorization.Authorizer; import com.datahub.authorization.ResourceSpec; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestTestUtils.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestTestUtils.java index 822d353218ab3..dae0758f6a2f6 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestTestUtils.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestTestUtils.java @@ -2,7 +2,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authorization.AuthorizationResult; -import com.datahub.authorization.Authorizer; +import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableMap; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.StringMap; diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionAuthUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionAuthUtilsTest.java index ce00fa1525582..12045b9361469 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionAuthUtilsTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionAuthUtilsTest.java @@ -2,7 +2,7 @@ import com.datahub.authorization.AuthorizationRequest; import com.datahub.authorization.AuthorizationResult; -import com.datahub.authorization.Authorizer; +import com.datahub.plugins.auth.authorization.Authorizer; import com.linkedin.datahub.graphql.QueryContext; import java.util.Optional; import org.mockito.Mockito; diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/step/BatchGetStepStatesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/step/BatchGetStepStatesResolverTest.java new file mode 
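To make the step.graphql schema above concrete, here is a hedged caller-side sketch (not part of the patch) of the "input" argument for the batchUpdateStepStates mutation, mirroring the generated-input usage in the resolver tests later in this diff. The ids and property values are illustrative only, and the setter names on the codegen classes are assumed to mirror the getters the resolvers use.

```java
import com.google.common.collect.ImmutableList;
import com.linkedin.datahub.graphql.generated.BatchUpdateStepStatesInput;
import com.linkedin.datahub.graphql.generated.StepStateInput;
import com.linkedin.datahub.graphql.generated.StringMapEntryInput;

// Hypothetical sketch: building the input for the batchUpdateStepStates mutation.
public final class BatchUpdateStepStatesInputSketch {
  public static BatchUpdateStepStatesInput singleStepUpdate() {
    final StringMapEntryInput property = new StringMapEntryInput();
    property.setKey("completed");      // illustrative property key
    property.setValue("true");         // illustrative property value

    final StepStateInput state = new StepStateInput();
    state.setId("onboarding-step-1");  // illustrative step id
    state.setProperties(ImmutableList.of(property));

    final BatchUpdateStepStatesInput input = new BatchUpdateStepStatesInput();
    input.setStates(ImmutableList.of(state));
    return input;
  }

  private BatchUpdateStepStatesInputSketch() { }
}
```

BatchUpdateStepStatesResolver then converts each state into a dataHubStepState key aspect plus a DataHubStepStateProperties aspect and ingests them as a MetadataChangeProposal, as shown in the resolver earlier in this diff.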
100644 index 0000000000000..8c4445452c564 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/step/BatchGetStepStatesResolverTest.java @@ -0,0 +1,118 @@ +package com.linkedin.datahub.graphql.resolvers.step; + +import com.datahub.authentication.Authentication; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.BatchGetStepStatesInput; +import com.linkedin.datahub.graphql.generated.BatchGetStepStatesResult; +import com.linkedin.datahub.graphql.resolvers.test.TestUtils; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.step.DataHubStepStateProperties; +import graphql.schema.DataFetchingEnvironment; +import java.util.Map; +import java.util.Set; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import static com.linkedin.datahub.graphql.TestUtils.*; +import static com.linkedin.metadata.Constants.*; +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + + +public class BatchGetStepStatesResolverTest { + private static final Urn ACTOR_URN = UrnUtils.getUrn("urn:li:corpuser:test"); + private static final long TIME = 123L; + private static final AuditStamp AUDIT_STAMP = new AuditStamp().setActor(ACTOR_URN).setTime(TIME); + private static final String FIRST_STEP_STATE_ID = "1"; + private static final String SECOND_STEP_STATE_ID = "2"; + private static final Urn FIRST_STEP_STATE_URN = UrnUtils.getUrn("urn:li:dataHubStepState:1"); + private static final Urn SECOND_STEP_STATE_URN = UrnUtils.getUrn("urn:li:dataHubStepState:2"); + private static final Set ASPECTS = ImmutableSet.of(DATAHUB_STEP_STATE_PROPERTIES_ASPECT_NAME); + private EntityClient _entityClient; + private BatchGetStepStatesResolver _resolver; + private DataFetchingEnvironment _dataFetchingEnvironment; + private Authentication _authentication; + + @BeforeMethod + public void setupTest() throws Exception { + + _entityClient = mock(EntityClient.class); + _dataFetchingEnvironment = mock(DataFetchingEnvironment.class); + _authentication = mock(Authentication.class); + + _resolver = new BatchGetStepStatesResolver(_entityClient); + } + + @Test + public void testBatchGetStepStatesFirstStepCompleted() throws Exception { + final QueryContext mockContext = getMockAllowContext(); + when(_dataFetchingEnvironment.getContext()).thenReturn(mockContext); + when(mockContext.getAuthentication()).thenReturn(_authentication); + + final BatchGetStepStatesInput input = new BatchGetStepStatesInput(); + input.setIds(ImmutableList.of(FIRST_STEP_STATE_ID, SECOND_STEP_STATE_ID)); + when(_dataFetchingEnvironment.getArgument("input")).thenReturn(input); + + when(_entityClient.exists(eq(FIRST_STEP_STATE_URN), eq(_authentication))).thenReturn(true); + when(_entityClient.exists(eq(SECOND_STEP_STATE_URN), eq(_authentication))).thenReturn(false); + + final DataHubStepStateProperties firstStepStateProperties = + new DataHubStepStateProperties().setLastModified(AUDIT_STAMP); + + final Set urns = ImmutableSet.of(FIRST_STEP_STATE_URN); + final Map firstAspectMap = ImmutableMap.of(DATAHUB_STEP_STATE_PROPERTIES_ASPECT_NAME, + firstStepStateProperties); + final Map 
entityResponseMap = ImmutableMap.of(FIRST_STEP_STATE_URN, + TestUtils.buildEntityResponse(firstAspectMap)); + + when(_entityClient.batchGetV2(eq(DATAHUB_STEP_STATE_ENTITY_NAME), eq(urns), eq(ASPECTS), eq(_authentication))) + .thenReturn(entityResponseMap); + + final BatchGetStepStatesResult actualBatchResult = _resolver.get(_dataFetchingEnvironment).join(); + assertNotNull(actualBatchResult); + assertEquals(1, actualBatchResult.getResults().size()); + } + + @Test + public void testBatchGetStepStatesBothStepsCompleted() throws Exception { + final QueryContext mockContext = getMockAllowContext(); + when(_dataFetchingEnvironment.getContext()).thenReturn(mockContext); + when(mockContext.getAuthentication()).thenReturn(_authentication); + + final BatchGetStepStatesInput input = new BatchGetStepStatesInput(); + input.setIds(ImmutableList.of(FIRST_STEP_STATE_ID, SECOND_STEP_STATE_ID)); + when(_dataFetchingEnvironment.getArgument("input")).thenReturn(input); + + when(_entityClient.exists(eq(FIRST_STEP_STATE_URN), eq(_authentication))).thenReturn(true); + when(_entityClient.exists(eq(SECOND_STEP_STATE_URN), eq(_authentication))).thenReturn(true); + + final DataHubStepStateProperties firstStepStateProperties = + new DataHubStepStateProperties().setLastModified(AUDIT_STAMP); + final DataHubStepStateProperties secondStepStateProperties = + new DataHubStepStateProperties().setLastModified(AUDIT_STAMP); + + final Set urns = ImmutableSet.of(FIRST_STEP_STATE_URN, SECOND_STEP_STATE_URN); + final Map firstAspectMap = ImmutableMap.of(DATAHUB_STEP_STATE_PROPERTIES_ASPECT_NAME, + firstStepStateProperties); + final Map secondAspectMap = ImmutableMap.of(DATAHUB_STEP_STATE_PROPERTIES_ASPECT_NAME, + secondStepStateProperties); + final Map entityResponseMap = ImmutableMap.of( + FIRST_STEP_STATE_URN, TestUtils.buildEntityResponse(firstAspectMap), + SECOND_STEP_STATE_URN, TestUtils.buildEntityResponse(secondAspectMap)); + + when(_entityClient.batchGetV2(eq(DATAHUB_STEP_STATE_ENTITY_NAME), eq(urns), eq(ASPECTS), eq(_authentication))) + .thenReturn(entityResponseMap); + + final BatchGetStepStatesResult actualBatchResult = _resolver.get(_dataFetchingEnvironment).join(); + assertNotNull(actualBatchResult); + assertEquals(2, actualBatchResult.getResults().size()); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/step/BatchUpdateStepStatesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/step/BatchUpdateStepStatesResolverTest.java new file mode 100644 index 0000000000000..5f20a11f15ac6 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/step/BatchUpdateStepStatesResolverTest.java @@ -0,0 +1,60 @@ +package com.linkedin.datahub.graphql.resolvers.step; + +import com.datahub.authentication.Actor; +import com.datahub.authentication.ActorType; +import com.datahub.authentication.Authentication; +import com.google.common.collect.ImmutableList; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.BatchUpdateStepStatesInput; +import com.linkedin.datahub.graphql.generated.BatchUpdateStepStatesResult; +import com.linkedin.datahub.graphql.generated.StepStateInput; +import com.linkedin.entity.client.EntityClient; +import graphql.schema.DataFetchingEnvironment; +import java.util.Collections; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import 
static com.linkedin.datahub.graphql.TestUtils.*; +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + + +public class BatchUpdateStepStatesResolverTest { + private static final Urn ACTOR_URN = UrnUtils.getUrn("urn:li:corpuser:test"); + private static final String FIRST_STEP_STATE_ID = "1"; + private EntityClient _entityClient; + private BatchUpdateStepStatesResolver _resolver; + private DataFetchingEnvironment _dataFetchingEnvironment; + private Authentication _authentication; + + @BeforeMethod + public void setupTest() throws Exception { + _entityClient = mock(EntityClient.class); + _dataFetchingEnvironment = mock(DataFetchingEnvironment.class); + _authentication = mock(Authentication.class); + + _resolver = new BatchUpdateStepStatesResolver(_entityClient); + } + + @Test + public void testBatchUpdateStepStatesFirstStepCompleted() throws Exception { + final QueryContext mockContext = getMockAllowContext(); + when(_dataFetchingEnvironment.getContext()).thenReturn(mockContext); + when(mockContext.getAuthentication()).thenReturn(_authentication); + when(_authentication.getActor()).thenReturn(new Actor(ActorType.USER, ACTOR_URN.toString())); + + final BatchUpdateStepStatesInput input = new BatchUpdateStepStatesInput(); + final StepStateInput firstInput = new StepStateInput(); + firstInput.setId(FIRST_STEP_STATE_ID); + firstInput.setProperties(Collections.emptyList()); + input.setStates(ImmutableList.of(firstInput)); + when(_dataFetchingEnvironment.getArgument("input")).thenReturn(input); + + final BatchUpdateStepStatesResult actualBatchResult = _resolver.get(_dataFetchingEnvironment).join(); + assertNotNull(actualBatchResult); + assertEquals(1, actualBatchResult.getResults().size()); + verify(_entityClient, times(1)).ingestProposal(any(), eq(_authentication), eq(false)); + } +} diff --git a/datahub-ranger-plugin/build.gradle b/datahub-ranger-plugin/build.gradle deleted file mode 100644 index a08d3f2b1e4c9..0000000000000 --- a/datahub-ranger-plugin/build.gradle +++ /dev/null @@ -1,110 +0,0 @@ -apply plugin: 'java' -apply plugin: 'signing' -apply plugin: 'maven-publish' -apply plugin: 'io.codearte.nexus-staging' -apply from: '../metadata-integration/java/versioning.gradle' - - -repositories { - mavenCentral() -} - -java { - withJavadocJar() - withSourcesJar() -} - -test { - useJUnit() -} - -compileJava { - sourceCompatibility = '1.8' - targetCompatibility = '1.8' -} - -dependencies { - implementation(externalDependency.rangerCommons) { - exclude group: "org.apache.htrace", module: "htrace-core4" - } - implementation externalDependency.hadoopCommon3 - implementation externalDependency.log4j12Api - - constraints { - implementation(externalDependency.woodstoxCore) { - because("previous versions are vulnerable to CVE-2022-40151 CVE-2022-40152") - } - implementation(externalDependency.jettyClient) { - because("previous versions are vulnerable to CVE-2021-28165") - } - implementation(externalDependency.jettison) { - because("previous versions are vulnerable to CVE-2022-40149 CVE-2022-40150") - } - } - - testCompile externalDependency.testng -} - - -publishing { - publications { - mavenJava(MavenPublication) { - from components.java - pom { - name = 'DataHub Apache Ranger Plugin' - group = 'io.acryl' - artifactId = 'datahub-ranger-plugin' - description = 'DataHub Apache Ranger plugin for authorization of DataHub resources' - url = 'https://datahubproject.io' - scm { - connection = 'scm:git:git://github.com/datahub-project/datahub.git' - developerConnection = 
'scm:git:ssh://github.com:datahub-project/datahub.git' - url = 'https://github.com/datahub-project/datahub.git' - } - - licenses { - license { - name = 'The Apache License, Version 2.0' - url = 'http://www.apache.org/licenses/LICENSE-2.0.txt' - } - } - - developers { - developer { - id = 'datahub' - name = 'DataHub' - email = 'datahub@acryl.io' - } - } - } - } - } - - repositories { - maven { - def releasesRepoUrl = "https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/" - def snapshotsRepoUrl = "https://s01.oss.sonatype.org/content/repositories/snapshots/" - def ossrhUsername = System.getenv('RELEASE_USERNAME') - def ossrhPassword = System.getenv('RELEASE_PASSWORD') - credentials { - username ossrhUsername - password ossrhPassword - } - url = version.endsWith('SNAPSHOT') ? snapshotsRepoUrl : releasesRepoUrl - } - } -} - -signing { - def signingKey = findProperty("signingKey") - def signingPassword = System.getenv("SIGNING_PASSWORD") - useInMemoryPgpKeys(signingKey, signingPassword) - sign publishing.publications.mavenJava -} - -// Required to submit jar file to staging repo of maven central -nexusStaging { - serverUrl = "https://s01.oss.sonatype.org/service/local/" //required only for projects registered in Sonatype after 2021-02-24 - username = System.getenv("NEXUS_USERNAME") - password = System.getenv("NEXUS_PASSWORD") -} diff --git a/datahub-ranger-plugin/conf/auth-plugin-configuration-configMap.kubernetes.yaml b/datahub-ranger-plugin/conf/auth-plugin-configuration-configMap.kubernetes.yaml deleted file mode 100644 index 36644d1120b47..0000000000000 --- a/datahub-ranger-plugin/conf/auth-plugin-configuration-configMap.kubernetes.yaml +++ /dev/null @@ -1,79 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: auth-plugin-configuration -data: - ranger-datahub-security.xml: | - - - - ranger.plugin.datahub.policy.rest.url - http://[RANGER_SERVICE_HOST]:[RANGER_SERVICE_PORT] - - URL to Ranger Admin - - - - - ranger.plugin.datahub.service.name - datahub - - Name of the Ranger service containing policies for this datahub instance - - - - - ranger.plugin.datahub.policy.source.impl - org.apache.ranger.admin.client.RangerAdminRESTClient - - Class to retrieve policies from the source - - - - - ranger.plugin.datahub.policy.rest.ssl.config.file - ranger-policymgr-ssl.xml - - Path to the file containing SSL details to contact Ranger Admin - - - - - ranger.plugin.datahub.policy.pollIntervalMs - 30000 - - How often to poll for changes in policies? 
- - - - - ranger.plugin.datahub.policy.cache.dir - /tmp - - Directory where Ranger policies are cached after successful retrieval from the source - - - - - ranger.plugin.datahub.policy.rest.client.connection.timeoutMs - 120000 - - RangerRestClient Connection Timeout in Milli Seconds - - - - - ranger.plugin.datahub.policy.rest.client.read.timeoutMs - 30000 - - RangerRestClient read Timeout in Milli Seconds - - - - ranger.plugin.datahub.service.name - ranger_datahub - - Name of the Ranger service containing policies for this datahub instance - - - diff --git a/datahub-ranger-plugin/conf/ranger-datahub-audit.xml b/datahub-ranger-plugin/conf/ranger-datahub-audit.xml deleted file mode 100644 index e7a94b2d5befa..0000000000000 --- a/datahub-ranger-plugin/conf/ranger-datahub-audit.xml +++ /dev/null @@ -1,79 +0,0 @@ - - - - - - - xasecure.audit.destination.db - false - - - - xasecure.audit.destination.db.jdbc.driver - com.mysql.jdbc.Driver - - - - xasecure.audit.destination.db.jdbc.url - jdbc:mysql://localhost/ranger_audit - - - - xasecure.audit.destination.db.password - rangerlogger - - - - xasecure.audit.destination.db.user - rangerlogger - - - - xasecure.audit.destination.db.batch.filespool.dir - /tmp/audit/db/spool - - - - - - xasecure.audit.destination.hdfs - false - - - - xasecure.audit.destination.hdfs.dir - hdfs://localhost:8020/ranger/audit - - - - xasecure.audit.destination.hdfs.batch.filespool.dir - /tmp/audit/hdfs/spool - - - - - - xasecure.audit.destination.log4j - true - - - - xasecure.audit.destination.log4j.logger - ranger_audit_logger - - \ No newline at end of file diff --git a/datahub-ranger-plugin/conf/ranger-datahub-security.xml b/datahub-ranger-plugin/conf/ranger-datahub-security.xml deleted file mode 100644 index d9947873c3326..0000000000000 --- a/datahub-ranger-plugin/conf/ranger-datahub-security.xml +++ /dev/null @@ -1,90 +0,0 @@ - - - - - - ranger.plugin.datahub.policy.rest.url - http://[RANGER_SERVICE_HOST]:[RANGER_SERVICE_PORT] - - URL to Ranger Admin - - - - - ranger.plugin.datahub.service.name - datahub - - Name of the Ranger service containing policies for this datahub instance - - - - - ranger.plugin.datahub.policy.source.impl - org.apache.ranger.admin.client.RangerAdminRESTClient - - Class to retrieve policies from the source - - - - - ranger.plugin.datahub.policy.rest.ssl.config.file - ranger-policymgr-ssl.xml - - Path to the file containing SSL details to contact Ranger Admin - - - - - ranger.plugin.datahub.policy.pollIntervalMs - 30000 - - How often to poll for changes in policies? 
- - - - - ranger.plugin.datahub.policy.cache.dir - /tmp - - Directory where Ranger policies are cached after successful retrieval from the source - - - - - ranger.plugin.datahub.policy.rest.client.connection.timeoutMs - 120000 - - RangerRestClient Connection Timeout in Milli Seconds - - - - - ranger.plugin.datahub.policy.rest.client.read.timeoutMs - 30000 - - RangerRestClient read Timeout in Milli Seconds - - - - ranger.plugin.datahub.service.name - ranger_datahub - - Name of the Ranger service containing policies for this datahub instance - - - diff --git a/datahub-ranger-plugin/conf/servicedef.json b/datahub-ranger-plugin/conf/servicedef.json deleted file mode 100644 index 33bec319ff124..0000000000000 --- a/datahub-ranger-plugin/conf/servicedef.json +++ /dev/null @@ -1,443 +0,0 @@ -{ - "name": "datahub", - "label": "DataHub Ranger Plugin", - "description": "DataHub metadata service ranger plugin", - "guid": "4b4d4245-dd79-45fa-8854-8eb7cb1c37ad", - "implClass": "com.datahub.authorizer.plugin.ranger.DataHubRangerAuthPlugin", - "version": 1, - "isEnabled": 1, - "resources": [ - { - "itemId": 1, - "name": "platform", - "type": "string", - "level": 10, - "parent": "", - "mandatory": true, - "lookupSupported": true, - "recursiveSupported": true, - "excludesSupported": true, - "matcher": "org.apache.ranger.plugin.resourcematcher.RangerPathResourceMatcher", - "matcherOptions": { - "wildCard": true, - "ignoreCase": true - }, - "validationRegEx": "", - "validationMessage": "", - "uiHint": "", - "label": "platform", - "description": "This resource is added to simulate DataHub platform policy" - }, - { - "itemId": 2, - "name": "dataset", - "type": "string", - "level": 10, - "parent": "", - "mandatory": true, - "lookupSupported": true, - "recursiveSupported": true, - "excludesSupported": true, - "matcher": "org.apache.ranger.plugin.resourcematcher.RangerPathResourceMatcher", - "matcherOptions": { - "wildCard": true, - "ignoreCase": true - }, - "validationRegEx": "", - "validationMessage": "", - "uiHint": "", - "label": "dataset", - "description": "DataHub dataset resource" - }, - { - "itemId": 3, - "name": "dashboard", - "type": "string", - "level": 10, - "parent": "", - "mandatory": true, - "lookupSupported": true, - "recursiveSupported": true, - "excludesSupported": true, - "matcher": "org.apache.ranger.plugin.resourcematcher.RangerPathResourceMatcher", - "matcherOptions": { - "wildCard": true, - "ignoreCase": true - }, - "validationRegEx": "", - "validationMessage": "", - "uiHint": "", - "label": "dashboard", - "description": "DataHub dashboard resource" - }, - { - "itemId": 4, - "name": "chart", - "type": "string", - "level": 10, - "parent": "", - "mandatory": true, - "lookupSupported": true, - "recursiveSupported": true, - "excludesSupported": true, - "matcher": "org.apache.ranger.plugin.resourcematcher.RangerPathResourceMatcher", - "matcherOptions": { - "wildCard": true, - "ignoreCase": true - }, - "validationRegEx": "", - "validationMessage": "", - "uiHint": "", - "label": "chart", - "description": "DataHub chart resource" - }, - { - "itemId": 5, - "name": "dataflow", - "type": "string", - "level": 10, - "parent": "", - "mandatory": true, - "lookupSupported": true, - "recursiveSupported": true, - "excludesSupported": true, - "matcher": "org.apache.ranger.plugin.resourcematcher.RangerPathResourceMatcher", - "matcherOptions": { - "wildCard": true, - "ignoreCase": true - }, - "validationRegEx": "", - "validationMessage": "", - "uiHint": "", - "label": "dataFlow", - "description": "DataHub 
dataFlow resource" - }, - { - "itemId": 6, - "name": "datajob", - "type": "string", - "level": 10, - "parent": "", - "mandatory": true, - "lookupSupported": true, - "recursiveSupported": true, - "excludesSupported": true, - "matcher": "org.apache.ranger.plugin.resourcematcher.RangerPathResourceMatcher", - "matcherOptions": { - "wildCard": true, - "ignoreCase": true - }, - "validationRegEx": "", - "validationMessage": "", - "uiHint": "", - "label": "dataJob", - "description": "DataHub dataJob resource" - }, - { - "itemId": 7, - "name": "tag", - "type": "string", - "level": 10, - "parent": "", - "mandatory": true, - "lookupSupported": true, - "recursiveSupported": true, - "excludesSupported": true, - "matcher": "org.apache.ranger.plugin.resourcematcher.RangerPathResourceMatcher", - "matcherOptions": { - "wildCard": true, - "ignoreCase": true - }, - "validationRegEx": "", - "validationMessage": "", - "uiHint": "", - "label": "tag", - "description": "DataHub tag resource" - }, - { - "itemId": 8, - "name": "container", - "type": "string", - "level": 10, - "parent": "", - "mandatory": true, - "lookupSupported": true, - "recursiveSupported": true, - "excludesSupported": true, - "matcher": "org.apache.ranger.plugin.resourcematcher.RangerPathResourceMatcher", - "matcherOptions": { - "wildCard": true, - "ignoreCase": true - }, - "validationRegEx": "", - "validationMessage": "", - "uiHint": "", - "label": "container", - "description": "DataHub container resource" - }, - { - "itemId": 9, - "name": "domain", - "type": "string", - "level": 10, - "parent": "", - "mandatory": true, - "lookupSupported": true, - "recursiveSupported": true, - "excludesSupported": true, - "matcher": "org.apache.ranger.plugin.resourcematcher.RangerPathResourceMatcher", - "matcherOptions": { - "wildCard": true, - "ignoreCase": true - }, - "validationRegEx": "", - "validationMessage": "", - "uiHint": "", - "label": "domain", - "description": "DataHub domain resource" - }, - { - "itemId": 10, - "name": "glossaryterm", - "type": "string", - "level": 10, - "parent": "", - "mandatory": true, - "lookupSupported": true, - "recursiveSupported": true, - "excludesSupported": true, - "matcher": "org.apache.ranger.plugin.resourcematcher.RangerPathResourceMatcher", - "matcherOptions": { - "wildCard": true, - "ignoreCase": true - }, - "validationRegEx": "", - "validationMessage": "", - "uiHint": "", - "label": "glossaryTerm", - "description": "DataHub glossaryTerm resource" - }, - { - "itemId": 11, - "name": "corpgroup", - "type": "string", - "level": 10, - "parent": "", - "mandatory": true, - "lookupSupported": true, - "recursiveSupported": true, - "excludesSupported": true, - "matcher": "org.apache.ranger.plugin.resourcematcher.RangerPathResourceMatcher", - "matcherOptions": { - "wildCard": true, - "ignoreCase": true - }, - "validationRegEx": "", - "validationMessage": "", - "uiHint": "", - "label": "corpGroup", - "description": "DataHub corpGroup resource" - }, - { - "itemId": 12, - "name": "corpuser", - "type": "string", - "level": 10, - "parent": "", - "mandatory": true, - "lookupSupported": true, - "recursiveSupported": true, - "excludesSupported": true, - "matcher": "org.apache.ranger.plugin.resourcematcher.RangerPathResourceMatcher", - "matcherOptions": { - "wildCard": true, - "ignoreCase": true - }, - "validationRegEx": "", - "validationMessage": "", - "uiHint": "", - "label": "corpUser", - "description": "DataHub corpUser resource" - }, - { - "itemId": 13, - "name": "notebook", - "type": "string", - "level": 10, - "parent": "", - 
"mandatory": true, - "lookupSupported": true, - "recursiveSupported": true, - "excludesSupported": true, - "matcher": "org.apache.ranger.plugin.resourcematcher.RangerPathResourceMatcher", - "matcherOptions": { - "wildCard": true, - "ignoreCase": true - }, - "validationRegEx": "", - "validationMessage": "", - "uiHint": "", - "label": "notebook", - "description": "DataHub notebook resource" - } - - ], - "accessTypes": [ - { - "itemId": 1, - "name": "MANAGE_POLICIES", - "label": "MANAGE_POLICIES" - }, - { - "itemId": 2, - "name": "MANAGE_INGESTION", - "label": "MANAGE_INGESTION" - }, - { - "itemId": 3, - "name": "MANAGE_SECRETS", - "label": "MANAGE_SECRETS" - }, - { - "itemId": 4, - "name": "MANAGE_USERS_AND_GROUPS", - "label": "MANAGE_USERS_AND_GROUPS" - }, - { - "itemId": 5, - "name": "VIEW_ANALYTICS", - "label": "VIEW_ANALYTICS" - }, - { - "itemId": 6, - "name": "GENERATE_PERSONAL_ACCESS_TOKENS", - "label": "GENERATE_PERSONAL_ACCESS_TOKENS" - }, - { - "itemId": 7, - "name": "MANAGE_DOMAINS", - "label": "MANAGE_DOMAINS" - }, - { - "itemId": 8, - "name": "VIEW_ENTITY_PAGE", - "label": "VIEW_ENTITY_PAGE" - }, - { - "itemId": 9, - "name": "EDIT_ENTITY_TAGS", - "label": "EDIT_ENTITY_TAGS" - }, - { - "itemId": 10, - "name": "EDIT_ENTITY_GLOSSARY_TERMS", - "label": "EDIT_ENTITY_GLOSSARY_TERMS" - }, - { - "itemId": 11, - "name": "EDIT_ENTITY_OWNERS", - "label": "EDIT_ENTITY_OWNERS" - }, - { - "itemId": 12, - "name": "EDIT_ENTITY_DOCS", - "label": "EDIT_ENTITY_DOCS" - }, - { - "itemId": 13, - "name": "EDIT_ENTITY_DOC_LINKS", - "label": "EDIT_ENTITY_DOC_LINKS" - }, - { - "itemId": 14, - "name": "EDIT_ENTITY_STATUS", - "label": "EDIT_ENTITY_STATUS" - }, - { - "itemId": 15, - "name": "EDIT_DOMAINS", - "label": "EDIT_DOMAINS" - }, - { - "itemId": 16, - "name": "EDIT_DEPRECATION", - "label": "EDIT_DEPRECATION" - }, - { - "itemId": 17, - "name": "EDIT_ENTITY_ASSERTIONS", - "label": "EDIT_ENTITY_ASSERTIONS" - }, - { - "itemId": 18, - "name": "EDIT_ENTITY", - "label": "EDIT_ENTITY" - }, - { - "itemId": 19, - "name": "EDIT_DATASET_COL_TAGS", - "label": "EDIT_DATASET_COL_TAGS" - }, - { - "itemId": 20, - "name": "EDIT_DATASET_COL_GLOSSARY_TERMS", - "label": "EDIT_DATASET_COL_GLOSSARY_TERMS" - }, - { - "itemId": 21, - "name": "EDIT_DATASET_COL_DESCRIPTION", - "label": "EDIT_DATASET_COL_DESCRIPTION" - }, - { - "itemId": 22, - "name": "VIEW_DATASET_USAGE", - "label": "VIEW_DATASET_USAGE" - }, - { - "itemId": 23, - "name": "VIEW_DATASET_PROFILE", - "label": "VIEW_DATASET_PROFILE" - }, - { - "itemId": 24, - "name": "EDIT_TAG_COLOR", - "label": "EDIT_TAG_COLOR" - }, - { - "itemId": 25, - "name": "EDIT_GROUP_MEMBERS", - "label": "EDIT_GROUP_MEMBERS" - }, - { - "itemId": 26, - "name": "EDIT_USER_PROFILE", - "label": "EDIT_USER_PROFILE" - }, - { - "itemId": 27, - "name": "EDIT_CONTACT_INFO", - "label": "EDIT_CONTACT_INFO" - }, - { - "itemId": 28, - "name": "COMMON_ENTITYS", - "label": "COMMON_ENTITYS" - }, - { - "itemId": 29, - "name": "EDIT_ENTITY_DOMAINS", - "label": "EDIT_ENTITY_DOMAINS" - }, - { - "itemId": 30, - "name": "EDIT_ENTITY_DEPRECATION", - "label": "EDIT_ENTITY_DEPRECATION" - } - ], - "enums": [ - ], - "contextEnrichers": [ - ], - "policyConditions": [ - ] -} diff --git a/datahub-ranger-plugin/src/main/java/com/datahub/authorizer/plugin/ranger/DataHubRangerAuthPlugin.java b/datahub-ranger-plugin/src/main/java/com/datahub/authorizer/plugin/ranger/DataHubRangerAuthPlugin.java deleted file mode 100644 index f339623052ffd..0000000000000 --- 
a/datahub-ranger-plugin/src/main/java/com/datahub/authorizer/plugin/ranger/DataHubRangerAuthPlugin.java +++ /dev/null @@ -1,56 +0,0 @@ -package com.datahub.authorizer.plugin.ranger; - -import org.apache.ranger.plugin.client.BaseClient; -import org.apache.log4j.Logger; -import org.apache.ranger.plugin.service.RangerBaseService; -import org.apache.ranger.plugin.service.ResourceLookupContext; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; - - -/** - * Datahub Apache Ranger Plugin. - * It assists in creating policies on Apache Ranger Admin Portal. - * - */ -public class DataHubRangerAuthPlugin extends RangerBaseService { - private static Logger log = Logger.getLogger(DataHubRangerAuthPlugin.class.getName()); - - /** - * This is dummy function. As this plugin doesn't have any configuration - * @return A Map with success message - * @throws Exception - */ - @Override - public Map validateConfig() throws Exception { - throw new UnsupportedOperationException("validateConfig is not supported."); - } - - /** - * This is dummy function. As this plugin doesn't support the resource lookup - * @param resourceLookupContext - * @return Empty list of string - * @throws Exception - */ - @Override - public List lookupResource(ResourceLookupContext resourceLookupContext) throws Exception { - throw new UnsupportedOperationException("lookupResource is not supported."); - } - - private Map returnSuccessMap() { - String message = "Connection test successful"; - Map retMap = new HashMap<>(); - BaseClient.generateResponseDataMap(true, message, message, null, null, retMap); - return retMap; - } - - private Map returnFailMap() { - String message = "Connection test fail"; - Map retMap = new HashMap<>(); - BaseClient.generateResponseDataMap(false, message, message, null, null, retMap); - return retMap; - } - -} diff --git a/datahub-ranger-plugin/src/test/java/com/datahub/authorizer/plugin/ranger/TestDataHubRangerAuthPlugin.java b/datahub-ranger-plugin/src/test/java/com/datahub/authorizer/plugin/ranger/TestDataHubRangerAuthPlugin.java deleted file mode 100644 index 69d2146b24474..0000000000000 --- a/datahub-ranger-plugin/src/test/java/com/datahub/authorizer/plugin/ranger/TestDataHubRangerAuthPlugin.java +++ /dev/null @@ -1,18 +0,0 @@ -package com.datahub.authorizer.plugin.ranger; - -import org.junit.Test; - - -public class TestDataHubRangerAuthPlugin { - @Test(expected = UnsupportedOperationException.class) - public void testValidateConfig() throws Exception { - DataHubRangerAuthPlugin datahubRangerAuthPlugin = new DataHubRangerAuthPlugin(); - datahubRangerAuthPlugin.validateConfig(); - } - - @Test(expected = UnsupportedOperationException.class) - public void testLookupResource() throws Exception { - DataHubRangerAuthPlugin datahubRangerAuthPlugin = new DataHubRangerAuthPlugin(); - datahubRangerAuthPlugin.lookupResource(null); - } -} diff --git a/datahub-web-react/README.md b/datahub-web-react/README.md index 891735a7c55e9..90b0e375af4c9 100644 --- a/datahub-web-react/README.md +++ b/datahub-web-react/README.md @@ -9,7 +9,6 @@ This module contains a React application that serves as the DataHub UI. Feel free to take a look around, deploy, and contribute. -For details about the motivation please see [this RFC](../docs/rfc/active/2055-react-app/README.md). ## Functional Goals The initial milestone for the app was to achieve functional parity with the previous Ember app. 
This meant supporting diff --git a/datahub-web-react/codegen.yml b/datahub-web-react/codegen.yml index addcd229f8564..20ed09e2ec924 100644 --- a/datahub-web-react/codegen.yml +++ b/datahub-web-react/codegen.yml @@ -9,6 +9,7 @@ schema: - '../datahub-graphql-core/src/main/resources/ingestion.graphql' - '../datahub-graphql-core/src/main/resources/timeline.graphql' - '../datahub-graphql-core/src/main/resources/tests.graphql' + - '../datahub-graphql-core/src/main/resources/step.graphql' config: scalars: Long: number diff --git a/datahub-web-react/package.json b/datahub-web-react/package.json index c1c3f0d41fb89..976825894f04f 100644 --- a/datahub-web-react/package.json +++ b/datahub-web-react/package.json @@ -85,6 +85,7 @@ "react-syntax-highlighter": "^15.4.4", "react-timezone-select": "^1.1.15", "react-visibility-sensor": "^5.1.1", + "reactour": "1.18.7", "sinon": "^11.1.1", "start-server-and-test": "1.12.2", "styled-components": "^5.2.1", diff --git a/datahub-web-react/src/app/ProtectedRoutes.tsx b/datahub-web-react/src/app/ProtectedRoutes.tsx index c7230c256caf1..d1370ab882f68 100644 --- a/datahub-web-react/src/app/ProtectedRoutes.tsx +++ b/datahub-web-react/src/app/ProtectedRoutes.tsx @@ -4,6 +4,7 @@ import { Layout } from 'antd'; import { HomePage } from './home/HomePage'; import AppConfigProvider from '../AppConfigProvider'; import { SearchRoutes } from './SearchRoutes'; +import { EducationStepsProvider } from '../providers/EducationStepsProvider'; /** * Container for all views behind an authentication wall. @@ -11,14 +12,16 @@ import { SearchRoutes } from './SearchRoutes'; export const ProtectedRoutes = (): JSX.Element => { return ( - - - - } /> - } /> - + + + + + } /> + } /> + + - + ); }; diff --git a/datahub-web-react/src/app/domain/DomainsList.tsx b/datahub-web-react/src/app/domain/DomainsList.tsx index ea6bf5e5778c3..8ffdcbe956310 100644 --- a/datahub-web-react/src/app/domain/DomainsList.tsx +++ b/datahub-web-react/src/app/domain/DomainsList.tsx @@ -14,6 +14,8 @@ import { SearchBar } from '../search/SearchBar'; import { useEntityRegistry } from '../useEntityRegistry'; import { scrollToTop } from '../shared/searchUtils'; import { addToListDomainsCache, removeFromListDomainsCache } from './utils'; +import { OnboardingTour } from '../onboarding/OnboardingTour'; +import { DOMAINS_INTRO_ID, DOMAINS_CREATE_DOMAIN_ID } from '../onboarding/config/DomainsOnboardingConfig'; const DomainsContainer = styled.div``; @@ -87,9 +89,10 @@ export const DomainsList = () => { <> {!data && loading && } {error && } + - = { urn: string; @@ -164,6 +165,7 @@ export const EntityProfile = ({ const [sidebarWidth, setSidebarWidth] = useState(window.innerWidth * 0.25); const [browserWidth, setBrowserWith] = useState(window.innerWidth * 0.2); const [shouldUpdateBrowser, setShouldUpdateBrowser] = useState(false); + const stepIds: string[] = getOnboardingStepIdsForEntityType(entityType); function refreshBrowser() { setShouldUpdateBrowser(true); @@ -298,6 +300,7 @@ export const EntityProfile = ({ }} > <> + {customNavBar} {showBrowseBar && !customNavBar && } diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityTabs.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityTabs.tsx index cfe44b7c27249..0b02953d4f81c 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityTabs.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityTabs.tsx @@ -1,6 +1,6 @@ import React, { useEffect } from 'react'; import { Tabs } 
from 'antd'; -import styled from 'styled-components'; +import styled from 'styled-components/macro'; import { EntityTab } from '../../../types'; import { useBaseEntity, useEntityData, useRouteToTab } from '../../../EntityContext'; diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Domain/SidebarDomainSection.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Domain/SidebarDomainSection.tsx index 96607742df256..0e1eafcfecab9 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Domain/SidebarDomainSection.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Domain/SidebarDomainSection.tsx @@ -7,6 +7,7 @@ import { SidebarHeader } from '../SidebarHeader'; import { SetDomainModal } from './SetDomainModal'; import { useUnsetDomainMutation } from '../../../../../../../graphql/mutations.generated'; import { DomainLink } from '../../../../../../shared/tags/DomainLink'; +import { ENTITY_PROFILE_DOMAINS_ID } from '../../../../../../onboarding/config/EntityProfileOnboardingConfig'; export const SidebarDomainSection = () => { const { entityData } = useEntityData(); @@ -46,38 +47,40 @@ export const SidebarDomainSection = () => { return (
- -
- {domain && ( - { - e.preventDefault(); - onRemoveDomain(entityData?.domain?.associatedUrn); +
+ +
+ {domain && ( + { + e.preventDefault(); + onRemoveDomain(entityData?.domain?.associatedUrn); + }} + /> + )} + {!domain && ( + <> + + {EMPTY_MESSAGES.domain.title}. {EMPTY_MESSAGES.domain.description} + + + + )} +
+ {showModal && ( + { + setShowModal(false); }} /> )} - {!domain && ( - <> - - {EMPTY_MESSAGES.domain.title}. {EMPTY_MESSAGES.domain.description} - - - - )}
- {showModal && ( - { - setShowModal(false); - }} - /> - )}
); }; diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/SidebarOwnerSection.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/SidebarOwnerSection.tsx index 3ee949732eddf..234160cebdf77 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/SidebarOwnerSection.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/SidebarOwnerSection.tsx @@ -6,6 +6,7 @@ import { EMPTY_MESSAGES } from '../../../../constants'; import { useEntityData, useMutationUrn, useRefetch } from '../../../../EntityContext'; import { SidebarHeader } from '../SidebarHeader'; import { EditOwnersModal } from './EditOwnersModal'; +import { ENTITY_PROFILE_OWNERS_ID } from '../../../../../../onboarding/config/EntityProfileOnboardingConfig'; export const SidebarOwnerSection = ({ properties }: { properties?: any }) => { const { entityType, entityData } = useEntityData(); @@ -16,7 +17,7 @@ export const SidebarOwnerSection = ({ properties }: { properties?: any }) => { const ownersEmpty = !entityData?.ownership?.owners?.length; return ( -
+
{entityData?.ownership?.owners?.map((owner) => ( diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/SidebarTagsSection.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/SidebarTagsSection.tsx index dd7f0c440f164..8c36b621fc7f0 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/SidebarTagsSection.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/SidebarTagsSection.tsx @@ -4,6 +4,10 @@ import styled from 'styled-components'; import TagTermGroup from '../../../../../shared/tags/TagTermGroup'; import { SidebarHeader } from './SidebarHeader'; import { useEntityData, useMutationUrn, useRefetch } from '../../../EntityContext'; +import { + ENTITY_PROFILE_GLOSSARY_TERMS_ID, + ENTITY_PROFILE_TAGS_ID, +} from '../../../../../onboarding/config/EntityProfileOnboardingConfig'; const TermSection = styled.div` margin-top: 20px; @@ -21,27 +25,31 @@ export const SidebarTagsSection = ({ properties }: { properties?: any }) => { return (
- - - - + + + + + + + +
); diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/utils.ts b/datahub-web-react/src/app/entity/shared/containers/profile/utils.ts index d467a944e5d71..fd3f2ad4c86bd 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/utils.ts +++ b/datahub-web-react/src/app/entity/shared/containers/profile/utils.ts @@ -6,6 +6,17 @@ import { useEntityRegistry } from '../../../../useEntityRegistry'; import EntityRegistry from '../../../EntityRegistry'; import { EntityTab, GenericEntityProperties } from '../../types'; import { useIsSeparateSiblingsMode, SEPARATE_SIBLINGS_URL_PARAM } from '../../siblingUtils'; +import { + ENTITY_PROFILE_DOCUMENTATION_ID, + ENTITY_PROFILE_DOMAINS_ID, + ENTITY_PROFILE_ENTITIES_ID, + ENTITY_PROFILE_GLOSSARY_TERMS_ID, + ENTITY_PROFILE_LINEAGE_ID, + ENTITY_PROFILE_OWNERS_ID, + ENTITY_PROFILE_PROPERTIES_ID, + ENTITY_PROFILE_SCHEMA_ID, + ENTITY_PROFILE_TAGS_ID, +} from '../../../../onboarding/config/EntityProfileOnboardingConfig'; export function getDataForEntityType({ data: entityData, @@ -107,3 +118,42 @@ export function useEntityQueryParams() { return response; } + +export function getOnboardingStepIdsForEntityType(entityType: EntityType): string[] { + switch (entityType) { + case EntityType.Chart: + return [ + ENTITY_PROFILE_DOCUMENTATION_ID, + ENTITY_PROFILE_PROPERTIES_ID, + ENTITY_PROFILE_LINEAGE_ID, + ENTITY_PROFILE_TAGS_ID, + ENTITY_PROFILE_GLOSSARY_TERMS_ID, + ENTITY_PROFILE_OWNERS_ID, + ENTITY_PROFILE_DOMAINS_ID, + ]; + case EntityType.Container: + return [ + ENTITY_PROFILE_ENTITIES_ID, + ENTITY_PROFILE_DOCUMENTATION_ID, + ENTITY_PROFILE_PROPERTIES_ID, + ENTITY_PROFILE_OWNERS_ID, + ENTITY_PROFILE_TAGS_ID, + ENTITY_PROFILE_GLOSSARY_TERMS_ID, + ENTITY_PROFILE_DOMAINS_ID, + ]; + case EntityType.Dataset: + return [ + ENTITY_PROFILE_SCHEMA_ID, + ENTITY_PROFILE_DOCUMENTATION_ID, + ENTITY_PROFILE_PROPERTIES_ID, + ENTITY_PROFILE_LINEAGE_ID, + ENTITY_PROFILE_OWNERS_ID, + ENTITY_PROFILE_TAGS_ID, + ENTITY_PROFILE_GLOSSARY_TERMS_ID, + ENTITY_PROFILE_DOMAINS_ID, + ]; + break; + default: + return []; + } +} diff --git a/datahub-web-react/src/app/glossary/BusinessGlossaryPage.tsx b/datahub-web-react/src/app/glossary/BusinessGlossaryPage.tsx index a2a763ef796db..c85eb61603579 100644 --- a/datahub-web-react/src/app/glossary/BusinessGlossaryPage.tsx +++ b/datahub-web-react/src/app/glossary/BusinessGlossaryPage.tsx @@ -15,6 +15,12 @@ import { Message } from '../shared/Message'; import { sortGlossaryTerms } from '../entity/glossaryTerm/utils'; import { useEntityRegistry } from '../useEntityRegistry'; import { sortGlossaryNodes } from '../entity/glossaryNode/utils'; +import { + BUSINESS_GLOSSARY_INTRO_ID, + BUSINESS_GLOSSARY_CREATE_TERM_ID, + BUSINESS_GLOSSARY_CREATE_TERM_GROUP_ID, +} from '../onboarding/config/BusinessGlossaryOnboardingConfig'; +import { OnboardingTour } from '../onboarding/OnboardingTour'; export const HeaderWrapper = styled(TabToolbar)` padding: 15px 45px 10px 24px; @@ -72,6 +78,13 @@ function BusinessGlossaryPage() { return ( <> + {(termsLoading || nodesLoading) && ( @@ -94,10 +107,18 @@ function BusinessGlossaryPage() { Business Glossary
- -
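The onboarding changes above all follow one pattern: a page exports stable step IDs from a per-page config under src/app/onboarding/config/ and renders an OnboardingTour with the subset of IDs it wants to surface (BusinessGlossaryPage.tsx above passes BUSINESS_GLOSSARY_INTRO_ID, BUSINESS_GLOSSARY_CREATE_TERM_ID, and BUSINESS_GLOSSARY_CREATE_TERM_GROUP_ID). A minimal sketch of what such a config module could look like is below; only the exported ID constants appear in this diff, while the OnboardingStep shape and the step copy are illustrative assumptions, not the actual DataHub source.

import React from 'react';

// Stable step IDs; the same IDs are attached as DOM ids to the elements the tour highlights.
export const BUSINESS_GLOSSARY_INTRO_ID = 'business-glossary-intro';
export const BUSINESS_GLOSSARY_CREATE_TERM_ID = 'business-glossary-create-term';
export const BUSINESS_GLOSSARY_CREATE_TERM_GROUP_ID = 'business-glossary-create-term-group';

// Assumed shape of a single onboarding step; the real config may carry more fields.
export interface OnboardingStep {
    id: string;
    title?: string;
    content?: React.ReactNode;
}

export const BusinessGlossaryOnboardingConfig: OnboardingStep[] = [
    { id: BUSINESS_GLOSSARY_INTRO_ID, title: 'Business Glossary', content: 'Define a shared vocabulary for your data assets.' },
    { id: BUSINESS_GLOSSARY_CREATE_TERM_ID, title: 'Create a Term', content: 'Add a new Glossary Term from here.' },
    { id: BUSINESS_GLOSSARY_CREATE_TERM_GROUP_ID, title: 'Create a Term Group', content: 'Group related terms together.' },
];

Keeping the IDs as exported constants lets a page, its config, and the highlighted elements stay in sync through a single import.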
diff --git a/datahub-web-react/src/app/home/GettingStartedModal.tsx b/datahub-web-react/src/app/home/GettingStartedModal.tsx deleted file mode 100644 index f159c59bf2987..0000000000000 --- a/datahub-web-react/src/app/home/GettingStartedModal.tsx +++ /dev/null @@ -1,107 +0,0 @@ -import React from 'react'; -import styled from 'styled-components'; -import { Link } from 'react-router-dom'; -import { Divider, Image, Modal, Steps, Typography } from 'antd'; -import pipinstall from '../../images/pipinstall.png'; -import recipeExample from '../../images/recipe-example.png'; -import ingestExample from '../../images/ingest-example.png'; - -const StyledModal = styled(Modal)` - top: 20px; -`; - -const StepImage = styled(Image)` - width: auto; - object-fit: contain; - margin-right: 10px; - background-color: transparent; - border-radius: 8px; -`; - -const GettingStartedParagraph = styled(Typography.Paragraph)` - font-size: 14px; - && { - margin-bottom: 28px; - } -`; - -const SectionTitle = styled(Typography.Title)` - && { - margin-bottom: 12px; - } -`; - -type Props = { - visible: boolean; - onClose: () => void; -}; - -export const GettingStartedModal = ({ visible, onClose }: Props) => { - return ( - - Welcome to DataHub - - Getting Started - - It looks like you're new to DataHub - Welcome! To start ingesting metadata, follow these steps or - check out the full{' '} - - Metadata Ingestion Quickstart Guide. - - - UI Ingestion - - Start integrating your data sources immediately by navigating to the{' '} - Ingestion tab. - - CLI Ingestion - - - - From your command line, install the acryl-datahub package from PyPI. - - - - } - /> - - - Define a YAML file defining the source from which you wish to extract metadata. This is - where you'll tell DataHub how to connect to your data source and configure the - metadata to be extracted. - - - - } - /> - - - Execute the datahub ingest command from your command line to ingest metadata into - DataHub. - - - - } - /> - - - That's it! Once you've ingested metadata, you can begin to search, document, tag, and assign - ownership for your data assets. - - Still have questions? - - Join our Slack to ask questions, provide feedback and - more. 
- - - ); -}; diff --git a/datahub-web-react/src/app/home/HomePage.tsx b/datahub-web-react/src/app/home/HomePage.tsx index 2bb2981566285..eb19134e68738 100644 --- a/datahub-web-react/src/app/home/HomePage.tsx +++ b/datahub-web-react/src/app/home/HomePage.tsx @@ -2,11 +2,30 @@ import React from 'react'; import { HomePageHeader } from './HomePageHeader'; import { HomePageBody } from './HomePageBody'; import analytics, { EventType } from '../analytics'; +import { OnboardingTour } from '../onboarding/OnboardingTour'; +import { + GLOBAL_WELCOME_TO_DATAHUB_ID, + HOME_PAGE_INGESTION_ID, + HOME_PAGE_DOMAINS_ID, + HOME_PAGE_MOST_POPULAR_ID, + HOME_PAGE_PLATFORMS_ID, + HOME_PAGE_SEARCH_BAR_ID, +} from '../onboarding/config/HomePageOnboardingConfig'; export const HomePage = () => { analytics.event({ type: EventType.HomePageViewEvent }); return ( <> + diff --git a/datahub-web-react/src/app/home/HomePageHeader.tsx b/datahub-web-react/src/app/home/HomePageHeader.tsx index db00b4240c7db..b51adc2f31a56 100644 --- a/datahub-web-react/src/app/home/HomePageHeader.tsx +++ b/datahub-web-react/src/app/home/HomePageHeader.tsx @@ -1,7 +1,7 @@ import React, { useEffect, useMemo, useState } from 'react'; import { useHistory } from 'react-router'; import { Typography, Image, Row, Button, Tag } from 'antd'; -import styled, { useTheme } from 'styled-components'; +import styled, { useTheme } from 'styled-components/macro'; import { RightOutlined } from '@ant-design/icons'; import { ManageAccount } from '../shared/ManageAccount'; import { useGetAuthenticatedUser } from '../useGetAuthenticatedUser'; @@ -19,6 +19,7 @@ import { HeaderLinks } from '../shared/admin/HeaderLinks'; import { ANTD_GRAY } from '../entity/shared/constants'; import { useAppConfig } from '../useAppConfig'; import { DEFAULT_APP_CONFIG } from '../../appConfigContext'; +import { HOME_PAGE_SEARCH_BAR_ID } from '../onboarding/config/HomePageOnboardingConfig'; const Background = styled.div` width: 100%; @@ -249,7 +250,7 @@ export const HomePageHeader = () => { {!!themeConfig.content.subtitle && ( {themeConfig.content.subtitle} )} - + { // Entity Types const entityRegistry = useEntityRegistry(); const browseEntityList = entityRegistry.getBrowseEntityTypes(); - const [showGettingStartedModal, setShowGettingStartedModal] = useState(false); const userUrn = user?.urn; const showSimplifiedHomepage = user?.settings?.appearance?.showSimplifiedHomepage; @@ -123,21 +136,26 @@ export const HomePageRecommendations = ({ user }: Props) => { const recommendationModules = data?.listRecommendations?.modules; // Determine whether metadata has been ingested yet. 
- const hasLoadedEntityCounts = orderedEntityCounts && orderedEntityCounts.length > 0; const hasIngestedMetadata = orderedEntityCounts && orderedEntityCounts.filter((entityCount) => entityCount.count > 0).length > 0; - useEffect(() => { - if (hasLoadedEntityCounts && !hasIngestedMetadata) { - setShowGettingStartedModal(true); - } - }, [hasLoadedEntityCounts, hasIngestedMetadata]); - // we want to render the domain module first if it exists const domainRecommendationModule = recommendationModules?.find( (module) => module.renderType === RecommendationRenderType.DomainSearchList, ); + // Render domain onboarding step if the domains module exists + const hasDomains = !!domainRecommendationModule; + useUpdateEducationStepIdsAllowlist(hasDomains, HOME_PAGE_DOMAINS_ID); + + // Render platforms onboarding step if the platforms module exists + const hasPlatforms = !!recommendationModules?.some((module) => module?.moduleId === PLATFORMS_MODULE_ID); + useUpdateEducationStepIdsAllowlist(hasPlatforms, HOME_PAGE_PLATFORMS_ID); + + // Render most popular onboarding step if the most popular module exists + const hasMostPopular = !!recommendationModules?.some((module) => module?.moduleId === MOST_POPULAR_MODULE_ID); + useUpdateEducationStepIdsAllowlist(hasMostPopular, HOME_PAGE_MOST_POPULAR_ID); + return ( @@ -145,7 +163,7 @@ export const HomePageRecommendations = ({ user }: Props) => { {domainRecommendationModule && ( <> - + {domainRecommendationModule.title} { ) : ( - setShowGettingStartedModal(true)}> - Connect your data sources - )} @@ -189,7 +204,7 @@ export const HomePageRecommendations = ({ user }: Props) => { recommendationModules .filter((module) => module.renderType !== RecommendationRenderType.DomainSearchList) .map((module) => ( - + {module.title} { /> ))} - setShowGettingStartedModal(false)} visible={showGettingStartedModal} /> ); }; diff --git a/datahub-web-react/src/app/identity/group/GroupList.tsx b/datahub-web-react/src/app/identity/group/GroupList.tsx index 1fb1508fcb294..29cba3c47bba9 100644 --- a/datahub-web-react/src/app/identity/group/GroupList.tsx +++ b/datahub-web-react/src/app/identity/group/GroupList.tsx @@ -13,6 +13,8 @@ import CreateGroupModal from './CreateGroupModal'; import { SearchBar } from '../../search/SearchBar'; import { useEntityRegistry } from '../../useEntityRegistry'; import { scrollToTop } from '../../shared/searchUtils'; +import { GROUPS_CREATE_GROUP_ID, GROUPS_INTRO_ID } from '../../onboarding/config/GroupsOnboardingConfig'; +import { OnboardingTour } from '../../onboarding/OnboardingTour'; const GroupContainer = styled.div``; @@ -77,12 +79,13 @@ export const GroupList = () => { return ( <> + {!data && loading && } {error && }
-
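package.json earlier in this diff pins reactour 1.18.7, which suggests the OnboardingTour rendered by GroupList above (with GROUPS_INTRO_ID and GROUPS_CREATE_GROUP_ID) is a thin wrapper that resolves step IDs into reactour steps. The sketch below is one plausible wiring, not the actual component: the step registry, the `#id` selector convention, and the open/close handling are assumptions; only reactour's Tour component with steps/isOpen/onRequestClose props is taken as a known API.

import React, { useState } from 'react';
import Tour from 'reactour';

// Hypothetical registry mapping step IDs to display copy; in the real app this would
// be assembled from the per-page onboarding config modules.
const STEP_REGISTRY: Record<string, string> = {
    'groups-intro': 'Create and manage DataHub Groups from this page.',
    'groups-create-group': 'Click here to create a new Group.',
};

type Props = {
    stepIds: string[];
};

export const OnboardingTour = ({ stepIds }: Props) => {
    const [isOpen, setIsOpen] = useState(true);

    // reactour locates each highlighted element by CSS selector; the convention assumed here
    // is that the target element carries the step ID as its DOM id.
    const steps = stepIds
        .filter((id) => id in STEP_REGISTRY)
        .map((id) => ({ selector: `#${id}`, content: STEP_REGISTRY[id] }));

    if (steps.length === 0) return null;
    return <Tour steps={steps} isOpen={isOpen} onRequestClose={() => setIsOpen(false)} />;
};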
diff --git a/datahub-web-react/src/app/identity/user/UserList.tsx b/datahub-web-react/src/app/identity/user/UserList.tsx index 4b65fe79a8a76..4d6daccbdcac9 100644 --- a/datahub-web-react/src/app/identity/user/UserList.tsx +++ b/datahub-web-react/src/app/identity/user/UserList.tsx @@ -1,6 +1,6 @@ import React, { useEffect, useState } from 'react'; import { Button, Empty, List, Pagination } from 'antd'; -import styled from 'styled-components'; +import styled from 'styled-components/macro'; import * as QueryString from 'query-string'; import { UsergroupAddOutlined } from '@ant-design/icons'; import { useLocation } from 'react-router'; @@ -15,6 +15,14 @@ import ViewInviteTokenModal from './ViewInviteTokenModal'; import { useGetAuthenticatedUser } from '../../useGetAuthenticatedUser'; import { useListRolesQuery } from '../../../graphql/role.generated'; import { scrollToTop } from '../../shared/searchUtils'; +import { OnboardingTour } from '../../onboarding/OnboardingTour'; +import { + USERS_ASSIGN_ROLE_ID, + USERS_INTRO_ID, + USERS_INVITE_LINK_ID, + USERS_SSO_ID, +} from '../../onboarding/config/UsersOnboardingConfig'; +import { useUpdateEducationStepIdsAllowlist } from '../../onboarding/useUpdateEducationStepIdsAllowlist'; const UserContainer = styled.div``; @@ -99,14 +107,22 @@ export const UserList = () => { const error = usersError || rolesError; const selectRoleOptions = rolesData?.listRoles?.roles?.map((role) => role as DataHubRole) || []; + useUpdateEducationStepIdsAllowlist(canManagePolicies, USERS_INVITE_LINK_ID); + return ( <> + {!usersData && loading && } {error && }
-
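UserList.tsx above calls useUpdateEducationStepIdsAllowlist(canManagePolicies, USERS_INVITE_LINK_ID), and HomePageRecommendations earlier does the same for the domains, platforms, and most-popular modules: a step is only offered when the UI it points at is actually rendered. The hook's implementation is not part of this diff, so the sketch below only illustrates the idea; the EducationStepsContext import path, its field names, and the Set-based allowlist are assumptions.

import { useContext, useEffect } from 'react';
// Assumed context exposed by the EducationStepsProvider added to ProtectedRoutes above;
// the real module path and field names may differ.
import { EducationStepsContext } from '../../providers/EducationStepsContext';

export function useUpdateEducationStepIdsAllowlist(condition: boolean, stepId: string): void {
    const { educationStepIdsAllowlist, setEducationStepIdsAllowlist } = useContext(EducationStepsContext);

    useEffect(() => {
        const isAllowlisted = educationStepIdsAllowlist.has(stepId);
        if (condition && !isAllowlisted) {
            // The step's target element is on screen: let the tour show it.
            const next = new Set(educationStepIdsAllowlist);
            next.add(stepId);
            setEducationStepIdsAllowlist(next);
        } else if (!condition && isAllowlisted) {
            // Target is absent (e.g. missing permission or module): hide the step again.
            const next = new Set(educationStepIdsAllowlist);
            next.delete(stepId);
            setEducationStepIdsAllowlist(next);
        }
    }, [condition, stepId, educationStepIdsAllowlist, setEducationStepIdsAllowlist]);
}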
diff --git a/datahub-web-react/src/app/identity/user/UserListItem.tsx b/datahub-web-react/src/app/identity/user/UserListItem.tsx index 9579cbfca3b68..28fdf58d14e93 100644 --- a/datahub-web-react/src/app/identity/user/UserListItem.tsx +++ b/datahub-web-react/src/app/identity/user/UserListItem.tsx @@ -1,5 +1,5 @@ import React, { useState } from 'react'; -import styled from 'styled-components'; +import styled from 'styled-components/macro'; import { Dropdown, List, Menu, Tag, Tooltip, Typography } from 'antd'; import { Link } from 'react-router-dom'; import { DeleteOutlined, MoreOutlined, UnlockOutlined } from '@ant-design/icons'; @@ -10,6 +10,7 @@ import { ANTD_GRAY, REDESIGN_COLORS } from '../../entity/shared/constants'; import ViewResetTokenModal from './ViewResetTokenModal'; import useDeleteEntity from '../../entity/shared/EntityDropdown/useDeleteEntity'; import SelectRole from './SelectRole'; +import { USERS_ASSIGN_ROLE_ID } from '../../onboarding/config/UsersOnboardingConfig'; type Props = { user: CorpUser; @@ -109,7 +110,7 @@ export default function UserListItem({ user, canManageUserCredentials, selectRol - + { return ( + Manage Ingestion diff --git a/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx b/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx index 2700a53bd1917..04449177af43f 100644 --- a/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx +++ b/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx @@ -14,7 +14,7 @@ import { import { Message } from '../../shared/Message'; import TabToolbar from '../../entity/shared/components/styled/TabToolbar'; import { IngestionSourceBuilderModal } from './builder/IngestionSourceBuilderModal'; -import { CLI_EXECUTOR_ID } from './utils'; +import { addToListIngestionSourcesCache, CLI_EXECUTOR_ID, removeFromListIngestionSourcesCache } from './utils'; import { DEFAULT_EXECUTOR_ID, SourceBuilderState } from './builder/types'; import { IngestionSource, UpdateIngestionSourceInput } from '../../../types.generated'; import { SearchBar } from '../../search/SearchBar'; @@ -26,6 +26,12 @@ import { scrollToTop } from '../../shared/searchUtils'; import useRefreshIngestionData from './executions/useRefreshIngestionData'; import { isExecutionRequestActive } from './executions/IngestionSourceExecutionList'; import analytics, { EventType } from '../../analytics'; +import { + INGESTION_CREATE_SOURCE_ID, + INGESTION_REFRESH_SOURCES_ID, +} from '../../onboarding/config/IngestionOnboardingConfig'; + +const PLACEHOLDER_URN = 'placeholder-urn'; const SourceContainer = styled.div``; @@ -96,7 +102,7 @@ export const IngestionSourceList = () => { const [sourceFilter, setSourceFilter] = useState(IngestionSourceType.ALL); // Ingestion Source Queries - const { loading, error, data, refetch } = useListIngestionSourcesQuery({ + const { loading, error, data, client, refetch } = useListIngestionSourcesQuery({ variables: { input: { start, @@ -104,6 +110,7 @@ export const IngestionSourceList = () => { query, }, }, + fetchPolicy: 'cache-first', }); const [createIngestionSource] = useCreateIngestionSourceMutation(); const [updateIngestionSource] = useUpdateIngestionSourceMutation(); @@ -197,6 +204,19 @@ export const IngestionSourceList = () => { }); } else { // Create + const newSource = { + urn: PLACEHOLDER_URN, + name: input.name, + type: input.type, + config: null, + schedule: { + interval: input.schedule?.interval || null, + timezone: input.schedule?.timezone || null, + }, + platform: null, + executions: null, + }; + 
addToListIngestionSourcesCache(client, newSource, pageSize, query); createIngestionSource({ variables: { input } }) .then((result) => { message.loading({ content: 'Loading...', duration: 2 }); @@ -218,7 +238,6 @@ export const IngestionSourceList = () => { setIsBuildingSource(false); setFocusSourceUrn(undefined); resetState(); - // onCreateOrUpdateIngestionSourceSuccess(); }) .catch((e) => { message.destroy(); @@ -226,6 +245,7 @@ export const IngestionSourceList = () => { content: `Failed to create ingestion source!: \n ${e.message || ''}`, duration: 3, }); + removeFromListIngestionSourcesCache(client, PLACEHOLDER_URN, page, pageSize, query); }); } }; @@ -236,6 +256,7 @@ export const IngestionSourceList = () => { }; const deleteIngestionSource = async (urn: string) => { + removeFromListIngestionSourcesCache(client, urn, page, pageSize, query); removeIngestionSourceMutation({ variables: { urn }, }) @@ -338,10 +359,10 @@ export const IngestionSourceList = () => {
- -
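The create path above now updates the list optimistically: a placeholder row keyed by PLACEHOLDER_URN is written into the Apollo cache before the createIngestionSource mutation resolves and is removed again in the catch handler if it fails, while deletes evict the entry eagerly. The addToListIngestionSourcesCache / removeFromListIngestionSourcesCache helpers live in ./utils and are not shown in this diff, so the sketch below is only a plausible shape for them; the generated query document name, variable shape, and result fields are assumptions.

import { ApolloClient } from '@apollo/client';
// Assumed generated query document; the real import path and result shape may differ.
import { ListIngestionSourcesDocument } from '../../../graphql/ingestion.generated';

export function addToListIngestionSourcesCache(client: ApolloClient<object>, newSource: any, pageSize: number, query?: string) {
    const variables = { input: { start: 0, count: pageSize, query } };
    const existing: any = client.readQuery({ query: ListIngestionSourcesDocument, variables });
    if (!existing) return; // nothing cached for page one yet, so there is nothing to patch
    client.writeQuery({
        query: ListIngestionSourcesDocument,
        variables,
        data: {
            listIngestionSources: {
                ...existing.listIngestionSources,
                total: existing.listIngestionSources.total + 1,
                // Prepend so the placeholder appears at the top of the first page immediately.
                ingestionSources: [newSource, ...existing.listIngestionSources.ingestionSources],
            },
        },
    });
}

export function removeFromListIngestionSourcesCache(client: ApolloClient<object>, urn: string, page: number, pageSize: number, query?: string) {
    const variables = { input: { start: (page - 1) * pageSize, count: pageSize, query } };
    const existing: any = client.readQuery({ query: ListIngestionSourcesDocument, variables });
    if (!existing) return;
    client.writeQuery({
        query: ListIngestionSourcesDocument,
        variables,
        data: {
            listIngestionSources: {
                ...existing.listIngestionSources,
                total: existing.listIngestionSources.total - 1,
                ingestionSources: existing.listIngestionSources.ingestionSources.filter((s: any) => s.urn !== urn),
            },
        },
    });
}

Pairing the eager write with the removal call in the mutation's catch handler keeps the cached list consistent whether the request succeeds or fails.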
diff --git a/datahub-web-react/src/app/ingest/source/IngestionSourceTable.tsx b/datahub-web-react/src/app/ingest/source/IngestionSourceTable.tsx index bfafb81538bbb..9c2c9f13f73a5 100644 --- a/datahub-web-react/src/app/ingest/source/IngestionSourceTable.tsx +++ b/datahub-web-react/src/app/ingest/source/IngestionSourceTable.tsx @@ -122,7 +122,7 @@ function IngestionSourceTable({ source.executions?.total && source.executions?.total > 0 && source.executions?.executionRequests[0].result?.status, - cliIngestion: source.config.executorId === CLI_EXECUTOR_ID, + cliIngestion: source.config?.executorId === CLI_EXECUTOR_ID, })); return ( diff --git a/datahub-web-react/src/app/ingest/source/builder/LookerWarning.tsx b/datahub-web-react/src/app/ingest/source/builder/LookerWarning.tsx index 9a47076857f90..ea1490608168d 100644 --- a/datahub-web-react/src/app/ingest/source/builder/LookerWarning.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/LookerWarning.tsx @@ -14,13 +14,13 @@ export const LookerWarning = ({ type }: Props) => { if (type === LOOKER) { link = ( - DataHub lookml module + DataHub LookML Ingestion Source ); } else if (type === LOOK_ML) { link = ( - DataHub looker module + DataHub Looker Ingestion Source ); } @@ -32,8 +32,8 @@ export const LookerWarning = ({ type }: Props) => { banner message={ <> - To get complete Looker metadata integration (including Looker views and lineage to the underlying - warehouse tables), you must also use the {link}. + To complete the Looker integration (including Looker views and lineage to the underlying warehouse + tables), you must also use the {link}. } /> diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/FormField.tsx b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/FormField.tsx index 40c12464d5391..47092b078b3e9 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/FormField.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/FormField.tsx @@ -1,5 +1,5 @@ import React from 'react'; -import { Checkbox, DatePicker, Form, Input, Select, Tooltip, FormInstance } from 'antd'; +import { Checkbox, DatePicker, Form, Input, Select, Tooltip } from 'antd'; import styled from 'styled-components/macro'; import Button from 'antd/lib/button'; import { MinusCircleOutlined, PlusOutlined } from '@ant-design/icons'; @@ -94,11 +94,11 @@ interface Props { secrets: Secret[]; refetchSecrets: () => void; removeMargin?: boolean; - form: FormInstance; + updateFormValue: (field, value) => void; } function FormField(props: Props) { - const { field, secrets, refetchSecrets, removeMargin, form } = props; + const { field, secrets, refetchSecrets, removeMargin, updateFormValue } = props; if (field.type === FieldType.LIST) return ; @@ -113,7 +113,7 @@ function FormField(props: Props) { secrets={secrets} removeMargin={removeMargin} refetchSecrets={refetchSecrets} - form={form} + updateFormValue={updateFormValue} /> ); diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/RecipeForm.tsx b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/RecipeForm.tsx index 86a7893a9f902..98763965503b1 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/RecipeForm.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/RecipeForm.tsx @@ -60,8 +60,9 @@ function getInitialValues(displayRecipe: string, allFields: any[]) { } if (recipeObj) { allFields.forEach((field) => { - initialValues[field.name] = - field.getValueFromRecipeOverride?.(recipeObj) || get(recipeObj, 
field.fieldPath); + initialValues[field.name] = field.getValueFromRecipeOverride + ? field.getValueFromRecipeOverride(recipeObj) + : get(recipeObj, field.fieldPath); }); } @@ -123,9 +124,9 @@ function RecipeForm(props: Props) { Object.keys(changedValues).forEach((fieldName) => { const recipeField = allFields.find((f) => f.name === fieldName); if (recipeField) { - updatedValues = - recipeField.setValueOnRecipeOverride?.(updatedValues, allValues[fieldName]) || - setFieldValueOnRecipe(updatedValues, allValues[fieldName], recipeField.fieldPath); + updatedValues = recipeField.setValueOnRecipeOverride + ? recipeField.setValueOnRecipeOverride(updatedValues, allValues[fieldName]) + : setFieldValueOnRecipe(updatedValues, allValues[fieldName], recipeField.fieldPath); } }); @@ -133,6 +134,11 @@ function RecipeForm(props: Props) { setStagedRecipe(stagedRecipe); } + function updateFormValue(fieldName, fieldValue) { + updateFormValues({ [fieldName]: fieldValue }, { [fieldName]: fieldValue }); + form.setFieldsValue({ [fieldName]: fieldValue }); + } + return (
))} {CONNECTORS_WITH_TEST_CONNECTION.has(type as string) && ( @@ -187,7 +193,7 @@ function RecipeForm(props: Props) { secrets={secrets} refetchSecrets={refetchSecrets} removeMargin={i === filterFields.length - 1} - form={form} + updateFormValue={updateFormValue} /> @@ -213,7 +219,7 @@ function RecipeForm(props: Props) { secrets={secrets} refetchSecrets={refetchSecrets} removeMargin={i === advancedFields.length - 1} - form={form} + updateFormValue={updateFormValue} /> ))} diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/SecretField/CreateSecretButton.tsx b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/SecretField/CreateSecretButton.tsx index 31024512cbbcc..8561805e6270c 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/SecretField/CreateSecretButton.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/SecretField/CreateSecretButton.tsx @@ -24,11 +24,12 @@ const CreateButton = styled(Button)` `; interface Props { + initialState?: SecretBuilderState; onSubmit?: (state: SecretBuilderState) => void; refetchSecrets: () => void; } -function CreateSecretButton({ onSubmit, refetchSecrets }: Props) { +function CreateSecretButton({ initialState, onSubmit, refetchSecrets }: Props) { const [isCreateModalVisible, setIsCreateModalVisible] = useState(false); const [createSecretMutation] = useCreateSecretMutation(); @@ -62,6 +63,7 @@ function CreateSecretButton({ onSubmit, refetchSecrets }: Props) { {isCreateModalVisible && ( setIsCreateModalVisible(false)} onSubmit={createSecret} diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/SecretField/SecretField.tsx b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/SecretField/SecretField.tsx index 08213f891205a..00de5b3ca0ac0 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/SecretField/SecretField.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/SecretField/SecretField.tsx @@ -1,5 +1,5 @@ import React, { ReactNode } from 'react'; -import { AutoComplete, Divider, Form, FormInstance } from 'antd'; +import { AutoComplete, Divider, Form } from 'antd'; import styled from 'styled-components/macro'; import { Secret } from '../../../../../../types.generated'; import CreateSecretButton from './CreateSecretButton'; @@ -52,7 +52,7 @@ interface SecretFieldProps { secrets: Secret[]; removeMargin?: boolean; refetchSecrets: () => void; - form: FormInstance; + updateFormValue: (field, value) => void; } function SecretFieldTooltip({ tooltipLabel }: { tooltipLabel?: string | ReactNode }) { @@ -84,7 +84,7 @@ const encodeSecret = (secretName: string) => { return `\${${secretName}}`; }; -function SecretField({ field, secrets, removeMargin, form, refetchSecrets }: SecretFieldProps) { +function SecretField({ field, secrets, removeMargin, updateFormValue, refetchSecrets }: SecretFieldProps) { const options = secrets.map((secret) => ({ value: encodeSecret(secret.name), label: secret.name })); return ( @@ -108,9 +108,7 @@ function SecretField({ field, secrets, removeMargin, form, refetchSecrets }: Sec {menu} - form.setFields([{ name: field.name, value: encodeSecret(state.name as string) }]) - } + onSubmit={(state) => updateFormValue(field.name, encodeSecret(state.name as string))} refetchSecrets={refetchSecrets} /> diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/bigquery.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/bigquery.ts index 19811034a1f52..59764ffdd03b4 100644 --- 
a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/bigquery.ts +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/bigquery.ts @@ -1,19 +1,20 @@ -import { RecipeField, FieldType, setListValuesOnRecipe } from './common'; +import { RecipeField, FieldType } from './common'; export const BIGQUERY_PROJECT_ID: RecipeField = { name: 'project_id', - label: 'BigQuery Project ID', + label: 'Project ID', tooltip: 'Project ID where you have rights to run queries and create tables.', placeholder: 'my-project-123', type: FieldType.TEXT, fieldPath: 'source.config.project_id', rules: null, + required: true, }; export const BIGQUERY_CREDENTIAL_PROJECT_ID: RecipeField = { name: 'credential.project_id', label: 'Credentials Project ID', - tooltip: 'Project id to set the credentials.', + tooltip: "The Project ID, which can be found in your service account's JSON Key (project_id)", placeholder: 'my-project-123', type: FieldType.TEXT, fieldPath: 'source.config.credential.project_id', @@ -23,129 +24,43 @@ export const BIGQUERY_CREDENTIAL_PROJECT_ID: RecipeField = { export const BIGQUERY_PRIVATE_KEY_ID: RecipeField = { name: 'credential.private_key_id', label: 'Private Key Id', - tooltip: 'Private key id.', - placeholder: 'BQ_PRIVATE_KEY_ID', + tooltip: "The Private Key id, which can be found in your service account's JSON Key (private_key_id)", type: FieldType.SECRET, fieldPath: 'source.config.credential.private_key_id', + placeholder: 'd0121d0000882411234e11166c6aaa23ed5d74e0', rules: null, + required: true, }; export const BIGQUERY_PRIVATE_KEY: RecipeField = { name: 'credential.private_key', label: 'Private Key', - placeholder: 'BQ_PRIVATE_KEY', - tooltip: 'Private key in a form of "-----BEGIN PRIVATE KEY-----\nprivate-key\n-----END PRIVATE KEY-----\n".', + tooltip: "The Private key, which can be found in your service account's JSON Key (private_key).", + placeholder: '-----BEGIN PRIVATE KEY-----....\n-----END PRIVATE KEY-----', type: FieldType.SECRET, fieldPath: 'source.config.credential.private_key', rules: null, + required: true, }; export const BIGQUERY_CLIENT_EMAIL: RecipeField = { name: 'credential.client_email', label: 'Client Email', - tooltip: 'Client email.', + tooltip: "The Client Email, which can be found in your service account's JSON Key (client_email).", placeholder: 'client_email@gmail.com', type: FieldType.TEXT, fieldPath: 'source.config.credential.client_email', rules: null, + required: true, }; export const BIGQUERY_CLIENT_ID: RecipeField = { name: 'credential.client_id', label: 'Client ID', - tooltip: 'Client ID.', + tooltip: "The Client ID, which can be found in your service account's JSON Key (client_id).", placeholder: '123456789098765432101', type: FieldType.TEXT, fieldPath: 'source.config.credential.client_id', rules: null, -}; - -const schemaAllowFieldPath = 'source.config.schema_pattern.allow'; -export const BIGQUERY_SCHEMA_ALLOW: RecipeField = { - name: 'schema_pattern.allow', - label: 'Allow Patterns', - tooltip: 'Use regex here.', - placeholder: '^my_schema$', - type: FieldType.LIST, - buttonLabel: 'Add pattern', - fieldPath: schemaAllowFieldPath, - rules: null, - section: 'Schemas', - setValueOnRecipeOverride: (recipe: any, values: string[]) => - setListValuesOnRecipe(recipe, values, schemaAllowFieldPath), -}; - -const schemaDenyFieldPath = 'source.config.schema_pattern.deny'; -export const BIGQUERY_SCHEMA_DENY: RecipeField = { - name: 'schema_pattern.deny', - label: 'Deny Patterns', - tooltip: 'Use regex here.', - placeholder: '^my_schema$', - type: 
FieldType.LIST, - buttonLabel: 'Add pattern', - fieldPath: schemaDenyFieldPath, - rules: null, - section: 'Schemas', - setValueOnRecipeOverride: (recipe: any, values: string[]) => - setListValuesOnRecipe(recipe, values, schemaDenyFieldPath), -}; - -const tableAllowFieldPath = 'source.config.table_pattern.allow'; -export const BIGQUERY_TABLE_ALLOW: RecipeField = { - name: 'table_pattern.allow', - label: 'Allow Patterns', - tooltip: 'Use regex here.', - placeholder: '^my_schema\\.table_name$', - type: FieldType.LIST, - buttonLabel: 'Add pattern', - fieldPath: tableAllowFieldPath, - rules: null, - section: 'Tables', - setValueOnRecipeOverride: (recipe: any, values: string[]) => - setListValuesOnRecipe(recipe, values, tableAllowFieldPath), -}; - -const tableDenyFieldPath = 'source.config.table_pattern.deny'; -export const BIGQUERY_TABLE_DENY: RecipeField = { - name: 'table_pattern.deny', - label: 'Deny Patterns', - tooltip: 'Use regex here.', - placeholder: '^my_schema\\.table_name$', - type: FieldType.LIST, - buttonLabel: 'Add pattern', - fieldPath: tableDenyFieldPath, - rules: null, - section: 'Tables', - setValueOnRecipeOverride: (recipe: any, values: string[]) => - setListValuesOnRecipe(recipe, values, tableDenyFieldPath), -}; - -const viewAllowFieldPath = 'source.config.view_pattern.allow'; -export const BIGQUERY_VIEW_ALLOW: RecipeField = { - name: 'view_pattern.allow', - label: 'Allow Patterns', - tooltip: 'Use regex here.', - placeholder: '^my_schema\\.view_name$', - type: FieldType.LIST, - buttonLabel: 'Add pattern', - fieldPath: viewAllowFieldPath, - rules: null, - section: 'Views', - setValueOnRecipeOverride: (recipe: any, values: string[]) => - setListValuesOnRecipe(recipe, values, viewAllowFieldPath), -}; - -const viewDenyFieldPath = 'source.config.view_pattern.deny'; -export const BIGQUERY_VIEW_DENY: RecipeField = { - name: 'view_pattern.deny', - label: 'Deny Patterns', - tooltip: 'Use regex here.', - placeholder: '^my_schema\\.view_name$', - type: FieldType.LIST, - buttonLabel: 'Add pattern', - fieldPath: viewDenyFieldPath, - rules: null, - section: 'Views', - setValueOnRecipeOverride: (recipe: any, values: string[]) => - setListValuesOnRecipe(recipe, values, viewDenyFieldPath), + required: true, }; diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/bigqueryBeta.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/bigqueryBeta.ts index 24e2393830c37..03a9ba08f358f 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/bigqueryBeta.ts +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/bigqueryBeta.ts @@ -3,11 +3,12 @@ import { FieldType, RecipeField, setListValuesOnRecipe } from './common'; export const BIGQUERY_BETA_PROJECT_ID: RecipeField = { name: 'credential.project_id', label: 'Project ID', - tooltip: 'Project id to set the credentials.', + tooltip: "The Project ID, which can be found in your service account's JSON Key (project_id)", placeholder: 'my-project-123', type: FieldType.TEXT, fieldPath: 'source.config.credential.project_id', rules: null, + required: true, }; const projectIdAllowFieldPath = 'source.config.project_id_pattern.allow'; diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/common.tsx b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/common.tsx index 359ca217c9f5d..4af4dba01ac14 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/common.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/common.tsx @@ -97,8 +97,9 @@ const 
databaseAllowFieldPath = 'source.config.database_pattern.allow'; export const DATABASE_ALLOW: RecipeField = { name: 'database_pattern.allow', label: 'Allow Patterns', - tooltip: 'Use regex here.', - placeholder: '^my_db$', + tooltip: + 'Only include specific Databases by providing the name of a Database, or a Regular Expression (REGEX). If not provided, all Databases will be included.', + placeholder: 'database_name', type: FieldType.LIST, buttonLabel: 'Add pattern', fieldPath: databaseAllowFieldPath, @@ -112,8 +113,9 @@ const databaseDenyFieldPath = 'source.config.database_pattern.deny'; export const DATABASE_DENY: RecipeField = { name: 'database_pattern.deny', label: 'Deny Patterns', - tooltip: 'Use regex here.', - placeholder: '^my_db$', + tooltip: + 'Exclude specific Databases by providing the name of a Database, or a Regular Expression (REGEX). If not provided, all Databases will be included. Deny patterns always take precedence over Allow patterns.', + placeholder: 'database_name', type: FieldType.LIST, buttonLabel: 'Add pattern', fieldPath: databaseDenyFieldPath, @@ -127,12 +129,14 @@ const dashboardAllowFieldPath = 'source.config.dashboard_pattern.allow'; export const DASHBOARD_ALLOW: RecipeField = { name: 'dashboard_pattern.allow', label: 'Allow Patterns', - tooltip: 'Use regex here.', + tooltip: + 'Only include specific Dashboards by providing the name of a Dashboard, or a Regular Expression (REGEX). If not provided, all Dashboards will be included.', type: FieldType.LIST, buttonLabel: 'Add pattern', fieldPath: dashboardAllowFieldPath, rules: null, section: 'Dashboards', + placeholder: 'my_dashboard', setValueOnRecipeOverride: (recipe: any, values: string[]) => setListValuesOnRecipe(recipe, values, dashboardAllowFieldPath), }; @@ -141,12 +145,14 @@ const dashboardDenyFieldPath = 'source.config.dashboard_pattern.deny'; export const DASHBOARD_DENY: RecipeField = { name: 'dashboard_pattern.deny', label: 'Deny Patterns', - tooltip: 'Use regex here.', + tooltip: + 'Exclude specific Dashboards by providing the name of a Dashboard, or a Regular Expression (REGEX). If not provided, all Dashboards will be included. Deny patterns always take precedence over Allow patterns.', type: FieldType.LIST, buttonLabel: 'Add pattern', fieldPath: dashboardDenyFieldPath, rules: null, section: 'Dashboards', + placeholder: 'my_dashboard', setValueOnRecipeOverride: (recipe: any, values: string[]) => setListValuesOnRecipe(recipe, values, dashboardDenyFieldPath), }; @@ -155,7 +161,10 @@ const schemaAllowFieldPath = 'source.config.schema_pattern.allow'; export const SCHEMA_ALLOW: RecipeField = { name: 'schema_pattern.allow', label: 'Allow Patterns', - tooltip: 'Use regex here.', + // TODO: Change this to FULLY qualified names once the allow / deny consistency track is completed. + tooltip: + 'Only include specific Schemas by providing the name of a Schema, or a Regular Expression (REGEX) to include specific Schemas. If not provided, all Schemas inside allowed Databases will be included.', + placeholder: 'company_schema', type: FieldType.LIST, buttonLabel: 'Add pattern', fieldPath: schemaAllowFieldPath, @@ -169,7 +178,9 @@ const schemaDenyFieldPath = 'source.config.schema_pattern.deny'; export const SCHEMA_DENY: RecipeField = { name: 'schema_pattern.deny', label: 'Deny Patterns', - tooltip: 'Use regex here.', + tooltip: + 'Exclude specific Schemas by providing the name of a Schema, or a Regular Expression (REGEX). If not provided, all Schemas inside allowed Databases will be included.
Deny patterns always take precedence over Allow patterns.', + placeholder: 'company_schema', type: FieldType.LIST, buttonLabel: 'Add pattern', fieldPath: schemaDenyFieldPath, @@ -179,60 +190,68 @@ export const SCHEMA_DENY: RecipeField = { setListValuesOnRecipe(recipe, values, schemaDenyFieldPath), }; -const viewAllowFieldPath = 'source.config.view_pattern.allow'; -export const VIEW_ALLOW: RecipeField = { - name: 'view_pattern.allow', +const tableAllowFieldPath = 'source.config.table_pattern.allow'; +export const TABLE_ALLOW: RecipeField = { + name: 'table_pattern.allow', label: 'Allow Patterns', - tooltip: 'Use regex here.', + tooltip: + 'Only include Tables with particular names by providing the fully qualified name of a Table, or a Regular Expression (REGEX). If not provided, all Tables inside allowed Databases and Schemas will be included in ingestion.', + placeholder: 'database_name.company_schema.table_name', type: FieldType.LIST, buttonLabel: 'Add pattern', - fieldPath: viewAllowFieldPath, + fieldPath: tableAllowFieldPath, rules: null, - section: 'Views', + section: 'Tables', setValueOnRecipeOverride: (recipe: any, values: string[]) => - setListValuesOnRecipe(recipe, values, viewAllowFieldPath), + setListValuesOnRecipe(recipe, values, tableAllowFieldPath), }; -const viewDenyFieldPath = 'source.config.view_pattern.deny'; -export const VIEW_DENY: RecipeField = { - name: 'view_pattern.deny', +const tableDenyFieldPath = 'source.config.table_pattern.deny'; +export const TABLE_DENY: RecipeField = { + name: 'table_pattern.deny', label: 'Deny Patterns', - tooltip: 'Use regex here.', + tooltip: + 'Exclude Tables with particular names by providing the fully qualified name of a Table, or a Regular Expression (REGEX). If not provided, all Tables inside allowed Databases and Schemas will be included in ingestion. Deny patterns always take precedence over Allow patterns.', + placeholder: 'database_name.company_schema.table_name', type: FieldType.LIST, buttonLabel: 'Add pattern', - fieldPath: viewDenyFieldPath, + fieldPath: tableDenyFieldPath, rules: null, - section: 'Views', + section: 'Tables', setValueOnRecipeOverride: (recipe: any, values: string[]) => - setListValuesOnRecipe(recipe, values, viewDenyFieldPath), + setListValuesOnRecipe(recipe, values, tableDenyFieldPath), }; -const tableAllowFieldPath = 'source.config.table_pattern.allow'; -export const TABLE_ALLOW: RecipeField = { - name: 'table_pattern.allow', +const viewAllowFieldPath = 'source.config.view_pattern.allow'; +export const VIEW_ALLOW: RecipeField = { + name: 'view_pattern.allow', label: 'Allow Patterns', - tooltip: 'Use regex here.', + tooltip: + 'Only include Views with particular names by providing the fully qualified name of a View, or a Regular Expression (REGEX). 
If not provided, all Views inside allowed Databases and Schemas will be included in ingestion.', + placeholder: 'database_name.company_schema.view_name', type: FieldType.LIST, buttonLabel: 'Add pattern', - fieldPath: tableAllowFieldPath, + fieldPath: viewAllowFieldPath, rules: null, - section: 'Tables', + section: 'Views', setValueOnRecipeOverride: (recipe: any, values: string[]) => - setListValuesOnRecipe(recipe, values, tableAllowFieldPath), + setListValuesOnRecipe(recipe, values, viewAllowFieldPath), }; -const tableDenyFieldPath = 'source.config.table_pattern.deny'; -export const TABLE_DENY: RecipeField = { - name: 'table_pattern.deny', +const viewDenyFieldPath = 'source.config.view_pattern.deny'; +export const VIEW_DENY: RecipeField = { + name: 'view_pattern.deny', label: 'Deny Patterns', - tooltip: 'Use regex here.', + tooltip: + 'Exclude Views with particular names by providing the fully qualified name of a View, or a Regular Expression (REGEX). If not provided, all Views inside allowed Databases and Schemas will be included in ingestion. Deny patterns always take precedence over Allow patterns.', + placeholder: 'database_name.company_schema.view_name', type: FieldType.LIST, buttonLabel: 'Add pattern', - fieldPath: tableDenyFieldPath, + fieldPath: viewDenyFieldPath, rules: null, - section: 'Tables', + section: 'Views', setValueOnRecipeOverride: (recipe: any, values: string[]) => - setListValuesOnRecipe(recipe, values, tableDenyFieldPath), + setListValuesOnRecipe(recipe, values, viewDenyFieldPath), }; /* ---------------------------------------------------- Advance Section ---------------------------------------------------- */ @@ -257,21 +276,40 @@ export const INCLUDE_LINEAGE: RecipeField = { export const INCLUDE_TABLE_LINEAGE: RecipeField = { name: 'include_table_lineage', label: 'Include Table Lineage', - tooltip: 'Whether or not table lineage should be ingested.', + tooltip: 'Extract Table-Level lineage metadata. Enabling this may increase the duration of the extraction process.', type: FieldType.BOOLEAN, fieldPath: 'source.config.include_table_lineage', rules: null, }; -export const PROFILING_ENABLED: RecipeField = { +const isProfilingEnabledFieldPath = 'source.config.profiling.enabled'; +export const TABLE_PROFILING_ENABLED: RecipeField = { name: 'profiling.enabled', - label: 'Enable Profiling', - tooltip: 'Whether profiling should be performed on the assets extracted from the ingestion source.', + label: 'Enable Table Profiling', + tooltip: + 'Generate Data Profiles for extracted Tables. Enabling this may increase the duration of the extraction process.', type: FieldType.BOOLEAN, - fieldPath: 'source.config.profiling.enabled', + fieldPath: isProfilingEnabledFieldPath, rules: null, }; +const isTableProfilingOnlyFieldPath = 'source.config.profiling.profile_table_level_only'; +export const COLUMN_PROFILING_ENABLED: RecipeField = { + name: 'column_profiling.enabled', + label: 'Enable Column Profiling', + tooltip: + 'Generate Data Profiles for the Columns in extracted Tables.
Enabling this may increase the duration of the extraction process.', + type: FieldType.BOOLEAN, + fieldPath: isTableProfilingOnlyFieldPath, + rules: null, + getValueFromRecipeOverride: (recipe: any) => { + return get(recipe, isProfilingEnabledFieldPath) && !get(recipe, isTableProfilingOnlyFieldPath); + }, + setValueOnRecipeOverride: (recipe: any, value: boolean) => { + return setFieldValueOnRecipe(recipe, !value, isTableProfilingOnlyFieldPath); + }, +}; + export const STATEFUL_INGESTION_ENABLED: RecipeField = { name: 'stateful_ingestion.enabled', label: 'Enable Stateful Ingestion', @@ -336,6 +374,24 @@ export const INGEST_OWNER: RecipeField = { rules: null, }; +export const INCLUDE_TABLES: RecipeField = { + name: 'include_tables', + label: 'Include Tables', + tooltip: 'Extract Tables from source.', + type: FieldType.BOOLEAN, + fieldPath: 'source.config.include_tables', + rules: null, +}; + +export const INCLUDE_VIEWS: RecipeField = { + name: 'include_views', + label: 'Include Views', + tooltip: 'Extract Views from source.', + type: FieldType.BOOLEAN, + fieldPath: 'source.config.include_views', + rules: null, +}; + export const GITHUB_INFO_REPO: RecipeField = { name: 'github_info.repo', label: 'GitHub Repo', @@ -390,7 +446,7 @@ export const START_TIME: RecipeField = { name: 'start_time', label: 'Start Time', tooltip: - 'Earliest date used when processing audit logs for lineage, usage, and more. Default: Last full day in UTC or last time DataHub ingested usage (if stateful ingestion is enabled). Tip: Set this to an older date (e.g. 1 month ago) to bootstrap your first ingestion run, and then reduce for subsequent runs.', + 'Earliest date used when processing audit logs for lineage, usage, and more. Default: Last full day in UTC or last time DataHub ingested usage (if stateful ingestion is enabled). Tip: Set this to an older date (e.g. 1 month ago) to bootstrap your first ingestion run, and then reduce for subsequent runs. 
Changing this may increase the duration of the extraction process.', placeholder: 'Select date and time', type: FieldType.DATE, fieldPath: startTimeFieldPath, diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts index c2758310357d9..fa4cf551ba2eb 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts @@ -6,22 +6,28 @@ import { TABLEAU } from '../../conf/tableau/tableau'; import { KAFKA } from '../../conf/kafka/kafka'; import { INCLUDE_LINEAGE, - PROFILING_ENABLED, + TABLE_PROFILING_ENABLED, STATEFUL_INGESTION_ENABLED, + INCLUDE_TABLES, + INCLUDE_VIEWS, DATABASE_ALLOW, DATABASE_DENY, TABLE_LINEAGE_MODE, INGEST_TAGS, INGEST_OWNER, - DASHBOARD_ALLOW, - DASHBOARD_DENY, - GITHUB_INFO_REPO, EXTRACT_USAGE_HISTORY, EXTRACT_OWNERS, SKIP_PERSONAL_FOLDERS, RecipeField, START_TIME, INCLUDE_TABLE_LINEAGE, + TABLE_DENY, + VIEW_DENY, + VIEW_ALLOW, + TABLE_ALLOW, + SCHEMA_DENY, + SCHEMA_ALLOW, + COLUMN_PROFILING_ENABLED, } from './common'; import { SNOWFLAKE_ACCOUNT_ID, @@ -29,37 +35,27 @@ import { SNOWFLAKE_USERNAME, SNOWFLAKE_PASSWORD, SNOWFLAKE_ROLE, - SNOWFLAKE_SCHEMA_ALLOW, - SNOWFLAKE_SCHEMA_DENY, - SNOWFLAKE_TABLE_ALLOW, - SNOWFLAKE_TABLE_DENY, - SNOWFLAKE_VIEW_ALLOW, - SNOWFLAKE_VIEW_DENY, } from './snowflake'; +import { BIGQUERY_PRIVATE_KEY, BIGQUERY_PRIVATE_KEY_ID, BIGQUERY_CLIENT_EMAIL, BIGQUERY_CLIENT_ID } from './bigquery'; +import { REDSHIFT_HOST_PORT, REDSHIFT_DATABASE, REDSHIFT_USERNAME, REDSHIFT_PASSWORD } from './redshift'; import { - BIGQUERY_PRIVATE_KEY, - BIGQUERY_PRIVATE_KEY_ID, - BIGQUERY_CLIENT_EMAIL, - BIGQUERY_CLIENT_ID, - BIGQUERY_TABLE_ALLOW, - BIGQUERY_TABLE_DENY, - BIGQUERY_VIEW_ALLOW, - BIGQUERY_VIEW_DENY, -} from './bigquery'; + TABLEAU_CONNECTION_URI, + TABLEAU_PROJECT, + TABLEAU_SITE, + TABLEAU_USERNAME, + TABLEAU_PASSWORD, + TABLEAU_TOKEN_NAME, + TABLEAU_TOKEN_VALUE, +} from './tableau'; import { - REDSHIFT_HOST_PORT, - REDSHIFT_DATABASE, - REDSHIFT_USERNAME, - REDSHIFT_PASSWORD, - REDSHIFT_SCHEMA_ALLOW, - REDSHIFT_SCHEMA_DENY, - REDSHIFT_TABLE_ALLOW, - REDSHIFT_TABLE_DENY, - REDSHIFT_VIEW_ALLOW, - REDSHIFT_VIEW_DENY, -} from './redshift'; -import { TABLEAU_CONNECTION_URI, TABLEAU_PROJECT, TABLEAU_SITE, TABLEAU_USERNAME, TABLEAU_PASSWORD } from './tableau'; -import { CHART_ALLOW, CHART_DENY, LOOKER_BASE_URL, LOOKER_CLIENT_ID, LOOKER_CLIENT_SECRET } from './looker'; + CHART_ALLOW, + CHART_DENY, + DASHBOARD_ALLOW as LOOKER_DASHBOARD_ALLOW, + DASHBOARD_DENY as LOOKER_DASHBOARD_DENY, + LOOKER_BASE_URL, + LOOKER_CLIENT_ID, + LOOKER_CLIENT_SECRET, +} from './looker'; import { KAFKA_SASL_USERNAME, KAFKA_SASL_PASSWORD, @@ -86,8 +82,13 @@ import { PARSE_TABLE_NAMES_FROM_SQL, PROJECT_NAME, } from './lookml'; -import { BIGQUERY_BETA } from '../constants'; +import { PRESTO, PRESTO_HOST_PORT, PRESTO_DATABASE, PRESTO_USERNAME, PRESTO_PASSWORD } from './presto'; +import { BIGQUERY_BETA, MYSQL } from '../constants'; import { BIGQUERY_BETA_PROJECT_ID, DATASET_ALLOW, DATASET_DENY, PROJECT_ALLOW, PROJECT_DENY } from './bigqueryBeta'; +import { MYSQL_HOST_PORT, MYSQL_PASSWORD, MYSQL_USERNAME } from './mysql'; +import { MSSQL, MSSQL_DATABASE, MSSQL_HOST_PORT, MSSQL_PASSWORD, MSSQL_USERNAME } from './mssql'; +import { TRINO, TRINO_DATABASE, TRINO_HOST_PORT, TRINO_PASSWORD, TRINO_USERNAME } from './trino'; +import { MARIADB, MARIADB_DATABASE, MARIADB_HOST_PORT, MARIADB_PASSWORD, 
MARIADB_USERNAME } from './mariadb'; export enum RecipeSections { Connection = 0, @@ -110,19 +111,25 @@ interface RecipeFields { export const RECIPE_FIELDS: RecipeFields = { [SNOWFLAKE]: { fields: [SNOWFLAKE_ACCOUNT_ID, SNOWFLAKE_WAREHOUSE, SNOWFLAKE_USERNAME, SNOWFLAKE_PASSWORD, SNOWFLAKE_ROLE], - advancedFields: [INCLUDE_LINEAGE, PROFILING_ENABLED, STATEFUL_INGESTION_ENABLED], + advancedFields: [ + INCLUDE_TABLES, + INCLUDE_VIEWS, + INCLUDE_LINEAGE, + TABLE_PROFILING_ENABLED, + COLUMN_PROFILING_ENABLED, + STATEFUL_INGESTION_ENABLED, + ], filterFields: [ DATABASE_ALLOW, DATABASE_DENY, - SNOWFLAKE_SCHEMA_ALLOW, - SNOWFLAKE_SCHEMA_DENY, - SNOWFLAKE_TABLE_ALLOW, - SNOWFLAKE_TABLE_DENY, - SNOWFLAKE_VIEW_ALLOW, - SNOWFLAKE_VIEW_DENY, + SCHEMA_ALLOW, + SCHEMA_DENY, + TABLE_ALLOW, + TABLE_DENY, + VIEW_ALLOW, + VIEW_DENY, ], - filterSectionTooltip: - 'Filter out data assets based on allow/deny regex patterns we match against. Deny patterns take precedence over allow patterns.', + filterSectionTooltip: 'Include or exclude specific Databases, Schemas, Tables and Views from ingestion.', }, [BIGQUERY]: { fields: [ @@ -132,19 +139,26 @@ export const RECIPE_FIELDS: RecipeFields = { BIGQUERY_CLIENT_EMAIL, BIGQUERY_CLIENT_ID, ], - advancedFields: [INCLUDE_TABLE_LINEAGE, PROFILING_ENABLED, STATEFUL_INGESTION_ENABLED, START_TIME], + advancedFields: [ + INCLUDE_TABLES, + INCLUDE_VIEWS, + INCLUDE_TABLE_LINEAGE, + TABLE_PROFILING_ENABLED, + COLUMN_PROFILING_ENABLED, + START_TIME, + STATEFUL_INGESTION_ENABLED, + ], filterFields: [ PROJECT_ALLOW, PROJECT_DENY, DATASET_ALLOW, DATASET_DENY, - BIGQUERY_TABLE_ALLOW, - BIGQUERY_TABLE_DENY, - BIGQUERY_VIEW_ALLOW, - BIGQUERY_VIEW_DENY, + TABLE_ALLOW, + TABLE_DENY, + VIEW_ALLOW, + VIEW_DENY, ], - filterSectionTooltip: - 'Filter out data assets based on allow/deny regex patterns we match against. Deny patterns take precedence over allow patterns.', + filterSectionTooltip: 'Include or exclude specific Projects, Datasets, Tables and Views from ingestion.', }, [BIGQUERY_BETA]: { fields: [ @@ -154,57 +168,70 @@ export const RECIPE_FIELDS: RecipeFields = { BIGQUERY_CLIENT_EMAIL, BIGQUERY_CLIENT_ID, ], - advancedFields: [INCLUDE_TABLE_LINEAGE, PROFILING_ENABLED, STATEFUL_INGESTION_ENABLED, START_TIME], + advancedFields: [ + INCLUDE_TABLE_LINEAGE, + TABLE_PROFILING_ENABLED, + COLUMN_PROFILING_ENABLED, + START_TIME, + STATEFUL_INGESTION_ENABLED, + ], filterFields: [ PROJECT_ALLOW, PROJECT_DENY, DATASET_ALLOW, DATASET_DENY, - BIGQUERY_TABLE_ALLOW, - BIGQUERY_TABLE_DENY, - BIGQUERY_VIEW_ALLOW, - BIGQUERY_VIEW_DENY, + TABLE_ALLOW, + TABLE_DENY, + VIEW_ALLOW, + VIEW_DENY, ], - filterSectionTooltip: - 'Filter out data assets based on allow/deny regex patterns we match against. 
Deny patterns take precedence over allow patterns.', + filterSectionTooltip: 'Include or exclude specific Projects, Datasets, Tables and Views from ingestion.', }, [REDSHIFT]: { fields: [REDSHIFT_HOST_PORT, REDSHIFT_DATABASE, REDSHIFT_USERNAME, REDSHIFT_PASSWORD], - advancedFields: [INCLUDE_TABLE_LINEAGE, PROFILING_ENABLED, STATEFUL_INGESTION_ENABLED, TABLE_LINEAGE_MODE], - filterFields: [ - REDSHIFT_SCHEMA_ALLOW, - REDSHIFT_SCHEMA_DENY, - REDSHIFT_TABLE_ALLOW, - REDSHIFT_TABLE_DENY, - REDSHIFT_VIEW_ALLOW, - REDSHIFT_VIEW_DENY, + advancedFields: [ + INCLUDE_TABLES, + INCLUDE_VIEWS, + INCLUDE_TABLE_LINEAGE, + TABLE_PROFILING_ENABLED, + COLUMN_PROFILING_ENABLED, + TABLE_LINEAGE_MODE, + STATEFUL_INGESTION_ENABLED, ], - filterSectionTooltip: - 'Filter out data assets based on allow/deny regex patterns we match against. Deny patterns take precedence over allow patterns.', + filterFields: [SCHEMA_ALLOW, SCHEMA_DENY, TABLE_ALLOW, TABLE_DENY, VIEW_ALLOW, VIEW_DENY], + filterSectionTooltip: 'Include or exclude specific Schemas, Tables and Views from ingestion.', }, [TABLEAU]: { - fields: [TABLEAU_CONNECTION_URI, TABLEAU_PROJECT, TABLEAU_SITE, TABLEAU_USERNAME, TABLEAU_PASSWORD], + fields: [ + TABLEAU_CONNECTION_URI, + TABLEAU_PROJECT, + TABLEAU_SITE, + TABLEAU_TOKEN_NAME, + TABLEAU_TOKEN_VALUE, + STATEFUL_INGESTION_ENABLED, + TABLEAU_USERNAME, + TABLEAU_PASSWORD, + ], filterFields: [], advancedFields: [INGEST_TAGS, INGEST_OWNER], }, [LOOKER]: { fields: [LOOKER_BASE_URL, LOOKER_CLIENT_ID, LOOKER_CLIENT_SECRET], - filterFields: [DASHBOARD_ALLOW, DASHBOARD_DENY, CHART_ALLOW, CHART_DENY], - advancedFields: [GITHUB_INFO_REPO, EXTRACT_USAGE_HISTORY, EXTRACT_OWNERS, SKIP_PERSONAL_FOLDERS], - filterSectionTooltip: - 'Filter out data assets based on allow/deny regex patterns we match against. Deny patterns take precedence over allow patterns.', + filterFields: [LOOKER_DASHBOARD_ALLOW, LOOKER_DASHBOARD_DENY, CHART_ALLOW, CHART_DENY], + advancedFields: [EXTRACT_USAGE_HISTORY, EXTRACT_OWNERS, SKIP_PERSONAL_FOLDERS, STATEFUL_INGESTION_ENABLED], + filterSectionTooltip: 'Include or exclude specific Dashboard, Charts from Looker ingestion.', }, [LOOKML]: { - fields: [LOOKML_GITHUB_INFO_REPO, DEPLOY_KEY], - filterFields: [], - advancedFields: [ - CONNECTION_TO_PLATFORM_MAP, + fields: [ + LOOKML_GITHUB_INFO_REPO, + DEPLOY_KEY, PROJECT_NAME, LOOKML_BASE_URL, LOOKML_CLIENT_ID, LOOKML_CLIENT_SECRET, - PARSE_TABLE_NAMES_FROM_SQL, ], + filterFields: [], + advancedFields: [PARSE_TABLE_NAMES_FROM_SQL, CONNECTION_TO_PLATFORM_MAP, STATEFUL_INGESTION_ENABLED], advancedSectionTooltip: 'In order to ingest LookML data properly, you must either fill out Looker API client information (Base URL, Client ID, Client Secret) or an offline specification of the connection to platform mapping and the project name (Connection To Platform Map, Project Name).', defaultOpenSections: [RecipeSections.Connection, RecipeSections.Advanced], @@ -225,32 +252,91 @@ export const RECIPE_FIELDS: RecipeFields = { 'Filter out data assets based on allow/deny regex patterns we match against. 
Deny patterns take precedence over allow patterns.', }, [POSTGRES]: { - fields: [POSTGRES_HOST_PORT, POSTGRES_DATABASE, POSTGRES_USERNAME, POSTGRES_PASSWORD], + fields: [POSTGRES_HOST_PORT, POSTGRES_USERNAME, POSTGRES_PASSWORD, POSTGRES_DATABASE], + filterFields: [SCHEMA_ALLOW, SCHEMA_DENY, TABLE_ALLOW, TABLE_DENY, VIEW_ALLOW, VIEW_DENY], + advancedFields: [ + INCLUDE_TABLES, + INCLUDE_VIEWS, + TABLE_PROFILING_ENABLED, + COLUMN_PROFILING_ENABLED, + STATEFUL_INGESTION_ENABLED, + ], + filterSectionTooltip: 'Include or exclude specific Schemas, Tables and Views from ingestion.', + }, + [MYSQL]: { + fields: [MYSQL_HOST_PORT, MYSQL_USERNAME, MYSQL_PASSWORD], filterFields: [ - REDSHIFT_SCHEMA_ALLOW, - REDSHIFT_SCHEMA_DENY, - REDSHIFT_TABLE_ALLOW, - REDSHIFT_TABLE_DENY, - REDSHIFT_VIEW_ALLOW, - REDSHIFT_VIEW_DENY, + DATABASE_ALLOW, + DATABASE_DENY, + SCHEMA_ALLOW, + SCHEMA_DENY, + TABLE_ALLOW, + TABLE_DENY, + VIEW_ALLOW, + VIEW_DENY, ], - advancedFields: [STATEFUL_INGESTION_ENABLED, PROFILING_ENABLED], - filterSectionTooltip: - 'Filter out data assets based on allow/deny regex patterns we match against. Deny patterns take precedence over allow patterns.', + advancedFields: [ + INCLUDE_TABLES, + INCLUDE_VIEWS, + TABLE_PROFILING_ENABLED, + COLUMN_PROFILING_ENABLED, + STATEFUL_INGESTION_ENABLED, + ], + filterSectionTooltip: 'Include or exclude specific Databases, Schemas, Tables and Views from ingestion.', }, [HIVE]: { - fields: [HIVE_HOST_PORT, HIVE_DATABASE, HIVE_USERNAME, HIVE_PASSWORD], - filterFields: [ - REDSHIFT_SCHEMA_ALLOW, - REDSHIFT_SCHEMA_DENY, - REDSHIFT_TABLE_ALLOW, - REDSHIFT_TABLE_DENY, - REDSHIFT_VIEW_ALLOW, - REDSHIFT_VIEW_DENY, + fields: [HIVE_HOST_PORT, HIVE_USERNAME, HIVE_PASSWORD, HIVE_DATABASE], + filterFields: [SCHEMA_ALLOW, SCHEMA_DENY, TABLE_ALLOW, TABLE_DENY, VIEW_ALLOW, VIEW_DENY], + advancedFields: [INCLUDE_TABLES, TABLE_PROFILING_ENABLED, COLUMN_PROFILING_ENABLED, STATEFUL_INGESTION_ENABLED], + filterSectionTooltip: 'Include or exclude specific Schemas, Tables and Views from ingestion.', + }, + [PRESTO]: { + fields: [PRESTO_HOST_PORT, PRESTO_USERNAME, PRESTO_PASSWORD, PRESTO_DATABASE], + filterFields: [SCHEMA_ALLOW, SCHEMA_DENY, TABLE_ALLOW, TABLE_DENY, VIEW_ALLOW, VIEW_DENY], + advancedFields: [ + INCLUDE_TABLES, + INCLUDE_VIEWS, + TABLE_PROFILING_ENABLED, + COLUMN_PROFILING_ENABLED, + STATEFUL_INGESTION_ENABLED, ], - advancedFields: [STATEFUL_INGESTION_ENABLED, PROFILING_ENABLED], - filterSectionTooltip: - 'Filter out data assets based on allow/deny regex patterns we match against. 
Deny patterns take precedence over allow patterns.', + filterSectionTooltip: 'Include or exclude specific Schemas, Tables and Views from ingestion.', + }, + [MSSQL]: { + fields: [MSSQL_HOST_PORT, MSSQL_USERNAME, MSSQL_PASSWORD, MSSQL_DATABASE], + filterFields: [SCHEMA_ALLOW, SCHEMA_DENY, TABLE_ALLOW, TABLE_DENY, VIEW_ALLOW, VIEW_DENY], + advancedFields: [ + INCLUDE_TABLES, + INCLUDE_VIEWS, + TABLE_PROFILING_ENABLED, + COLUMN_PROFILING_ENABLED, + STATEFUL_INGESTION_ENABLED, + ], + filterSectionTooltip: 'Include or exclude specific Schemas, Tables and Views from ingestion.', + }, + [TRINO]: { + fields: [TRINO_HOST_PORT, TRINO_USERNAME, TRINO_PASSWORD, TRINO_DATABASE], + filterFields: [SCHEMA_ALLOW, SCHEMA_DENY, TABLE_ALLOW, TABLE_DENY, VIEW_ALLOW, VIEW_DENY], + advancedFields: [ + INCLUDE_TABLES, + INCLUDE_VIEWS, + TABLE_PROFILING_ENABLED, + COLUMN_PROFILING_ENABLED, + STATEFUL_INGESTION_ENABLED, + ], + filterSectionTooltip: 'Include or exclude specific Schemas, Tables and Views from ingestion.', + }, + [MARIADB]: { + fields: [MARIADB_HOST_PORT, MARIADB_USERNAME, MARIADB_PASSWORD, MARIADB_DATABASE], + filterFields: [SCHEMA_ALLOW, SCHEMA_DENY, TABLE_ALLOW, TABLE_DENY, VIEW_ALLOW, VIEW_DENY], + advancedFields: [ + INCLUDE_TABLES, + INCLUDE_VIEWS, + TABLE_PROFILING_ENABLED, + COLUMN_PROFILING_ENABLED, + STATEFUL_INGESTION_ENABLED, + ], + filterSectionTooltip: 'Include or exclude specific Schemas, Tables and Views from ingestion.', }, }; diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/hive.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/hive.ts index 932f2312d4204..8ed60f885db96 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/hive.ts +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/hive.ts @@ -2,36 +2,44 @@ import { RecipeField, FieldType } from './common'; export const HIVE_HOST_PORT: RecipeField = { name: 'host_port', - label: 'Host Port', - tooltip: 'host URL.', + label: 'Host and Port', + tooltip: + "The host and port where Hive is running. For example, 'hive:9083'. Note: this host must be accessible on the network where DataHub is running (or allowed via an IP Allow List, AWS PrivateLink, etc).", type: FieldType.TEXT, fieldPath: 'source.config.host_port', + placeholder: 'hive:9083', + required: true, rules: null, }; export const HIVE_DATABASE: RecipeField = { name: 'database', label: 'Database', - tooltip: 'Database (catalog). Optional, if not specified, ingests from all databases.', + tooltip: 'Ingest metadata for a specific Database. 
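The filterFields lists above now reuse the shared SCHEMA/TABLE/VIEW allow and deny fields from common.ts, each of which writes its values through setListValuesOnRecipe at a fixed path such as source.config.view_pattern.allow. That helper's body is not part of this hunk, so the following is only a rough sketch of the behaviour its call sites imply (write the list when non-empty, drop the key otherwise), using lodash as a stand-in:

import { cloneDeep, set, unset } from 'lodash';

// Assumed behaviour of setListValuesOnRecipe, inferred from its call sites in this diff.
function setListValuesOnRecipe(recipe: any, values: string[] | undefined, fieldPath: string): any {
    const next = cloneDeep(recipe);
    if (values && values.length > 0) {
        return set(next, fieldPath, values);
    }
    unset(next, fieldPath);
    return next;
}

// Example: two fully qualified view patterns land under source.config.view_pattern.allow.
const updated = setListValuesOnRecipe(
    { source: { type: 'snowflake', config: {} } },
    ['my_db\\.my_schema\\..*', 'my_db\\.my_schema\\.orders'],
    'source.config.view_pattern.allow',
);
console.log(JSON.stringify(updated, null, 2));

Because every SQL-style source now points at the same shared pattern fields, the per-source SNOWFLAKE_*/REDSHIFT_*/BIGQUERY_* pattern variants removed later in this diff become redundant.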
If left blank, metadata for all databases will be extracted.', type: FieldType.TEXT, fieldPath: 'source.config.database', + placeholder: 'my_db', rules: null, }; export const HIVE_USERNAME: RecipeField = { name: 'username', label: 'Username', - tooltip: 'Username', + tooltip: 'The Hive username used to extract metadata.', type: FieldType.TEXT, fieldPath: 'source.config.username', + placeholder: 'hive', + required: true, rules: null, }; export const HIVE_PASSWORD: RecipeField = { name: 'password', label: 'Password', - tooltip: 'Password', + tooltip: 'The Hive password for the user.', type: FieldType.SECRET, fieldPath: 'source.config.password', + placeholder: 'password', + required: true, rules: null, }; diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/kafka.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/kafka.ts index 7b30fda0b70ad..eb97f8fb8fa64 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/kafka.ts +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/kafka.ts @@ -107,7 +107,7 @@ const topicAllowFieldPath = 'source.config.topic_patterns.allow'; export const TOPIC_ALLOW: RecipeField = { name: 'topic_patterns.allow', label: 'Allow Patterns', - tooltip: 'Provide an optional Regular Expresssion (REGEX) to include specific Kafka Topic names in ingestion.', + tooltip: 'Provide an optional Regular Expression (REGEX) to include specific Kafka Topic names in ingestion.', type: FieldType.LIST, buttonLabel: 'Add pattern', fieldPath: topicAllowFieldPath, @@ -121,7 +121,7 @@ const topicDenyFieldPath = 'source.config.topic_patterns.deny'; export const TOPIC_DENY: RecipeField = { name: 'topic_patterns.deny', label: 'Deny Patterns', - tooltip: 'Provide an optional Regular Expresssion (REGEX) to exclude specific Kafka Topic names from ingestion.', + tooltip: 'Provide an optional Regular Expression (REGEX) to exclude specific Kafka Topic names from ingestion.', type: FieldType.LIST, buttonLabel: 'Add pattern', fieldPath: topicDenyFieldPath, diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/looker.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/looker.ts index 414f86dff092a..c42fb8eb5df5b 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/looker.ts +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/looker.ts @@ -3,28 +3,33 @@ import { RecipeField, FieldType, setListValuesOnRecipe } from './common'; export const LOOKER_BASE_URL: RecipeField = { name: 'base_url', label: 'Base URL', - tooltip: - 'Url to your Looker instance: https://company.looker.com:19999 or https://looker.company.com, or similar. 
Used for making API calls to Looker and constructing clickable dashboard and chart urls.', + tooltip: 'The URL where your Looker instance is hosted.', type: FieldType.TEXT, fieldPath: 'source.config.base_url', + placeholder: 'https://looker.company.com', + required: true, rules: null, }; export const LOOKER_CLIENT_ID: RecipeField = { name: 'client_id', label: 'Client ID', - tooltip: 'Looker API client id.', + tooltip: 'Looker API Client ID.', type: FieldType.TEXT, fieldPath: 'source.config.client_id', + placeholder: 'client_id', + required: true, rules: null, }; export const LOOKER_CLIENT_SECRET: RecipeField = { name: 'client_secret', label: 'Client Secret', - tooltip: 'Looker API client secret.', + tooltip: 'Looker API Client Secret.', type: FieldType.SECRET, fieldPath: 'source.config.client_secret', + placeholder: 'client_secret', + required: true, rules: null, }; @@ -32,12 +37,14 @@ const chartAllowFieldPath = 'source.config.chart_pattern.allow'; export const CHART_ALLOW: RecipeField = { name: 'chart_pattern.allow', label: 'Allow Patterns', - tooltip: 'Use regex here.', + tooltip: + 'Only include specific Charts by providing the numeric id of a Chart, or a Regular Expression (REGEX). If not provided, all Charts will be included.', type: FieldType.LIST, buttonLabel: 'Add pattern', fieldPath: chartAllowFieldPath, rules: null, section: 'Charts', + placeholder: '12', setValueOnRecipeOverride: (recipe: any, values: string[]) => setListValuesOnRecipe(recipe, values, chartAllowFieldPath), }; @@ -46,12 +53,46 @@ const chartDenyFieldPath = 'source.config.chart_pattern.deny'; export const CHART_DENY: RecipeField = { name: 'chart_pattern.deny', label: 'Deny Patterns', - tooltip: 'Use regex here.', + tooltip: + 'Exclude specific Charts by providing the numeric id of a Chart, or a Regular Expression (REGEX). If not provided, all Charts will be included. Deny patterns always take precedence over Allow patterns.', type: FieldType.LIST, buttonLabel: 'Add pattern', fieldPath: chartDenyFieldPath, rules: null, section: 'Charts', + placeholder: '12', setValueOnRecipeOverride: (recipe: any, values: string[]) => setListValuesOnRecipe(recipe, values, chartDenyFieldPath), }; + +const dashboardAllowFieldPath = 'source.config.dashboard_pattern.allow'; +export const DASHBOARD_ALLOW: RecipeField = { + name: 'dashboard_pattern.allow', + label: 'Allow Patterns', + tooltip: + 'Only include specific Dashboards by providing the numeric id of a Dashboard, or a Regular Expression (REGEX). If not provided, all Dashboards will be included.', + type: FieldType.LIST, + buttonLabel: 'Add pattern', + fieldPath: dashboardAllowFieldPath, + rules: null, + section: 'Dashboards', + placeholder: '1232', + setValueOnRecipeOverride: (recipe: any, values: string[]) => + setListValuesOnRecipe(recipe, values, dashboardAllowFieldPath), +}; + +const dashboardDenyFieldPath = 'source.config.dashboard_pattern.deny'; +export const DASHBOARD_DENY: RecipeField = { + name: 'dashboard_pattern.deny', + label: 'Deny Patterns', + tooltip: + 'Exclude specific Dashboards by providing the numeric id of a Dashboard, or a Regular Expression (REGEX). If not provided, all Dashboards will be included. 
Deny patterns always take precedence over Allow patterns.', + type: FieldType.LIST, + buttonLabel: 'Add pattern', + fieldPath: dashboardDenyFieldPath, + rules: null, + section: 'Dashboards', + placeholder: '1232', + setValueOnRecipeOverride: (recipe: any, values: string[]) => + setListValuesOnRecipe(recipe, values, dashboardDenyFieldPath), +}; diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/lookml.tsx b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/lookml.tsx index 656fe3de5e025..6afc51dbcaca9 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/lookml.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/lookml.tsx @@ -7,28 +7,35 @@ export const LOOKML = 'lookml'; export const LOOKML_GITHUB_INFO_REPO: RecipeField = { name: 'github_info.repo', label: 'GitHub Repo', - tooltip: ( -
-

- Name of your github repo. e.g. repo for{' '} - - https://github.com/datahub-project/datahub - {' '} - is datahub-project/datahub. -

-
- ), + tooltip: 'The name of the GitHub repository where your LookML is defined.', type: FieldType.TEXT, fieldPath: 'source.config.github_info.repo', placeholder: 'datahub-project/datahub', rules: [{ required: true, message: 'Github Repo is required' }], + required: true, }; const deployKeyFieldPath = 'source.config.github_info.deploy_key'; export const DEPLOY_KEY: RecipeField = { name: 'github_info.deploy_key', label: 'GitHub Deploy Key', - tooltip: 'The SSH private key that has been provisioned for read access on the GitHub repository.', + tooltip: ( + <> + An SSH private key that has been provisioned for read access on the GitHub repository where the LookML is + defined. +
+ Learn how to generate an SSH key for your GitHub repository{' '} + + here + + . +
+ + ), type: FieldType.TEXTAREA, fieldPath: 'source.config.github_info.deploy_key', placeholder: '-----BEGIN OPENSSH PRIVATE KEY-----\n...', @@ -37,6 +44,7 @@ export const DEPLOY_KEY: RecipeField = { const valueWithNewLine = `${value}\n`; return setFieldValueOnRecipe(recipe, valueWithNewLine, deployKeyFieldPath); }, + required: true, }; function validateApiSection(getFieldValue, fieldName) { @@ -59,9 +67,8 @@ function validateApiSection(getFieldValue, fieldName) { export const LOOKML_BASE_URL: RecipeField = { name: 'base_url', - label: 'Base URL', - tooltip: - 'Url to your Looker instance: https://company.looker.com:19999 or https://looker.company.com, or similar. Used for making API calls to Looker and constructing clickable dashboard and chart urls.', + label: 'Looker Base URL', + tooltip: 'Optional URL to your Looker instance. This is used to generate external URLs for models in your project.', type: FieldType.TEXT, fieldPath: 'source.config.api.base_url', placeholder: 'https://looker.company.com', @@ -70,45 +77,43 @@ export const LOOKML_BASE_URL: RecipeField = { export const LOOKML_CLIENT_ID: RecipeField = { name: 'client_id', - label: 'Client ID', - tooltip: 'Looker API client id.', + label: 'Looker Client ID', + tooltip: 'The Looker API Client ID. Required if Looker Base URL is present.', type: FieldType.SECRET, - placeholder: 'LOOKER_CLIENT_ID', + placeholder: 'client_id', fieldPath: 'source.config.api.client_id', rules: [({ getFieldValue }) => validateApiSection(getFieldValue, 'Client ID')], }; export const LOOKML_CLIENT_SECRET: RecipeField = { name: 'client_secret', - label: 'Client Secret', - tooltip: 'Looker API client secret.', + label: 'Looker Client Secret', + tooltip: 'A Looker API Client Secret. Required if Looker Base URL is present.', type: FieldType.SECRET, fieldPath: 'source.config.api.client_secret', - placeholder: 'LOOKER_CLIENT_SECRET', + placeholder: 'client_secret', rules: [({ getFieldValue }) => validateApiSection(getFieldValue, 'Client Secret')], }; export const PROJECT_NAME: RecipeField = { name: 'project_name', - label: 'Project Name', + label: 'LookML Project Name', tooltip: ( -
- Required if you don't specify the api section. The project name within which all the model files live. - See{' '} + <> + The project name within which the LookML files live. See - https://docs.looker.com/data-modeling/getting-started/how-project-works - {' '} - to understand what the Looker project name should be. The simplest way to see your projects is to click on - Develop followed by Manage LookML Projects in the Looker application. -
+ this document + + for more details. Required if Looker Base URL is not present. + ), type: FieldType.TEXT, fieldPath: 'source.config.project_name', - placeholder: 'Looker Project Name', + placeholder: 'My Project', rules: [ ({ getFieldValue }) => ({ validator(_, value) { @@ -128,8 +133,9 @@ export const PROJECT_NAME: RecipeField = { export const PARSE_TABLE_NAMES_FROM_SQL: RecipeField = { name: 'parse_table_names_from_sql', - label: 'Parse Table Names from SQL', - tooltip: 'Use an SQL parser to try to parse the tables the views depends on.', + label: 'Extract External Lineage', + tooltip: + 'Extract lineage between Looker and the external upstream Data Sources (e.g. Data Warehouses or Databases).', type: FieldType.BOOLEAN, fieldPath: 'source.config.parse_table_names_from_sql', rules: null, @@ -154,24 +160,24 @@ export const CONNECTION_TO_PLATFORM_MAP_NAME: RecipeField = { type: FieldType.TEXT, fieldPath: 'name', - placeholder: 'mysql_db', + placeholder: 'my_mysql_connection', rules: [{ required: true, message: 'Name is required' }], }; export const PLATFORM: RecipeField = { name: 'platform', label: 'Platform', - tooltip: 'Associated platform in DataHub', + tooltip: 'The Data Platform ID in DataHub (e.g. snowflake, bigquery, redshift, mysql, postgres)', type: FieldType.TEXT, fieldPath: 'platform', - placeholder: 'looker', + placeholder: 'snowflake', rules: [{ required: true, message: 'Platform is required' }], }; export const DEFAULT_DB: RecipeField = { name: 'default_db', label: 'Default Database', - tooltip: 'Associated database in DataHub', + tooltip: 'The Database associated with assets from the Looker connection.', type: FieldType.TEXT, fieldPath: 'default_db', placeholder: 'default_db', @@ -183,7 +189,8 @@ const connectionToPlatformMapFieldPath = 'source.config.connection_to_platform_m export const CONNECTION_TO_PLATFORM_MAP: RecipeField = { name: 'connection_to_platform_map', label: 'Connection To Platform Map', - tooltip: 'A mapping of Looker connection names to DataHub platform and database values', + tooltip: + 'A mapping of Looker connection names to DataHub Data Platform and Database names. This is used to create an accurate picture of the Lineage between LookML models and upstream Data Sources.', type: FieldType.DICT, buttonLabel: 'Add mapping', fieldPath: connectionToPlatformMapFieldPath, diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/mariadb.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/mariadb.ts new file mode 100644 index 0000000000000..483630604f092 --- /dev/null +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/mariadb.ts @@ -0,0 +1,48 @@ +import { RecipeField, FieldType } from './common'; + +export const MARIADB = 'mariadb'; + +export const MARIADB_HOST_PORT: RecipeField = { + name: 'host_port', + label: 'Host and Port', + tooltip: + "The host and port where MariaDB is running. For example, 'mariadb-server:5432'. 
Note: this host must be accessible on the network where DataHub is running (or allowed via an IP Allow List, AWS PrivateLink, etc).", + type: FieldType.TEXT, + fieldPath: 'source.config.host_port', + placeholder: 'mariadb-server:5432', + required: true, + rules: null, +}; + +export const MARIADB_DATABASE: RecipeField = { + name: 'database', + label: 'Database', + tooltip: 'Ingest metadata for a specific Database.', + type: FieldType.TEXT, + fieldPath: 'source.config.database', + placeholder: 'my_db', + required: true, + rules: null, +}; + +export const MARIADB_USERNAME: RecipeField = { + name: 'username', + label: 'Username', + tooltip: 'The MariaDB username used to extract metadata.', + type: FieldType.TEXT, + fieldPath: 'source.config.username', + placeholder: 'mariadb', + required: true, + rules: null, +}; + +export const MARIADB_PASSWORD: RecipeField = { + name: 'password', + label: 'Password', + tooltip: 'The MariaDB password for the user.', + type: FieldType.SECRET, + fieldPath: 'source.config.password', + placeholder: 'password', + required: true, + rules: null, +}; diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/mssql.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/mssql.ts new file mode 100644 index 0000000000000..9e6fb388144eb --- /dev/null +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/mssql.ts @@ -0,0 +1,48 @@ +import { RecipeField, FieldType } from './common'; + +export const MSSQL = 'mssql'; + +export const MSSQL_HOST_PORT: RecipeField = { + name: 'host_port', + label: 'Host and Port', + tooltip: + "The host and port where SQL Server is running. For example, 'mssql-server:5432'. Note: this host must be accessible on the network where DataHub is running (or allowed via an IP Allow List, AWS PrivateLink, etc).", + type: FieldType.TEXT, + fieldPath: 'source.config.host_port', + placeholder: 'mssql-server:5432', + required: true, + rules: null, +}; + +export const MSSQL_DATABASE: RecipeField = { + name: 'database', + label: 'Database', + tooltip: 'Ingest metadata for a specific Database.', + type: FieldType.TEXT, + fieldPath: 'source.config.database', + placeholder: 'my_db', + required: true, + rules: null, +}; + +export const MSSQL_USERNAME: RecipeField = { + name: 'username', + label: 'Username', + tooltip: 'The SQL Server username used to extract metadata.', + type: FieldType.TEXT, + fieldPath: 'source.config.username', + placeholder: 'mssql', + required: true, + rules: null, +}; + +export const MSSQL_PASSWORD: RecipeField = { + name: 'password', + label: 'Password', + tooltip: 'The SQL Server password for the user.', + type: FieldType.SECRET, + fieldPath: 'source.config.password', + placeholder: 'password', + required: true, + rules: null, +}; diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/mysql.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/mysql.ts new file mode 100644 index 0000000000000..fa14d1eaa5d2e --- /dev/null +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/mysql.ts @@ -0,0 +1,35 @@ +import { RecipeField, FieldType } from './common'; + +export const MYSQL_HOST_PORT: RecipeField = { + name: 'host_port', + label: 'Host and Port', + tooltip: + "The host and port where MySQL is running. For example, 'localhost:5432'. 
Note: this host must be accessible on the network where DataHub is running (or allowed via an IP Allow List, AWS PrivateLink, etc).", + type: FieldType.TEXT, + fieldPath: 'source.config.host_port', + placeholder: 'mysql:5432', + required: true, + rules: null, +}; + +export const MYSQL_USERNAME: RecipeField = { + name: 'username', + label: 'Username', + tooltip: 'The MySQL username used to extract metadata.', + type: FieldType.TEXT, + fieldPath: 'source.config.username', + placeholder: 'mysql', + required: true, + rules: null, +}; + +export const MYSQL_PASSWORD: RecipeField = { + name: 'password', + label: 'Password', + tooltip: 'The MySQL password for the user.', + type: FieldType.SECRET, + fieldPath: 'source.config.password', + placeholder: 'password', + required: true, + rules: null, +}; diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/postgres.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/postgres.ts index b1d32d08e1596..f816bbca0a5a9 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/postgres.ts +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/postgres.ts @@ -2,36 +2,45 @@ import { RecipeField, FieldType } from './common'; export const POSTGRES_HOST_PORT: RecipeField = { name: 'host_port', - label: 'Host Port', - tooltip: 'host URL.', + label: 'Host and Port', + tooltip: + "The host and port where Postgres is running. For example, 'postgres:5432'. Note: this host must be accessible on the network where DataHub is running (or allowed via an IP Allow List, AWS PrivateLink, etc).", type: FieldType.TEXT, fieldPath: 'source.config.host_port', + placeholder: 'postgres:5432', + required: true, rules: null, }; export const POSTGRES_DATABASE: RecipeField = { name: 'database', label: 'Database', - tooltip: 'Database (catalog). Optional, if not specified, ingests from all databases.', + tooltip: 'Ingest metadata for a specific Database.', type: FieldType.TEXT, fieldPath: 'source.config.database', + placeholder: 'my_db', + required: true, rules: null, }; export const POSTGRES_USERNAME: RecipeField = { name: 'username', label: 'Username', - tooltip: 'Username', + tooltip: 'The Postgres username used to extract metadata.', type: FieldType.TEXT, fieldPath: 'source.config.username', + placeholder: 'postgres', + required: true, rules: null, }; export const POSTGRES_PASSWORD: RecipeField = { name: 'password', label: 'Password', - tooltip: 'Password', + tooltip: 'The Postgres password for the user.', type: FieldType.SECRET, fieldPath: 'source.config.password', + placeholder: 'password', + required: true, rules: null, }; diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/presto.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/presto.ts new file mode 100644 index 0000000000000..33b178701057f --- /dev/null +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/presto.ts @@ -0,0 +1,48 @@ +import { RecipeField, FieldType } from './common'; + +export const PRESTO = 'presto'; + +export const PRESTO_HOST_PORT: RecipeField = { + name: 'host_port', + label: 'Host and Port', + tooltip: + "The host and port where Presto is running. For example, 'presto-server:5432'. 
Note: this host must be accessible on the network where DataHub is running (or allowed via an IP Allow List, AWS PrivateLink, etc).", + type: FieldType.TEXT, + fieldPath: 'source.config.host_port', + placeholder: 'presto-server:5432', + required: true, + rules: null, +}; + +export const PRESTO_DATABASE: RecipeField = { + name: 'database', + label: 'Database', + tooltip: 'Ingest metadata for a specific Database.', + type: FieldType.TEXT, + fieldPath: 'source.config.database', + placeholder: 'my_db', + required: true, + rules: null, +}; + +export const PRESTO_USERNAME: RecipeField = { + name: 'username', + label: 'Username', + tooltip: 'The Presto username used to extract metadata.', + type: FieldType.TEXT, + fieldPath: 'source.config.username', + placeholder: 'presto', + required: true, + rules: null, +}; + +export const PRESTO_PASSWORD: RecipeField = { + name: 'password', + label: 'Password', + tooltip: 'The Presto password for the user.', + type: FieldType.SECRET, + fieldPath: 'source.config.password', + placeholder: 'password', + required: true, + rules: null, +}; diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/redshift.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/redshift.ts index 62db34e06d94f..0091bccf77325 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/redshift.ts +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/redshift.ts @@ -1,127 +1,46 @@ -import { RecipeField, FieldType, setListValuesOnRecipe } from './common'; +import { RecipeField, FieldType } from './common'; export const REDSHIFT_HOST_PORT: RecipeField = { name: 'host_port', - label: 'Host Port', - tooltip: 'Host URL.', + label: 'Host and Port', + tooltip: + "The host and port where Redshift is running. For example, 'redshift:5439'. 
Note: this host must be accessible on the network where DataHub is running (or allowed via an IP Allow List, AWS PrivateLink, etc).", type: FieldType.TEXT, fieldPath: 'source.config.host_port', + placeholder: 'redshift.company.us-west-1.redshift.amazonaws.com:5439', rules: null, + required: true, }; export const REDSHIFT_DATABASE: RecipeField = { name: 'database', label: 'Database', - tooltip: 'Database (catalog).', + tooltip: 'Ingest metadata for a specific Database.', type: FieldType.TEXT, fieldPath: 'source.config.database', + placeholder: 'database_name', rules: null, + required: true, }; export const REDSHIFT_USERNAME: RecipeField = { name: 'redshift.username', - label: 'Redshift username', - tooltip: 'Username', + label: 'Username', + tooltip: 'A Redshift username used to extract metadata.', type: FieldType.TEXT, fieldPath: 'source.config.username', + placeholder: 'redshift', rules: null, + required: true, }; export const REDSHIFT_PASSWORD: RecipeField = { name: 'redshift.password', - label: 'Redshift password', - tooltip: 'Password', + label: 'Password', + tooltip: 'The password of the username.', type: FieldType.SECRET, fieldPath: 'source.config.password', + placeholder: 'password', rules: null, -}; - -const schemaAllowFieldPath = 'source.config.schema_pattern.allow'; -export const REDSHIFT_SCHEMA_ALLOW: RecipeField = { - name: 'schema_pattern.allow', - label: 'Allow Patterns', - tooltip: 'Use regex here.', - placeholder: '^my_schema$', - type: FieldType.LIST, - buttonLabel: 'Add pattern', - fieldPath: schemaAllowFieldPath, - rules: null, - section: 'Schemas', - setValueOnRecipeOverride: (recipe: any, values: string[]) => - setListValuesOnRecipe(recipe, values, schemaAllowFieldPath), -}; - -const schemaDenyFieldPath = 'source.config.schema_pattern.deny'; -export const REDSHIFT_SCHEMA_DENY: RecipeField = { - name: 'schema_pattern.deny', - label: 'Deny Patterns', - tooltip: 'Use regex here.', - placeholder: '^my_schema$', - type: FieldType.LIST, - buttonLabel: 'Add pattern', - fieldPath: schemaDenyFieldPath, - rules: null, - section: 'Schemas', - setValueOnRecipeOverride: (recipe: any, values: string[]) => - setListValuesOnRecipe(recipe, values, schemaDenyFieldPath), -}; - -const tableAllowFieldPath = 'source.config.table_pattern.allow'; -export const REDSHIFT_TABLE_ALLOW: RecipeField = { - name: 'table_pattern.allow', - label: 'Allow Patterns', - tooltip: 'Use regex here.', - placeholder: '^my_schema\\.table_name$', - type: FieldType.LIST, - buttonLabel: 'Add pattern', - fieldPath: tableAllowFieldPath, - rules: null, - section: 'Tables', - setValueOnRecipeOverride: (recipe: any, values: string[]) => - setListValuesOnRecipe(recipe, values, tableAllowFieldPath), -}; - -const tableDenyFieldPath = 'source.config.table_pattern.deny'; -export const REDSHIFT_TABLE_DENY: RecipeField = { - name: 'table_pattern.deny', - label: 'Deny Patterns', - tooltip: 'Use regex here.', - placeholder: '^my_schema\\.table_name$', - type: FieldType.LIST, - buttonLabel: 'Add pattern', - fieldPath: tableDenyFieldPath, - rules: null, - section: 'Tables', - setValueOnRecipeOverride: (recipe: any, values: string[]) => - setListValuesOnRecipe(recipe, values, tableDenyFieldPath), -}; - -const viewAllowFieldPath = 'source.config.view_pattern.allow'; -export const REDSHIFT_VIEW_ALLOW: RecipeField = { - name: 'view_pattern.allow', - label: 'Allow Patterns', - tooltip: 'Use regex here.', - placeholder: '^my_schema\\.view_name$', - type: FieldType.LIST, - buttonLabel: 'Add pattern', - fieldPath: viewAllowFieldPath, - 
rules: null, - section: 'Views', - setValueOnRecipeOverride: (recipe: any, values: string[]) => - setListValuesOnRecipe(recipe, values, viewAllowFieldPath), -}; - -const viewDenyFieldPath = 'source.config.view_pattern.deny'; -export const REDSHIFT_VIEW_DENY: RecipeField = { - name: 'view_pattern.deny', - label: 'Deny Patterns', - tooltip: 'Use regex here.', - placeholder: '^my_schema\\.view_name$', - type: FieldType.LIST, - buttonLabel: 'Add pattern', - fieldPath: viewDenyFieldPath, - rules: null, - section: 'Views', - setValueOnRecipeOverride: (recipe: any, values: string[]) => - setListValuesOnRecipe(recipe, values, viewDenyFieldPath), + required: true, }; diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/snowflake.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/snowflake.ts index 6602f87b988d0..0d85676e6201d 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/snowflake.ts +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/snowflake.ts @@ -1,30 +1,37 @@ -import { FieldType, RecipeField, setListValuesOnRecipe } from './common'; +import { FieldType, RecipeField } from './common'; export const SNOWFLAKE_ACCOUNT_ID: RecipeField = { name: 'account_id', label: 'Account ID', - tooltip: 'Snowflake account. e.g. abc48144', + tooltip: + 'The Snowflake Account Identifier e.g. myorg-account123, account123-eu-central-1, account123.west-us-2.azure', type: FieldType.TEXT, fieldPath: 'source.config.account_id', + placeholder: 'xyz123', rules: null, + required: true, }; export const SNOWFLAKE_WAREHOUSE: RecipeField = { name: 'warehouse', label: 'Warehouse', - tooltip: 'Snowflake warehouse.', + tooltip: 'The name of the Snowflake Warehouse to extract metadata from.', type: FieldType.TEXT, fieldPath: 'source.config.warehouse', + placeholder: 'COMPUTE_WH', rules: null, + required: true, }; export const SNOWFLAKE_USERNAME: RecipeField = { name: 'username', label: 'Username', tooltip: 'Snowflake username.', - type: FieldType.SECRET, + type: FieldType.TEXT, fieldPath: 'source.config.username', + placeholder: 'snowflake', rules: null, + required: true, }; export const SNOWFLAKE_PASSWORD: RecipeField = { @@ -33,104 +40,18 @@ export const SNOWFLAKE_PASSWORD: RecipeField = { tooltip: 'Snowflake password.', type: FieldType.SECRET, fieldPath: 'source.config.password', + placeholder: 'password', rules: null, + required: true, }; export const SNOWFLAKE_ROLE: RecipeField = { name: 'role', label: 'Role', - tooltip: 'Snowflake role.', + tooltip: 'The Role to use when extracting metadata from Snowflake.', type: FieldType.TEXT, fieldPath: 'source.config.role', + placeholder: 'datahub_role', rules: null, -}; - -const schemaAllowFieldPath = 'source.config.schema_pattern.allow'; -export const SNOWFLAKE_SCHEMA_ALLOW: RecipeField = { - name: 'schema_pattern.allow', - label: 'Allow Patterns', - tooltip: 'Use regex here.', - placeholder: '^my_schema$', - type: FieldType.LIST, - buttonLabel: 'Add pattern', - fieldPath: schemaAllowFieldPath, - rules: null, - section: 'Schemas', - setValueOnRecipeOverride: (recipe: any, values: string[]) => - setListValuesOnRecipe(recipe, values, schemaAllowFieldPath), -}; - -const schemaDenyFieldPath = 'source.config.schema_pattern.deny'; -export const SNOWFLAKE_SCHEMA_DENY: RecipeField = { - name: 'schema_pattern.deny', - label: 'Deny Patterns', - tooltip: 'Use regex here.', - placeholder: '^my_schema$', - type: FieldType.LIST, - buttonLabel: 'Add pattern', - fieldPath: schemaDenyFieldPath, - rules: null, - section: 'Schemas', 
- setValueOnRecipeOverride: (recipe: any, values: string[]) => - setListValuesOnRecipe(recipe, values, schemaDenyFieldPath), -}; - -const tableAllowFieldPath = 'source.config.table_pattern.allow'; -export const SNOWFLAKE_TABLE_ALLOW: RecipeField = { - name: 'table_pattern.allow', - label: 'Allow Patterns', - tooltip: 'Use regex here.', - placeholder: '^my_db\\.my_schema\\.table_name$', - type: FieldType.LIST, - buttonLabel: 'Add pattern', - fieldPath: tableAllowFieldPath, - rules: null, - section: 'Tables', - setValueOnRecipeOverride: (recipe: any, values: string[]) => - setListValuesOnRecipe(recipe, values, tableAllowFieldPath), -}; - -const tableDenyFieldPath = 'source.config.table_pattern.deny'; -export const SNOWFLAKE_TABLE_DENY: RecipeField = { - name: 'table_pattern.deny', - label: 'Deny Patterns', - tooltip: 'Use regex here.', - placeholder: '^my_db\\.my_schema\\.table_name$', - type: FieldType.LIST, - buttonLabel: 'Add pattern', - fieldPath: tableDenyFieldPath, - rules: null, - section: 'Tables', - setValueOnRecipeOverride: (recipe: any, values: string[]) => - setListValuesOnRecipe(recipe, values, tableDenyFieldPath), -}; - -const viewAllowFieldPath = 'source.config.view_pattern.allow'; -export const SNOWFLAKE_VIEW_ALLOW: RecipeField = { - name: 'view_pattern.allow', - label: 'Allow Patterns', - tooltip: 'Use regex here.', - placeholder: '^my_db\\.my_schema\\.view_name$', - type: FieldType.LIST, - buttonLabel: 'Add pattern', - fieldPath: viewAllowFieldPath, - rules: null, - section: 'Views', - setValueOnRecipeOverride: (recipe: any, values: string[]) => - setListValuesOnRecipe(recipe, values, viewAllowFieldPath), -}; - -const viewDenyFieldPath = 'source.config.view_pattern.deny'; -export const SNOWFLAKE_VIEW_DENY: RecipeField = { - name: 'view_pattern.deny', - label: 'Deny Patterns', - tooltip: 'Use regex here.', - placeholder: '^my_db\\.my_schema\\.view_name$', - type: FieldType.LIST, - buttonLabel: 'Add pattern', - fieldPath: viewDenyFieldPath, - rules: null, - section: 'Views', - setValueOnRecipeOverride: (recipe: any, values: string[]) => - setListValuesOnRecipe(recipe, values, viewDenyFieldPath), + required: true, }; diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/tableau.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/tableau.ts index 671165799c0f6..e465cf15f2c1b 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/tableau.ts +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/tableau.ts @@ -2,10 +2,12 @@ import { RecipeField, FieldType, setListValuesOnRecipe } from './common'; export const TABLEAU_CONNECTION_URI: RecipeField = { name: 'connect_uri', - label: 'Connection URI', - tooltip: 'Tableau host URL.', + label: 'Host URL', + tooltip: 'The URL where the Tableau instance is hosted.', type: FieldType.TEXT, fieldPath: 'source.config.connect_uri', + placeholder: 'https://prod-ca-a.online.tableau.com', + required: true, rules: null, }; @@ -13,7 +15,7 @@ const tableauProjectFieldPath = 'source.config.projects'; export const TABLEAU_PROJECT: RecipeField = { name: 'projects', label: 'Projects', - tooltip: 'List of projects', + tooltip: 'The list of Projects to extract metadata for.', type: FieldType.LIST, buttonLabel: 'Add project', fieldPath: tableauProjectFieldPath, @@ -26,26 +28,51 @@ export const TABLEAU_SITE: RecipeField = { name: 'site', label: 'Tableau Site', tooltip: - 'Tableau Site. Always required for Tableau Online. 
Use empty string to connect with Default site on Tableau Server.', + 'The Tableau Site. Required for Tableau Online. Leave this blank to extract from the default site on Tableau Server.', type: FieldType.TEXT, fieldPath: 'source.config.site', + placeholder: 'datahub', + rules: null, +}; + +export const TABLEAU_TOKEN_NAME: RecipeField = { + name: 'tableau.token_name', + label: 'Token Name', + tooltip: + 'The name of the Personal Access Token used to extract metadata. Required if authenticating using a Personal Access Token.', + type: FieldType.TEXT, + fieldPath: 'source.config.username', + placeholder: 'access_token_name', + rules: null, +}; + +export const TABLEAU_TOKEN_VALUE: RecipeField = { + name: 'tableau.token_value', + label: 'Token Value', + tooltip: + 'The value of the Personal Access Token used to extract metadata. Required if authenticating using a Personal Access Token.', + type: FieldType.SECRET, + fieldPath: 'source.config.password', + placeholder: 'access_token_value', rules: null, }; export const TABLEAU_USERNAME: RecipeField = { name: 'tableau.username', label: 'Username', - tooltip: 'Tableau username, must be set if authenticating using username/password.', + tooltip: 'Tableau username. Only required if Token is not provided.', type: FieldType.TEXT, fieldPath: 'source.config.username', + placeholder: 'tableau', rules: null, }; export const TABLEAU_PASSWORD: RecipeField = { name: 'tableau.password', label: 'Password', - tooltip: 'Tableau password, must be set if authenticating using username/password.', + tooltip: 'Tableau password. Only required if Token is not provided.', type: FieldType.SECRET, fieldPath: 'source.config.password', + placeholder: 'password', rules: null, }; diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/trino.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/trino.ts new file mode 100644 index 0000000000000..ed3c7ee73b819 --- /dev/null +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/trino.ts @@ -0,0 +1,48 @@ +import { RecipeField, FieldType } from './common'; + +export const TRINO = 'trino'; + +export const TRINO_HOST_PORT: RecipeField = { + name: 'host_port', + label: 'Host and Port', + tooltip: + "The host and port where Trino is running. For example, 'trino-server:5432'. 
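One detail worth noting in the Tableau changes above: the new Token Name and Token Value fields point at the same source.config.username and source.config.password recipe paths as the Username and Password fields, so a Personal Access Token and a username/password pair are alternative ways of filling the same two keys. A small sketch of the recipe produced when token authentication is used (the values are placeholders, and the secret reference follows the "${...}" convention used in sources.json):

import { set } from 'lodash';

// Field paths taken from TABLEAU_TOKEN_NAME / TABLEAU_TOKEN_VALUE above; values are made up.
const recipe: any = { source: { type: 'tableau', config: { connect_uri: 'https://prod-ca-a.online.tableau.com' } } };
set(recipe, 'source.config.username', 'my_access_token_name'); // Token Name
set(recipe, 'source.config.password', '${TABLEAU_TOKEN_VALUE}'); // Token Value, ideally a secret reference

console.log(JSON.stringify(recipe, null, 2));
// Filling in Username/Password instead would overwrite these same two keys.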
Note: this host must be accessible on the network where DataHub is running (or allowed via an IP Allow List, AWS PrivateLink, etc).", + type: FieldType.TEXT, + fieldPath: 'source.config.host_port', + placeholder: 'trino-server:5432', + required: true, + rules: null, +}; + +export const TRINO_DATABASE: RecipeField = { + name: 'database', + label: 'Database', + tooltip: 'Ingest metadata for a specific Database.', + type: FieldType.TEXT, + fieldPath: 'source.config.database', + placeholder: 'my_db', + required: true, + rules: null, +}; + +export const TRINO_USERNAME: RecipeField = { + name: 'username', + label: 'Username', + tooltip: 'The Trino username used to extract metadata.', + type: FieldType.TEXT, + fieldPath: 'source.config.username', + placeholder: 'trino', + required: true, + rules: null, +}; + +export const TRINO_PASSWORD: RecipeField = { + name: 'password', + label: 'Password', + tooltip: 'The Trino password for the user.', + type: FieldType.SECRET, + fieldPath: 'source.config.password', + placeholder: 'password', + required: true, + rules: null, +}; diff --git a/datahub-web-react/src/app/ingest/source/builder/sources.json b/datahub-web-react/src/app/ingest/source/builder/sources.json index 10e3176b41a7f..6f6c63672fb8a 100644 --- a/datahub-web-react/src/app/ingest/source/builder/sources.json +++ b/datahub-web-react/src/app/ingest/source/builder/sources.json @@ -4,21 +4,21 @@ "name": "bigquery", "displayName": "BigQuery", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/bigquery/", - "recipe": "source:\n type: bigquery\n config:\n include_table_lineage: true\n include_usage_statistics: true\n credential:\n private_key_id: \n client_email: \n private_key: \n client_id: \n project_id: \n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" + "recipe": "source:\n type: bigquery\n config:\n include_table_lineage: true\n include_usage_statistics: true\n include_tables: true\n include_views: true\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:redshift", "name": "redshift", "displayName": "Redshift", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/redshift/", - "recipe": "source: \n type: redshift\n config:\n # Coordinates\n host_port: # Your Redshift host and post, e.g. example.something.us-west-2.redshift.amazonaws.com:5439\n database: # Your Redshift database, e.g. SampleDatabase\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: \"${REDSHIFT_USERNAME}\" # Your Redshift username, e.g. admin\n password: \"${REDSHIFT_PASSWORD}\" # Your Redshift password, e.g. password_01\n\n table_lineage_mode: stl_scan_based\n include_table_lineage: true\n profiling:\n enabled: true\n stateful_ingestion:\n enabled: true" + "recipe": "source: \n type: redshift\n config:\n # Coordinates\n host_port: # Your Redshift host and post, e.g. example.something.us-west-2.redshift.amazonaws.com:5439\n database: # Your Redshift database, e.g. SampleDatabase\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: null # Your Redshift username, e.g. 
admin\n\n table_lineage_mode: stl_scan_based\n include_table_lineage: true\n include_tables: true\n include_views: true\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:snowflake", "name": "snowflake", "displayName": "Snowflake", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/snowflake/", - "recipe": "source: \n type: snowflake\n config:\n account_id: \"example_id\"\n warehouse: \"example_warehouse\"\n role: \"datahub_role\"\n include_table_lineage: true\n include_view_lineage: true\n profiling:\n enabled: true\n stateful_ingestion:\n enabled: true" + "recipe": "source: \n type: snowflake\n config:\n account_id: null\n include_table_lineage: true\n include_view_lineage: true\n include_tables: true\n include_views: true\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:kafka", @@ -32,35 +32,70 @@ "name": "looker", "displayName": "Looker", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/looker/", - "recipe": "source:\n type: looker\n config:\n # Coordinates\n base_url: # Your Looker instance URL, e.g. https://company.looker.com:19999\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n client_id: \"${LOOKER_CLIENT_ID}\" # Your Looker client id, e.g. admin\n client_secret: \"${LOOKER_CLIENT_SECRET}\" # Your Looker password, e.g. password_01" + "recipe": "source:\n type: looker\n config:\n # Coordinates\n base_url: # Your Looker instance URL, e.g. https://company.looker.com:19999\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n client_id: null # Your Looker client id, e.g. admin\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:lookml", "name": "lookml", "displayName": "LookML", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/looker/#module-lookml", - "recipe": "source:\n type: lookml\n config:\n parse_table_names_from_sql: true" + "recipe": "source:\n type: lookml\n config:\n parse_table_names_from_sql: true\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:tableau", "name": "tableau", "displayName": "Tableau", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/tableau/", - "recipe": "source:\n type: tableau\n config:\n # Coordinates\n connect_uri: https://prod-ca-a.online.tableau.com\n site: acryl\n projects: [\"default\", \"Project 2\"]\n\n # Credentials\n username: \"${TABLEAU_USER}\"\n password: \"${TABLEAU_PASSWORD}\"\n\n # Options\n ingest_tags: True\n ingest_owner: True\n default_schema_map:\n mydatabase: public\n anotherdatabase: anotherschema" + "recipe": "source:\n type: tableau\n config:\n # Coordinates\n connect_uri: null\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:mysql", "name": "mysql", "displayName": "MySQL", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/mysql/", - "recipe": "source: \n type: mysql\n config: \n # Coordinates\n host_port: # Your MySQL host and post, e.g. mysql:3306\n database: # Your MySQL database name, e.g. datahub\n \n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: \"${MYSQL_USERNAME}\" # Your MySQL username, e.g. admin\n password: \"${MYSQL_PASSWORD}\" # Your MySQL password, e.g. 
password_01\n\n # Options\n include_tables: True\n include_views: True\n\n # Profiling\n profiling:\n enabled: false" + "recipe": "source: \n type: mysql\n config: \n # Coordinates\n host_port: # Your MySQL host and post, e.g. mysql:3306\n database: # Your MySQL database name, e.g. datahub\n \n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: null # Your MySQL username, e.g. admin\n\n # Options\n include_tables: true\n include_views: true\n\n # Profiling\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:postgres", "name": "postgres", "displayName": "Postgres", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/postgres/", - "recipe": "source: \n type: postgres\n config:\n # Coordinates\n host_port: # Your Postgres host and port, e.g. postgres:5432\n database: # Your Postgres Database, e.g. sample_db\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: \"${POSTGRES_USERNAME}\" # Your Postgres username, e.g. admin\n password: \"${POSTGRES_PASSWORD}\" # Your Postgres password, e.g. password_01\n\n # Options\n include_tables: True\n include_views: True\n\n # Profiling\n profiling:\n enabled: false\n stateful_ingestion:\n enabled: true" + "recipe": "source: \n type: postgres\n config:\n # Coordinates\n host_port: # Your Postgres host and port, e.g. postgres:5432\n database: # Your Postgres Database, e.g. sample_db\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: null # Your Postgres username, e.g. admin\n\n # Options\n include_tables: true\n include_views: true\n\n # Profiling\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" + }, + { + "urn": "urn:li:dataPlatform:hive", + "name": "hive", + "displayName": "Hive", + "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/hive/", + "recipe": "source: \n type: hive\n config:\n # Coordinates\n host_port: # Your Hive host and port, e.g. hive:10000\n database: # Your Hive database name, e.g. SampleDatabase (Optional, if not specified, ingests from all databases)\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: null # Your Hive username, e.g. 
admin\n stateful_ingestion:\n enabled: true" + }, + { + "urn": "urn:li:dataPlatform:presto", + "name": "presto", + "displayName": "Presto", + "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/presto/", + "recipe": "source:\n type: presto\n config:\n # Coordinates\n host_port: null\n # The name of the catalog from getting the usage\n database: null\n # Credentials\n username: null\n include_views: true\n include_tables: true\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" + }, + { + "urn": "urn:li:dataPlatform:trino", + "name": "trino", + "displayName": "Trino", + "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/trino/", + "recipe": "source:\n type: trino\n config:\n # Coordinates\n host_port: null\n # The name of the catalog from getting the usage\n database: null\n # Credentials\n username: null\n include_views: true\n include_tables: true\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" + }, + { + "urn": "urn:li:dataPlatform:mssql", + "name": "mssql", + "displayName": "Microsoft SQL Server", + "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/mssql/", + "recipe": "source:\n type: mssql\n config:\n # Coordinates\n host_port: null\n # The name\n database: null\n # Credentials\n username: null\n include_views: true\n include_tables: true\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" + }, + { + "urn": "urn:li:dataPlatform:mariadb", + "name": "mariadb", + "displayName": "MariaDB", + "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/mariadb/", + "recipe": "source:\n type: mariadb\n config:\n # Coordinates\n host_port: null\n # The name\n database: null\n # Credentials\n username: null\n include_views: true\n include_tables: true\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true" }, { "urn": "urn:li:dataPlatform:mongodb", @@ -69,20 +104,6 @@ "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/mongodb/", "recipe": "source:\n type: mongodb\n config:\n # Coordinates\n connect_uri: # Your MongoDB connect URI, e.g. \"mongodb://localhost\"\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: \"${MONGO_USERNAME}\" # Your MongoDB username, e.g. admin\n password: \"${MONGO_PASSWORD}\" # Your MongoDB password, e.g. password_01\n\n # Options (recommended)\n enableSchemaInference: True\n useRandomSampling: True\n maxSchemaSize: 300" }, - { - "urn": "urn:li:dataPlatform:azure-ad", - "name": "azure-ad", - "displayName": "Azure AD", - "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/azure-ad/", - "recipe": "source:\n type: azure-ad\n config:\n client_id: # Your Azure Client ID, e.g. \"00000000-0000-0000-0000-000000000000\"\n tenant_id: # Your Azure Tenant ID, e.g. \"00000000-0000-0000-0000-000000000000\"\n # Add secret in Secrets Tab with this name\n client_secret: \"${AZURE_AD_CLIENT_SECRET}\"\n redirect: # Your Redirect URL, e.g. \"https://login.microsoftonline.com/common/oauth2/nativeclient\"\n authority: # Your Authority URL, e.g. \"https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000\"\n token_url: # Your Token URL, e.g. \"https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000/oauth2/token\"\n graph_url: # The Graph URL, e.g. 
\"https://graph.microsoft.com/v1.0\"\n \n # Optional flags to ingest users, groups, or both\n ingest_users: True\n ingest_groups: True\n \n # Optional Allow / Deny extraction of particular Groups\n # groups_pattern:\n # allow:\n # - \".*\"\n\n # Optional Allow / Deny extraction of particular Users.\n # users_pattern:\n # allow:\n # - \".*\"" - }, - { - "urn": "urn:li:dataPlatform:okta", - "name": "okta", - "displayName": "Okta", - "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/okta/", - "recipe": "source:\n type: okta\n config:\n # Coordinates\n okta_domain: # Your Okta Domain, e.g. \"dev-35531955.okta.com\"\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n okta_api_token: \"${OKTA_API_TOKEN}\" # Your Okta API Token, e.g. \"11be4R_M2MzDqXawbTHfKGpKee0kuEOfX1RCQSRx99\"\n\n # Optional flags to ingest users, groups, or both\n ingest_users: True\n ingest_groups: True\n\n # Optional: Customize the mapping to DataHub Username from an attribute appearing in the Okta User\n # profile. Reference: https://developer.okta.com/docs/reference/api/users/\n # okta_profile_to_username_attr: str = \"login\"\n # okta_profile_to_username_regex: str = \"([^@]+)\"\n \n # Optional: Customize the mapping to DataHub Group from an attribute appearing in the Okta Group\n # profile. Reference: https://developer.okta.com/docs/reference/api/groups/\n # okta_profile_to_group_name_attr: str = \"name\"\n # okta_profile_to_group_name_regex: str = \"(.*)\"\n \n # Optional: Include deprovisioned or suspended Okta users in the ingestion.\n # include_deprovisioned_users = False\n # include_suspended_users = False" - }, { "urn": "urn:li:dataPlatform:glue", "name": "glue", @@ -97,13 +118,6 @@ "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/oracle/", "recipe": "source: \n type: oracle\n config:\n # Coordinates\n host_port: # Your Oracle host and port, e.g. oracle:5432\n database: # Your Oracle database name, e.g. sample_db\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: \"${ORACLE_USERNAME}\" # Your Oracle username, e.g. admin\n password: \"${ORACLE_PASSWORD}\" # Your Oracle password, e.g. password_01\n\n # Optional service name\n # service_name: # Your service name, e.g. svc # omit database if using this option" }, - { - "urn": "urn:li:dataPlatform:hive", - "name": "hive", - "displayName": "Hive", - "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/hive/", - "recipe": "source: \n type: hive\n config:\n # Coordinates\n host_port: # Your Hive host and port, e.g. hive:10000\n database: # Your Hive database name, e.g. SampleDatabase (Optional, if not specified, ingests from all databases)\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n username: \"${HIVE_USERNAME}\" # Your Hive username, e.g. admin\n password: \"${HIVE_PASSWORD}\"# Your Hive password, e.g. 
password_01\n stateful_ingestion:\n enabled: true" - }, { "urn": "urn:li:dataPlatform:superset", "name": "superset", @@ -118,26 +132,12 @@ "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/athena/", "recipe": "source:\n type: athena\n config:\n # Coordinates\n aws_region: my_aws_region\n work_group: primary\n\n # Options\n s3_staging_dir: \"s3://my_staging_athena_results_bucket/results/\"" }, - { - "urn": "urn:li:dataPlatform:mssql", - "name": "mssql", - "displayName": "SQL Server", - "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/mssql/", - "recipe": "source:\n type: mssql\n config:\n # Coordinates\n host_port: localhost:1433\n database: DemoDatabase\n\n # Credentials\n username: user\n password: pass" - }, { "urn": "urn:li:dataPlatform:clickhouse", "name": "clickhouse", "displayName": "ClickHouse", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/clickhouse/", - "recipe": "source:\n type: clickhouse\n config:\n # Coordinates\n host_port: localhost:9000\n\n # Credentials\n username: user\n password: pass\n\n # Options\n platform_instance: DatabaseNameToBeIngested\n\n include_views: True # whether to include views, defaults to True\n include_tables: True # whether to include views, defaults to True\n\nsink:\n # sink configs\n\n#---------------------------------------------------------------------------\n# For the HTTP interface:\n#---------------------------------------------------------------------------\nsource:\n type: clickhouse\n config:\n host_port: localhost:8443\n protocol: https\n\n#---------------------------------------------------------------------------\n# For the Native interface:\n#---------------------------------------------------------------------------\n\nsource:\n type: clickhouse\n config:\n host_port: localhost:9440\n scheme: clickhouse+native\n secure: True" - }, - { - "urn": "urn:li:dataPlatform:trino", - "name": "trino", - "displayName": "Trino", - "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/trino/", - "recipe": "source:\n type: starburst-trino-usage\n config:\n # Coordinates\n host_port: yourtrinohost:port\n # The name of the catalog from getting the usage\n database: hive\n # Credentials\n username: trino_username\n password: trino_password\n email_domain: test.com\n audit_catalog: audit\n audit_schema: audit_schema" + "recipe": "source:\n type: clickhouse\n config:\n # Coordinates\n host_port: localhost:9000\n\n # Credentials\n username: user\n password: pass\n\n # Options\n platform_instance: DatabaseNameToBeIngested\n\n include_views: true # whether to include views, defaults to True\n include_tables: true # whether to include views, defaults to True\n\nsink:\n # sink configs\n\n#---------------------------------------------------------------------------\n# For the HTTP interface:\n#---------------------------------------------------------------------------\nsource:\n type: clickhouse\n config:\n host_port: localhost:8443\n protocol: https\n\n#---------------------------------------------------------------------------\n# For the Native interface:\n#---------------------------------------------------------------------------\n\nsource:\n type: clickhouse\n config:\n host_port: localhost:9440\n scheme: clickhouse+native\n secure: True" }, { "urn": "urn:li:dataPlatform:druid", @@ -153,13 +153,6 @@ "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/metabase/", "recipe": "source:\n type: metabase\n config:\n # Coordinates\n connect_uri:\n\n # 
Credentials\n username: root\n password: example" }, - { - "urn": "urn:li:dataPlatform:mariadb", - "name": "mariadb", - "displayName": "MariaDB", - "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/mariadb/", - "recipe": "source:\n type: mariadb\n config:\n # Coordinates\n host_port: localhost:3306\n database: dbname\n\n # Credentials\n username: root\n password: example" - }, { "urn": "urn:li:dataPlatform:powerbi", "name": "powerbi", @@ -174,6 +167,20 @@ "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/mode/", "recipe": "source:\n type: mode\n config:\n # Coordinates\n connect_uri: http://app.mode.com\n\n # Credentials\n token: token\n password: pass\n\n # Options\n workspace: \"datahub\"\n default_schema: \"public\"\n owner_username_instead_of_email: False\n api_options:\n retry_backoff_multiplier: 2\n max_retry_interval: 10\n max_attempts: 5" }, + { + "urn": "urn:li:dataPlatform:azure-ad", + "name": "azure-ad", + "displayName": "Azure AD", + "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/azure-ad/", + "recipe": "source:\n type: azure-ad\n config:\n client_id: # Your Azure Client ID, e.g. \"00000000-0000-0000-0000-000000000000\"\n tenant_id: # Your Azure Tenant ID, e.g. \"00000000-0000-0000-0000-000000000000\"\n # Add secret in Secrets Tab with this name\n client_secret: \"${AZURE_AD_CLIENT_SECRET}\"\n redirect: # Your Redirect URL, e.g. \"https://login.microsoftonline.com/common/oauth2/nativeclient\"\n authority: # Your Authority URL, e.g. \"https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000\"\n token_url: # Your Token URL, e.g. \"https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000/oauth2/token\"\n graph_url: # The Graph URL, e.g. \"https://graph.microsoft.com/v1.0\"\n \n # Optional flags to ingest users, groups, or both\n ingest_users: True\n ingest_groups: True\n \n # Optional Allow / Deny extraction of particular Groups\n # groups_pattern:\n # allow:\n # - \".*\"\n\n # Optional Allow / Deny extraction of particular Users.\n # users_pattern:\n # allow:\n # - \".*\"" + }, + { + "urn": "urn:li:dataPlatform:okta", + "name": "okta", + "displayName": "Okta", + "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/okta/", + "recipe": "source:\n type: okta\n config:\n # Coordinates\n okta_domain: # Your Okta Domain, e.g. \"dev-35531955.okta.com\"\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n okta_api_token: \"${OKTA_API_TOKEN}\" # Your Okta API Token, e.g. \"11be4R_M2MzDqXawbTHfKGpKee0kuEOfX1RCQSRx99\"\n\n # Optional flags to ingest users, groups, or both\n ingest_users: True\n ingest_groups: True\n\n # Optional: Customize the mapping to DataHub Username from an attribute appearing in the Okta User\n # profile. Reference: https://developer.okta.com/docs/reference/api/users/\n # okta_profile_to_username_attr: str = \"login\"\n # okta_profile_to_username_regex: str = \"([^@]+)\"\n \n # Optional: Customize the mapping to DataHub Group from an attribute appearing in the Okta Group\n # profile. 
Reference: https://developer.okta.com/docs/reference/api/groups/\n # okta_profile_to_group_name_attr: str = \"name\"\n # okta_profile_to_group_name_regex: str = \"(.*)\"\n \n # Optional: Include deprovisioned or suspended Okta users in the ingestion.\n # include_deprovisioned_users = False\n # include_suspended_users = False" + }, { "urn": "urn:li:dataPlatform:custom", "name": "custom", diff --git a/datahub-web-react/src/app/ingest/source/utils.ts b/datahub-web-react/src/app/ingest/source/utils.ts index b2f20c93614e2..c372388e958b7 100644 --- a/datahub-web-react/src/app/ingest/source/utils.ts +++ b/datahub-web-react/src/app/ingest/source/utils.ts @@ -11,6 +11,7 @@ import { EntityType, FacetMetadata } from '../../../types.generated'; import { capitalizeFirstLetterOnly, pluralize } from '../../shared/textUtil'; import EntityRegistry from '../../entity/EntityRegistry'; import { SourceConfig } from './builder/types'; +import { ListIngestionSourcesDocument, ListIngestionSourcesQuery } from '../../../graphql/ingestion.generated'; export const getSourceConfigs = (ingestionSources: SourceConfig[], sourceType: string) => { const sourceConfigs = ingestionSources.find((source) => source.name === sourceType); @@ -171,3 +172,85 @@ export const extractEntityTypeCountsFromFacets = ( return finalCounts; }; + +/** + * Add an entry to the ListIngestionSources cache. + */ +export const addToListIngestionSourcesCache = (client, newSource, pageSize, query) => { + // Read the data from our cache for this query. + const currData: ListIngestionSourcesQuery | null = client.readQuery({ + query: ListIngestionSourcesDocument, + variables: { + input: { + start: 0, + count: pageSize, + query, + }, + }, + }); + + // Add our new source into the existing list. + const newSources = [newSource, ...(currData?.listIngestionSources?.ingestionSources || [])]; + + // Write our data back to the cache. + client.writeQuery({ + query: ListIngestionSourcesDocument, + variables: { + input: { + start: 0, + count: pageSize, + query, + }, + }, + data: { + listIngestionSources: { + start: 0, + count: (currData?.listIngestionSources?.count || 0) + 1, + total: (currData?.listIngestionSources?.total || 0) + 1, + ingestionSources: newSources, + }, + }, + }); +}; + +/** + * Remove an entry from the ListIngestionSources cache. + */ +export const removeFromListIngestionSourcesCache = (client, urn, page, pageSize, query) => { + // Read the data from our cache for this query. + const currData: ListIngestionSourcesQuery | null = client.readQuery({ + query: ListIngestionSourcesDocument, + variables: { + input: { + start: (page - 1) * pageSize, + count: pageSize, + query, + }, + }, + }); + + // Remove the source from the existing sources set. + const newSources = [ + ...(currData?.listIngestionSources?.ingestionSources || []).filter((source) => source.urn !== urn), + ]; + + // Write our data back to the cache. 
+ client.writeQuery({ + query: ListIngestionSourcesDocument, + variables: { + input: { + start: (page - 1) * pageSize, + count: pageSize, + query, + }, + }, + data: { + listIngestionSources: { + start: currData?.listIngestionSources?.start || 0, + count: (currData?.listIngestionSources?.count || 1) - 1, + total: (currData?.listIngestionSources?.total || 1) - 1, + ingestionSources: newSources, + }, + }, + }); +}; diff --git a/datahub-web-react/src/app/onboarding/OnboardingConfig.tsx b/datahub-web-react/src/app/onboarding/OnboardingConfig.tsx new file mode 100644 index 0000000000000..28639a459f98d --- /dev/null +++ b/datahub-web-react/src/app/onboarding/OnboardingConfig.tsx @@ -0,0 +1,30 @@ +import { BusinessGlossaryOnboardingConfig } from './config/BusinessGlossaryOnboardingConfig'; +import { DomainsOnboardingConfig } from './config/DomainsOnboardingConfig'; +import { EntityProfileOnboardingConfig } from './config/EntityProfileOnboardingConfig'; +import { GroupsOnboardingConfig } from './config/GroupsOnboardingConfig'; +import { HomePageOnboardingConfig } from './config/HomePageOnboardingConfig'; +import { IngestionOnboardingConfig } from './config/IngestionOnboardingConfig'; +import { PoliciesOnboardingConfig } from './config/PoliciesOnboardingConfig'; +import { RolesOnboardingConfig } from './config/RolesOnboardingConfig'; +import { SearchOnboardingConfig } from './config/SearchOnboardingConfig'; +import { UsersOnboardingConfig } from './config/UsersOnboardingConfig'; +import { OnboardingStep } from './OnboardingStep'; + +const ALL_ONBOARDING_CONFIGS: OnboardingStep[][] = [ + HomePageOnboardingConfig, + SearchOnboardingConfig, + EntityProfileOnboardingConfig, + IngestionOnboardingConfig, + BusinessGlossaryOnboardingConfig, + DomainsOnboardingConfig, + UsersOnboardingConfig, + GroupsOnboardingConfig, + RolesOnboardingConfig, + PoliciesOnboardingConfig, +]; +export const OnboardingConfig: OnboardingStep[] = ALL_ONBOARDING_CONFIGS.reduce( + (acc, config) => [...acc, ...config], + [], +); + +export const CURRENT_ONBOARDING_IDS: string[] = OnboardingConfig.map((step) => step.id as string); diff --git a/datahub-web-react/src/app/onboarding/OnboardingStep.tsx b/datahub-web-react/src/app/onboarding/OnboardingStep.tsx new file mode 100644 index 0000000000000..9c2d83351afbe --- /dev/null +++ b/datahub-web-react/src/app/onboarding/OnboardingStep.tsx @@ -0,0 +1,9 @@ +import { ReactNode } from 'react'; + +export type OnboardingStep = { + id?: string; + title?: string | ReactNode; + content?: ReactNode; + selector?: string; + style?: any; +}; diff --git a/datahub-web-react/src/app/onboarding/OnboardingTour.tsx b/datahub-web-react/src/app/onboarding/OnboardingTour.tsx new file mode 100644 index 0000000000000..3205f837eb231 --- /dev/null +++ b/datahub-web-react/src/app/onboarding/OnboardingTour.tsx @@ -0,0 +1,63 @@ +import { Button } from 'antd'; +import React, { useContext, useEffect, useState } from 'react'; +import Tour from 'reactour'; +import { useBatchUpdateStepStatesMutation } from '../../graphql/step.generated'; +import { EducationStepsContext } from '../../providers/EducationStepsContext'; +import { StepStateResult } from '../../types.generated'; +import { useGetAuthenticatedUser } from '../useGetAuthenticatedUser'; +import { convertStepId, getStepsToRender } from './utils'; + +type Props = { + stepIds: string[]; +}; + +export const OnboardingTour = ({ stepIds }: Props) => { + const { educationSteps, setEducationSteps, educationStepIdsAllowlist } = useContext(EducationStepsContext); + const 
userUrn = useGetAuthenticatedUser()?.corpUser.urn; + const [isOpen, setIsOpen] = useState(true); + const [reshow, setReshow] = useState(false); + const accentColor = '#5cb7b7'; + + useEffect(() => { + function handleKeyDown(e) { + // Allow reshow if Cmnd + Ctrl + T is pressed + if (e.metaKey && e.ctrlKey && e.key === 't') { + setReshow(true); + setIsOpen(true); + } + } + document.addEventListener('keydown', handleKeyDown); + }, []); + + const steps = getStepsToRender(educationSteps, stepIds, userUrn || '', reshow); + const filteredSteps = steps.filter((step) => step.id && educationStepIdsAllowlist.has(step.id)); + const filteredStepIds: string[] = filteredSteps.map((step) => step?.id).filter((stepId) => !!stepId) as string[]; + + const [batchUpdateStepStates] = useBatchUpdateStepStatesMutation(); + + function closeTour() { + setIsOpen(false); + setReshow(false); + const convertedIds = filteredStepIds.map((id) => convertStepId(id, userUrn || '')); + const stepStates = convertedIds.map((id) => ({ id, properties: [] })); + batchUpdateStepStates({ variables: { input: { states: stepStates } } }).then(() => { + const results = convertedIds.map((id) => ({ id, properties: [{}] } as StepStateResult)); + setEducationSteps((existingSteps) => (existingSteps ? [...existingSteps, ...results] : results)); + }); + } + + if (!filteredSteps.length) return null; + + return ( + Let's go!} + /> + ); +}; diff --git a/datahub-web-react/src/app/onboarding/config/BusinessGlossaryOnboardingConfig.tsx b/datahub-web-react/src/app/onboarding/config/BusinessGlossaryOnboardingConfig.tsx new file mode 100644 index 0000000000000..6840f7e5e47c5 --- /dev/null +++ b/datahub-web-react/src/app/onboarding/config/BusinessGlossaryOnboardingConfig.tsx @@ -0,0 +1,71 @@ +import React from 'react'; +import { Typography } from 'antd'; +import { OnboardingStep } from '../OnboardingStep'; + +export const BUSINESS_GLOSSARY_INTRO_ID = 'business-glossary-intro'; +export const BUSINESS_GLOSSARY_CREATE_TERM_ID = 'business-glossary-create-term'; +export const BUSINESS_GLOSSARY_CREATE_TERM_GROUP_ID = 'business-glossary-create-term-group'; + +export const BusinessGlossaryOnboardingConfig: OnboardingStep[] = [ + { + id: BUSINESS_GLOSSARY_INTRO_ID, + title: 'Business Glossary 📖', + content: ( + +

+ Welcome to the Business Glossary! +

+

+ The Glossary is a collection of structured, standardized labels you can use to categorize data + assets. You can view and create both Terms and Term Groups here. +

+
+ ), + }, + { + id: BUSINESS_GLOSSARY_CREATE_TERM_ID, + selector: `#${BUSINESS_GLOSSARY_CREATE_TERM_ID}`, + title: 'Glossary Terms', + content: ( + +

+ Click here to create a new Term. +

+

+ Terms are words or phrases with a specific business definition assigned to them. +

+
+ ), + }, + { + id: BUSINESS_GLOSSARY_CREATE_TERM_GROUP_ID, + selector: `#${BUSINESS_GLOSSARY_CREATE_TERM_GROUP_ID}`, + title: 'Glossary Term Groups', + content: ( + +

+ Click here to create a new Term Group. +

+

+ Term Groups act as folders, containing Terms and even other Term Groups to allow + for nesting. +

+

+ For example, there could be a PII Term Group containing Terms for different types + of PII, such as Email or Phone Number. +

+

+ Learn more about the Business Glossary{' '} + + {' '} + here. + +

+
+ ), + }, +]; diff --git a/datahub-web-react/src/app/onboarding/config/DomainsOnboardingConfig.tsx b/datahub-web-react/src/app/onboarding/config/DomainsOnboardingConfig.tsx new file mode 100644 index 0000000000000..2a1c7003c4a5c --- /dev/null +++ b/datahub-web-react/src/app/onboarding/config/DomainsOnboardingConfig.tsx @@ -0,0 +1,43 @@ +import React from 'react'; +import { Typography } from 'antd'; +import { OnboardingStep } from '../OnboardingStep'; + +export const DOMAINS_INTRO_ID = 'domains-intro'; +export const DOMAINS_CREATE_DOMAIN_ID = 'domains-create-domain'; + +export const DomainsOnboardingConfig: OnboardingStep[] = [ + { + id: DOMAINS_INTRO_ID, + title: 'Domains', + content: ( + +

+ Welcome to DataHub Domains! +

+

+ Domains are collections of related data assets associated with a specific part of + your organization, such as the Marketing department. +

+

+ Learn more about Domains{' '} + + {' '} + here. + +

+
+ ), + }, + { + id: DOMAINS_CREATE_DOMAIN_ID, + selector: `#${DOMAINS_CREATE_DOMAIN_ID}`, + title: 'Create a new Domain', + content: ( + +

+ Click here to create a new Domain. +

+
+ ), + }, +]; diff --git a/datahub-web-react/src/app/onboarding/config/EntityProfileOnboardingConfig.tsx b/datahub-web-react/src/app/onboarding/config/EntityProfileOnboardingConfig.tsx new file mode 100644 index 0000000000000..a5e627705fffd --- /dev/null +++ b/datahub-web-react/src/app/onboarding/config/EntityProfileOnboardingConfig.tsx @@ -0,0 +1,188 @@ +import React from 'react'; +import { Typography } from 'antd'; +import { OnboardingStep } from '../OnboardingStep'; + +// Entity profile tabs. Note that the 'rc-tab' prefix for the ID is added by the antd library and may change in the future. +export const ENTITY_PROFILE_ENTITIES_ID = 'entity-profile-entities'; +export const ENTITY_PROFILE_PROPERTIES_ID = 'entity-profile-properties'; +export const ENTITY_PROFILE_DOCUMENTATION_ID = 'entity-profile-documentation'; +export const ENTITY_PROFILE_LINEAGE_ID = 'entity-profile-lineage'; +export const ENTITY_PROFILE_SCHEMA_ID = 'entity-profile-schema'; + +// Entity profile sidebar +export const ENTITY_PROFILE_OWNERS_ID = 'entity-profile-owners'; +export const ENTITY_PROFILE_TAGS_ID = 'entity-profile-tags'; +export const ENTITY_PROFILE_GLOSSARY_TERMS_ID = 'entity-profile-glossary-terms'; +export const ENTITY_PROFILE_DOMAINS_ID = 'entity-profile-domains'; + +export const EntityProfileOnboardingConfig: OnboardingStep[] = [ + { + id: ENTITY_PROFILE_ENTITIES_ID, + selector: `[id^='rc-tabs'][id$='Entities']`, + title: 'Entities Tab', + content: ( + +

+ You can view the Entities that belong to a Container on this tab. +

+
+ ), + }, + { + id: ENTITY_PROFILE_PROPERTIES_ID, + selector: `[id^='rc-tabs'][id$='Properties']`, + title: 'Properties Tab', + content: ( + +

+ You can view and edit an entity's key-value Properties on this tab. These are + sourced from the original Data Platform. +

+

+ If this tab is disabled, Properties have not been ingested for this entity. +

+
+ ), + }, + { + id: ENTITY_PROFILE_DOCUMENTATION_ID, + selector: `[id^='rc-tabs'][id$='Documentation']`, + title: 'Documentation Tab', + content: ( + +

+ You can view and edit an entity's Documentation on this tab. +

+

+ Documentation should provide descriptive information about this data asset. It can + also contain links to external resources. +

+
+ ), + }, + { + id: ENTITY_PROFILE_LINEAGE_ID, + selector: `[id^='rc-tabs'][id$='Lineage']`, + title: 'Lineage Tab', + content: ( + +

+ You can view an entity's Lineage on this tab. +

+

+ Data Lineage allows you to visualize and understand both the upstream dependencies + and downstream consumers of this entity. +

+

+ If this tab is disabled, Lineage has not been ingested for this entity. +

+
+ ), + }, + { + id: ENTITY_PROFILE_SCHEMA_ID, + selector: `[id^='rc-tabs'][id$='Schema']`, + title: 'Schema Tab', + content: ( + +

+ You can view a Dataset's Schema on this tab. +

+

+ You can also view or add Documentation, Tags, and{' '} + Glossary Terms for specific columns. +

+
+ ), + }, + { + id: ENTITY_PROFILE_OWNERS_ID, + selector: `#${ENTITY_PROFILE_OWNERS_ID}`, + title: 'Owners', + content: ( + +

+ You can view and add Owners to this asset here. +

+

+ Owners are Users or Groups who are responsible + for managing this asset. +

+
+ ), + }, + { + id: ENTITY_PROFILE_TAGS_ID, + selector: `#${ENTITY_PROFILE_TAGS_ID}`, + title: 'Tags', + content: ( + +

+ You can view and add Tags to this asset here. +

+

+ Tags are labels for organizing your data. For example, you can add a Tag marking an + asset as Deprecated. +

+

+ Learn more about Tags{' '} + + {' '} + here.{' '} + +

+
+ ), + }, + { + id: ENTITY_PROFILE_GLOSSARY_TERMS_ID, + selector: `#${ENTITY_PROFILE_GLOSSARY_TERMS_ID}`, + title: 'Glossary Terms', + content: ( + +

+ You can view and add Glossary Terms to this asset here. +

+

+ Glossary Terms are structured, standardized labels for organizing your + mission-critical data. For example, if you're marking assets containing PII fields, you might + add the Term Email. +

+

+ Learn more about Glossary Terms{' '} + + {' '} + here. + +

+
+ ), + }, + { + id: ENTITY_PROFILE_DOMAINS_ID, + selector: `#${ENTITY_PROFILE_DOMAINS_ID}`, + title: 'Domain', + content: ( + +

+ You can view and set this asset's Domain here. +

+

+ Domains are collections of related data assets associated with a specific part of + your organization, such as the Marketing department. +

+

+ Learn more about Domains{' '} + + {' '} + here. + +

+
+ ), + }, +]; diff --git a/datahub-web-react/src/app/onboarding/config/GroupsOnboardingConfig.tsx b/datahub-web-react/src/app/onboarding/config/GroupsOnboardingConfig.tsx new file mode 100644 index 0000000000000..c9dcee7f50649 --- /dev/null +++ b/datahub-web-react/src/app/onboarding/config/GroupsOnboardingConfig.tsx @@ -0,0 +1,51 @@ +import React from 'react'; +import { Typography } from 'antd'; +import { OnboardingStep } from '../OnboardingStep'; + +export const GROUPS_INTRO_ID = 'groups-intro'; +export const GROUPS_CREATE_GROUP_ID = 'groups-create-group'; + +export const GroupsOnboardingConfig: OnboardingStep[] = [ + { + id: GROUPS_INTRO_ID, + title: 'Groups', + content: ( + +

+ Welcome to DataHub Groups! +

+

+ Groups are collections of users which can be used to assign ownership to assets and + manage access. +

+

+ Groups can be created natively within DataHub, or synced from your Identity + Provider. +

+

+ Learn more about Groups{' '} + + {' '} + here. + +

+
+ ), + }, + { + id: GROUPS_CREATE_GROUP_ID, + selector: `#${GROUPS_CREATE_GROUP_ID}`, + title: 'Create a new Group', + content: ( + +

+ Click here to create a new Group. +

+
+ ), + }, +]; diff --git a/datahub-web-react/src/app/onboarding/config/HomePageOnboardingConfig.tsx b/datahub-web-react/src/app/onboarding/config/HomePageOnboardingConfig.tsx new file mode 100644 index 0000000000000..28a0465a1b2f7 --- /dev/null +++ b/datahub-web-react/src/app/onboarding/config/HomePageOnboardingConfig.tsx @@ -0,0 +1,124 @@ +import React from 'react'; +import { Image, Typography } from 'antd'; +import { OnboardingStep } from '../OnboardingStep'; +import { ANTD_GRAY } from '../../entity/shared/constants'; + +export const GLOBAL_WELCOME_TO_DATAHUB_ID = 'global-welcome-to-datahub'; +export const HOME_PAGE_INGESTION_ID = 'home-page-ingestion'; +export const HOME_PAGE_DOMAINS_ID = 'home-page-domains'; +export const HOME_PAGE_PLATFORMS_ID = 'home-page-platforms'; +export const HOME_PAGE_MOST_POPULAR_ID = 'home-page-most-popular'; +export const HOME_PAGE_SEARCH_BAR_ID = 'home-page-search-bar'; + +export const HomePageOnboardingConfig: OnboardingStep[] = [ + { + id: GLOBAL_WELCOME_TO_DATAHUB_ID, + content: ( +
+ + Welcome to DataHub! 👋 + + DataHub helps you discover and organize the important data within your + organization. You can: + + +
    +
  • + Quickly search for Datasets, Dashboards, Data Pipelines, and more +
  • +
  • + View and understand the full end-to-end Lineage of how data is created, + transformed, and consumed +
  • +
  • + Gain insights about how others within your organization are using data +
  • +
  • + Define ownership and capture knowledge to empower others +
  • +
+

Let's get started! 🚀

+
+ 💡 + + Press Cmd + Ctrl + T to open up this tutorial at any time. + +
+
+
+ ), + style: { minWidth: '650px' }, + }, + { + id: HOME_PAGE_INGESTION_ID, + selector: `#${HOME_PAGE_INGESTION_ID}`, + title: 'Ingest Data', + content: ( + + Start integrating your data sources immediately by navigating to the Ingestion page. + + ), + }, + { + id: HOME_PAGE_DOMAINS_ID, + selector: `#${HOME_PAGE_DOMAINS_ID}`, + title: 'Explore by Domain', + content: ( + + Here are your organization's Domains. Domains are collections of data assets - + such as Tables, Dashboards, and ML Models - that make it easy to discover information relevant to a + particular part of your organization. + + ), + }, + { + id: HOME_PAGE_PLATFORMS_ID, + selector: `#${HOME_PAGE_PLATFORMS_ID}`, + title: 'Explore by Platform', + content: ( + + Here are your organization's Data Platforms. Data Platforms represent specific + third-party Data Systems or Tools. Examples include Data Warehouses like Snowflake, + Orchestrators like + Airflow, and Dashboarding tools like Looker. + + ), + }, + { + id: HOME_PAGE_MOST_POPULAR_ID, + selector: `#${HOME_PAGE_MOST_POPULAR_ID}`, + title: 'Explore Most Popular', + content: "Here you'll find the assets that are viewed most frequently within your organization.", + }, + { + id: HOME_PAGE_SEARCH_BAR_ID, + selector: `#${HOME_PAGE_SEARCH_BAR_ID}`, + title: 'Find your Data 🔍', + content: ( + +

+ This is the Search Bar. It will serve as your launch point for discovering and + collaborating around the data most important to you. +

+

+ Not sure where to start? Click on Explore All! +

+
+ ), + }, +]; diff --git a/datahub-web-react/src/app/onboarding/config/IngestionOnboardingConfig.tsx b/datahub-web-react/src/app/onboarding/config/IngestionOnboardingConfig.tsx new file mode 100644 index 0000000000000..690917095f615 --- /dev/null +++ b/datahub-web-react/src/app/onboarding/config/IngestionOnboardingConfig.tsx @@ -0,0 +1,45 @@ +import React from 'react'; +import { Typography } from 'antd'; +import { OnboardingStep } from '../OnboardingStep'; + +export const INGESTION_CREATE_SOURCE_ID = 'ingestion-create-source'; +export const INGESTION_REFRESH_SOURCES_ID = 'ingestion-refresh-sources'; + +export const IngestionOnboardingConfig: OnboardingStep[] = [ + { + id: INGESTION_CREATE_SOURCE_ID, + selector: `#${INGESTION_CREATE_SOURCE_ID}`, + title: 'Create a new Ingestion Source', + content: ( + +

+ Click here to configure new Integrations from DataHub to your Data Platforms, + including Transactional Databases like MySQL, Data Warehouses like{' '} + Snowflake, Dashboarding tools like Looker, and many more! +

+

+ Learn more about ingestion and view the full list of supported Integrations{' '} + + {' '} + here. + +

+
+ ), + }, + { + id: INGESTION_REFRESH_SOURCES_ID, + selector: `#${INGESTION_REFRESH_SOURCES_ID}`, + title: 'Refresh Ingestion Pipelines', + content: ( + +

Click here to refresh and check whether new ingestion pipelines have been set up.

+

You can view both pipelines created on this page and those set up using the DataHub CLI.

+
+ ), + }, +]; diff --git a/datahub-web-react/src/app/onboarding/config/PoliciesOnboardingConfig.tsx b/datahub-web-react/src/app/onboarding/config/PoliciesOnboardingConfig.tsx new file mode 100644 index 0000000000000..2d8062cd2ad21 --- /dev/null +++ b/datahub-web-react/src/app/onboarding/config/PoliciesOnboardingConfig.tsx @@ -0,0 +1,50 @@ +import React from 'react'; +import { Typography } from 'antd'; +import { OnboardingStep } from '../OnboardingStep'; + +export const POLICIES_INTRO_ID = 'policies-intro'; +export const POLICIES_CREATE_POLICY_ID = 'policies-create-policy'; + +export const PoliciesOnboardingConfig: OnboardingStep[] = [ + { + id: POLICIES_INTRO_ID, + title: 'Policies', + content: ( + +

+ Welcome to DataHub Policies! +

+

+ If you need fine-grained access controls, Policies will do the trick. +

+

+ For most users, Roles are the recommended way to manage permissions on DataHub. If + Roles do not fit your use case, then Policies can be used. +

+

+ Learn more about Policies{' '} + + {' '} + here. + +

+
+ ), + }, + { + id: POLICIES_CREATE_POLICY_ID, + selector: `#${POLICIES_CREATE_POLICY_ID}`, + title: 'Create a new Policy', + content: ( + +

+ Click here to create a new Policy. +

+
+ ), + }, +]; diff --git a/datahub-web-react/src/app/onboarding/config/RolesOnboardingConfig.tsx b/datahub-web-react/src/app/onboarding/config/RolesOnboardingConfig.tsx new file mode 100644 index 0000000000000..cd83cb8da6913 --- /dev/null +++ b/datahub-web-react/src/app/onboarding/config/RolesOnboardingConfig.tsx @@ -0,0 +1,37 @@ +import React from 'react'; +import { Typography } from 'antd'; +import { OnboardingStep } from '../OnboardingStep'; + +export const ROLES_INTRO_ID = 'roles-intro'; + +export const RolesOnboardingConfig: OnboardingStep[] = [ + { + id: ROLES_INTRO_ID, + title: 'Roles', + content: ( + +

+ Welcome to DataHub Roles! +

+

+ Roles are the recommended way to manage permissions on DataHub. +

+

+ DataHub currently supports three out-of-the-box Roles: Admin,{' '} + Editor and Reader. +

+

+ Learn more about Roles and the different permissions for each Role{' '} + + {' '} + here. + +

+
+ ), + }, +]; diff --git a/datahub-web-react/src/app/onboarding/config/SearchOnboardingConfig.tsx b/datahub-web-react/src/app/onboarding/config/SearchOnboardingConfig.tsx new file mode 100644 index 0000000000000..2497a967d3a51 --- /dev/null +++ b/datahub-web-react/src/app/onboarding/config/SearchOnboardingConfig.tsx @@ -0,0 +1,30 @@ +import React from 'react'; +import { Typography } from 'antd'; +import { OnboardingStep } from '../OnboardingStep'; + +export const SEARCH_RESULTS_FILTERS_ID = 'search-results-filters'; +export const SEARCH_RESULTS_ADVANCED_SEARCH_ID = 'search-results-advanced-search'; + +export const SearchOnboardingConfig: OnboardingStep[] = [ + { + id: SEARCH_RESULTS_FILTERS_ID, + selector: `#${SEARCH_RESULTS_FILTERS_ID}`, + title: 'Narrow your search ⚡', + content: ( + + Quickly find relevant assets by applying one or more filters. Try filtering by{' '} + Entity Type, Owner, and more! + + ), + }, + { + id: SEARCH_RESULTS_ADVANCED_SEARCH_ID, + selector: `#${SEARCH_RESULTS_ADVANCED_SEARCH_ID}`, + title: 'Dive Deeper with Advanced Search 💪', + content: ( + + Use Advanced Search to find specific assets using granular filter predicates. + + ), + }, +]; diff --git a/datahub-web-react/src/app/onboarding/config/UsersOnboardingConfig.tsx b/datahub-web-react/src/app/onboarding/config/UsersOnboardingConfig.tsx new file mode 100644 index 0000000000000..d2ff34f47735e --- /dev/null +++ b/datahub-web-react/src/app/onboarding/config/UsersOnboardingConfig.tsx @@ -0,0 +1,95 @@ +import React from 'react'; +import { Typography } from 'antd'; +import { OnboardingStep } from '../OnboardingStep'; + +export const USERS_INTRO_ID = 'users-intro'; +export const USERS_SSO_ID = 'users-sso'; +export const USERS_INVITE_LINK_ID = 'users-invite-link'; +export const USERS_ASSIGN_ROLE_ID = 'users-assign-role'; + +export const UsersOnboardingConfig: OnboardingStep[] = [ + { + id: USERS_INTRO_ID, + title: 'Users', + content: ( + +

+ Welcome to DataHub Users! +

+

+ There are a few different ways to onboard new Users onto DataHub. +

+
+ ), + }, + { + id: USERS_SSO_ID, + title: 'Configuring Single Sign-On (SSO)', + content: ( + +

+ The preferred way to onboard new Users is to use Single Sign-On. + Currently, DataHub supports OIDC SSO. +

+

+ Learn more about how to configure Single Sign-On{' '} + + {' '} + here. + +

+
+ ), + }, + { + id: USERS_INVITE_LINK_ID, + selector: `#${USERS_INVITE_LINK_ID}`, + title: 'Invite New Users', + content: ( + +

+ Easily share an invite link with your colleagues to onboard them onto DataHub. Optionally assign a{' '} + Role to anyone who joins using the link. +

+

+ Learn more about configuring invite links{' '} + + {' '} + here. + +

+
+ ), + }, + { + id: USERS_ASSIGN_ROLE_ID, + selector: `#${USERS_ASSIGN_ROLE_ID}`, + title: 'Assigning Roles', + content: ( + +

+ You can assign Roles to existing Users here. +

+

+ Learn more about Roles{' '} + + {' '} + here. + +

+
+ ), + }, +]; diff --git a/datahub-web-react/src/app/onboarding/useUpdateEducationStepIdsAllowlist.tsx b/datahub-web-react/src/app/onboarding/useUpdateEducationStepIdsAllowlist.tsx new file mode 100644 index 0000000000000..4eb1f6c02b6b8 --- /dev/null +++ b/datahub-web-react/src/app/onboarding/useUpdateEducationStepIdsAllowlist.tsx @@ -0,0 +1,20 @@ +import { useContext, useEffect } from 'react'; +import { EducationStepsContext } from '../../providers/EducationStepsContext'; + +export function useUpdateEducationStepIdsAllowlist(condition: boolean, id: string) { + const { educationStepIdsAllowlist, setEducationStepIdsAllowlist } = useContext(EducationStepsContext); + + useEffect(() => { + const allowlistIncludesStepId = educationStepIdsAllowlist.has(id); + + if (condition && !allowlistIncludesStepId) { + const newStepdIdsAllowlist: Set = new Set(educationStepIdsAllowlist); + newStepdIdsAllowlist.add(id); + setEducationStepIdsAllowlist(newStepdIdsAllowlist); + } else if (!condition && allowlistIncludesStepId) { + const newStepdIdsAllowlist: Set = new Set(educationStepIdsAllowlist); + newStepdIdsAllowlist.delete(id); + setEducationStepIdsAllowlist(newStepdIdsAllowlist); + } + }, [condition, id, educationStepIdsAllowlist, setEducationStepIdsAllowlist]); +} diff --git a/datahub-web-react/src/app/onboarding/utils.tsx b/datahub-web-react/src/app/onboarding/utils.tsx new file mode 100644 index 0000000000000..202760bc1ba11 --- /dev/null +++ b/datahub-web-react/src/app/onboarding/utils.tsx @@ -0,0 +1,49 @@ +import { Typography } from 'antd'; +import React from 'react'; +import styled from 'styled-components'; +import { StepStateResult } from '../../types.generated'; +import { OnboardingConfig } from './OnboardingConfig'; +import { OnboardingStep } from './OnboardingStep'; + +export function convertStepId(stepId: string, userUrn: string) { + const step = OnboardingConfig.find((configStep) => configStep.id === stepId); + return `${userUrn}-${step?.id}`; +} + +// used to get all of the steps on our initial fetch +export function getStepIds(userUrn: string) { + return OnboardingConfig.map((step) => `${userUrn}-${step.id}`); +} + +const StepTitle = styled(Typography.Title)` + margin-botton: 5px; +`; + +export function getStepsToRender( + educationSteps: StepStateResult[] | null, + stepIds: string[], + userUrn: string, + reshow: boolean, +): OnboardingStep[] { + if (!educationSteps) return []; + const filteredStepIds: string[] = reshow + ? stepIds + : stepIds.filter((stepId) => { + const convertedStepId = convertStepId(stepId, userUrn); + // if we don't have this step in our educationSteps from GMS we haven't seen it yet + return !educationSteps.find((step) => step.id === convertedStepId); + }); + + return filteredStepIds + .map((stepId) => OnboardingConfig.find((step: OnboardingStep) => step.id === stepId)) + .filter((step) => !!step) + .map((step) => ({ + ...step, + content: ( +
+ {step?.title} +
{step?.content}
+
+ ), + })); +} diff --git a/datahub-web-react/src/app/permissions/policy/ManagePolicies.tsx b/datahub-web-react/src/app/permissions/policy/ManagePolicies.tsx index 301a64a715f79..e38cde0e5f99f 100644 --- a/datahub-web-react/src/app/permissions/policy/ManagePolicies.tsx +++ b/datahub-web-react/src/app/permissions/policy/ManagePolicies.tsx @@ -35,6 +35,8 @@ import { ANTD_GRAY } from '../../entity/shared/constants'; import { SearchBar } from '../../search/SearchBar'; import { scrollToTop } from '../../shared/searchUtils'; import analytics, { EventType } from '../../analytics'; +import { POLICIES_CREATE_POLICY_ID, POLICIES_INTRO_ID } from '../../onboarding/config/PoliciesOnboardingConfig'; +import { OnboardingTour } from '../../onboarding/OnboardingTour'; const SourceContainer = styled.div``; @@ -442,6 +444,7 @@ export const ManagePolicies = () => { return ( + {policiesLoading && !policiesData && ( )} @@ -450,7 +453,12 @@ export const ManagePolicies = () => {
-
diff --git a/datahub-web-react/src/app/permissions/roles/ManageRoles.tsx b/datahub-web-react/src/app/permissions/roles/ManageRoles.tsx index 8fbc932abb14c..6d8f92dfb7471 100644 --- a/datahub-web-react/src/app/permissions/roles/ManageRoles.tsx +++ b/datahub-web-react/src/app/permissions/roles/ManageRoles.tsx @@ -17,6 +17,8 @@ import { CorpUser, DataHubRole, DataHubPolicy } from '../../../types.generated'; import RoleDetailsModal from './RoleDetailsModal'; import analytics, { EventType } from '../../analytics'; import { ANTD_GRAY } from '../../entity/shared/constants'; +import { OnboardingTour } from '../../onboarding/OnboardingTour'; +import { ROLES_INTRO_ID } from '../../onboarding/config/RolesOnboardingConfig'; const SourceContainer = styled.div``; @@ -212,6 +214,7 @@ export const ManageRoles = () => { return ( + {rolesLoading && !rolesData && ( )} diff --git a/datahub-web-react/src/app/search/SearchBar.tsx b/datahub-web-react/src/app/search/SearchBar.tsx index 135696b88c714..c5b686de3ddc6 100644 --- a/datahub-web-react/src/app/search/SearchBar.tsx +++ b/datahub-web-react/src/app/search/SearchBar.tsx @@ -1,7 +1,7 @@ import React, { useEffect, useMemo, useState, useRef } from 'react'; import { Input, AutoComplete, Image, Typography } from 'antd'; import { SearchOutlined } from '@ant-design/icons'; -import styled from 'styled-components'; +import styled from 'styled-components/macro'; import { useHistory } from 'react-router'; import { AutoCompleteResultForEntity, CorpUser, Entity, EntityType, ScenarioType, Tag } from '../../types.generated'; import { IconStyleType } from '../entity/Entity'; diff --git a/datahub-web-react/src/app/search/SearchFiltersSection.tsx b/datahub-web-react/src/app/search/SearchFiltersSection.tsx index cca78ae2ae492..b315e55d7ebba 100644 --- a/datahub-web-react/src/app/search/SearchFiltersSection.tsx +++ b/datahub-web-react/src/app/search/SearchFiltersSection.tsx @@ -6,6 +6,7 @@ import { UnionType } from './utils/constants'; import { hasAdvancedFilters } from './utils/hasAdvancedFilters'; import { AdvancedSearchFilters } from './AdvancedSearchFilters'; import { SimpleSearchFilters } from './SimpleSearchFilters'; +import { SEARCH_RESULTS_ADVANCED_SEARCH_ID } from '../onboarding/config/SearchOnboardingConfig'; type Props = { filters?: Array | null; @@ -72,6 +73,7 @@ export const SearchFiltersSection = ({ disabled={onlyShowAdvancedFilters} type="link" onClick={() => setSeeAdvancedFilters(!seeAdvancedFilters)} + id={SEARCH_RESULTS_ADVANCED_SEARCH_ID} > {seeAdvancedFilters ? 
'Basic' : 'Advanced'} diff --git a/datahub-web-react/src/app/search/SearchPage.tsx b/datahub-web-react/src/app/search/SearchPage.tsx index 1f0f112ba8f86..ed9377af97f9d 100644 --- a/datahub-web-react/src/app/search/SearchPage.tsx +++ b/datahub-web-react/src/app/search/SearchPage.tsx @@ -14,6 +14,11 @@ import { GetSearchResultsParams } from '../entity/shared/components/styled/searc import { EntityAndType } from '../entity/shared/types'; import { scrollToTop } from '../shared/searchUtils'; import { generateOrFilters } from './utils/generateOrFilters'; +import { OnboardingTour } from '../onboarding/OnboardingTour'; +import { + SEARCH_RESULTS_ADVANCED_SEARCH_ID, + SEARCH_RESULTS_FILTERS_ID, +} from '../onboarding/config/SearchOnboardingConfig'; type SearchPageParams = { type?: string; @@ -147,6 +152,7 @@ export const SearchPage = () => { return ( <> + {!loading && } >> + id="search-result-list" dataSource={searchResults} split={false} locale={{ diff --git a/datahub-web-react/src/app/search/SearchResults.tsx b/datahub-web-react/src/app/search/SearchResults.tsx index 83b6cf6f56c36..3eaee3a7620ef 100644 --- a/datahub-web-react/src/app/search/SearchResults.tsx +++ b/datahub-web-react/src/app/search/SearchResults.tsx @@ -25,6 +25,7 @@ import { ErrorSection } from '../shared/error/ErrorSection'; import { UnionType } from './utils/constants'; import { SearchFiltersSection } from './SearchFiltersSection'; import { generateOrFilters } from './utils/generateOrFilters'; +import { SEARCH_RESULTS_FILTERS_ID } from '../onboarding/config/SearchOnboardingConfig'; const SearchBody = styled.div` display: flex; @@ -140,14 +141,16 @@ export const SearchResults = ({ {loading && }
- +
+ +
<> @@ -194,7 +197,7 @@ export const SearchResults = ({ selectedEntities={selectedEntities} setSelectedEntities={setSelectedEntities} /> - + {showAnalytics && ( @@ -92,7 +96,7 @@ export function HeaderLinks(props: Props) { {showIngestion && ( -
- -Read on if you are interested in ingesting Snowflake metadata using the **datahub** cli, or want to learn about all the configuration parameters that are supported by the connectors. \ No newline at end of file +Read on if you are interested in ingesting Snowflake metadata using the **datahub** cli, or want to learn about all the configuration parameters that are supported by the connectors. diff --git a/metadata-ingestion/docs/sources/snowflake/snowflake-legacy_pre.md b/metadata-ingestion/docs/sources/snowflake/snowflake-legacy_pre.md deleted file mode 100644 index a2190b7c1ba50..0000000000000 --- a/metadata-ingestion/docs/sources/snowflake/snowflake-legacy_pre.md +++ /dev/null @@ -1,56 +0,0 @@ -### Prerequisites - -In order to execute this source, your Snowflake user will need to have specific privileges granted to it for reading metadata -from your warehouse. - -You can use the `provision_role` block in the recipe to grant the requires roles. - -If your system admins prefer running the commands themselves then they can follow this guide to create a DataHub-specific role, assign it the required privileges, and assign it to a new DataHub user by executing the following Snowflake commands from a user with the `ACCOUNTADMIN` role or `MANAGE GRANTS` privilege. - -```sql -create or replace role datahub_role; - -// Grant access to a warehouse to run queries to view metadata -grant operate, usage on warehouse "" to role datahub_role; - -// Grant access to view database and schema in which your tables/views exist -grant usage on DATABASE "" to role datahub_role; -grant usage on all schemas in database "" to role datahub_role; -grant usage on future schemas in database "" to role datahub_role; - -// If you are NOT using Snowflake Profiling feature: Grant references privileges to your tables and views -grant references on all tables in database "" to role datahub_role; -grant references on future tables in database "" to role datahub_role; -grant references on all external tables in database "" to role datahub_role; -grant references on future external tables in database "" to role datahub_role; -grant references on all views in database "" to role datahub_role; -grant references on future views in database "" to role datahub_role; - -// If you ARE using Snowflake Profiling feature: Grant select privileges to your tables and views -grant select on all tables in database "" to role datahub_role; -grant select on future tables in database "" to role datahub_role; -grant select on all external tables in database "" to role datahub_role; -grant select on future external tables in database "" to role datahub_role; -grant select on all views in database "" to role datahub_role; -grant select on future views in database "" to role datahub_role; - -// Create a new DataHub user and assign the DataHub role to it -create user datahub_user display_name = 'DataHub' password='' default_role = datahub_role default_warehouse = ''; - -// Grant the datahub_role to the new DataHub user. -grant role datahub_role to user datahub_user; -``` - -The details of each granted privilege can be viewed in [snowflake docs](https://docs.snowflake.com/en/user-guide/security-access-control-privileges.html). A summarization of each privilege, and why it is required for this connector: -- `operate` is required on warehouse to execute queries -- `usage` is required for us to run queries using the warehouse -- `usage` on `database` and `schema` are required because without it tables and views inside them are not accessible. 
If an admin does the required grants on `table` but misses the grants on `schema` or the `database` in which the table/view exists then we will not be able to get metadata for the table/view. -- If metadata is required only on some schemas then you can grant the usage privilieges only on a particular schema like -```sql -grant usage on schema ""."" to role datahub_role; -``` -- To get the lineage and usage data we need access to the default `snowflake` database - -This represents the bare minimum privileges required to extract databases, schemas, views, tables from Snowflake. - -If you plan to enable extraction of table lineage, via the `include_table_lineage` config flag, you'll need to grant additional privileges. See [snowflake usage prerequisites](#prerequisites-1) as the same privilege is required for this purpose too. diff --git a/metadata-ingestion/docs/sources/snowflake/snowflake-legacy_recipe.yml b/metadata-ingestion/docs/sources/snowflake/snowflake-legacy_recipe.yml deleted file mode 100644 index 190d7cd6d3762..0000000000000 --- a/metadata-ingestion/docs/sources/snowflake/snowflake-legacy_recipe.yml +++ /dev/null @@ -1,53 +0,0 @@ -source: - type: snowflake-legacy - config: - - check_role_grants: True - provision_role: # Optional - enabled: false - dry_run: true - run_ingestion: false - admin_username: "${SNOWFLAKE_ADMIN_USER}" - admin_password: "${SNOWFLAKE_ADMIN_PASS}" - - # This option is recommended to be used for the first time to ingest all lineage - ignore_start_time_lineage: true - # This is an alternative option to specify the start_time for lineage - # if you don't want to look back since beginning - start_time: '2022-03-01T00:00:00Z' - - # Coordinates - account_id: "abc48144" - warehouse: "COMPUTE_WH" - - # Credentials - username: "${SNOWFLAKE_USER}" - password: "${SNOWFLAKE_PASS}" - role: "datahub_role" - - # Change these as per your database names. Remove to get all databases - database_pattern: - allow: - - "^ACCOUNTING_DB$" - - "^MARKETING_DB$" - schema_pattern: - deny: - - "information_schema.*" - table_pattern: - allow: - # If you want to ingest only few tables with name revenue and sales - - ".*revenue" - - ".*sales" - - profiling: - # Change to false to disable profiling - enabled: true - profile_pattern: - allow: - - 'ACCOUNTING_DB.*.*' - - 'MARKETING_DB.*.*' - deny: - - '.*information_schema.*' - -# Default sink is datahub-rest and doesn't need to be configured -# See https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for customization options diff --git a/metadata-ingestion/docs/sources/snowflake/snowflake-usage-legacy_pre.md b/metadata-ingestion/docs/sources/snowflake/snowflake-usage-legacy_pre.md deleted file mode 100644 index a5082273bf881..0000000000000 --- a/metadata-ingestion/docs/sources/snowflake/snowflake-usage-legacy_pre.md +++ /dev/null @@ -1,26 +0,0 @@ -### Prerequisites - -In order to execute the `snowflake-usage-legacy` source, your Snowflake user will need to have specific privileges granted to it. Specifically, you'll need to grant access to the [Account Usage](https://docs.snowflake.com/en/sql-reference/account-usage.html) system tables, using which the DataHub source extracts information. Assuming you've followed the steps outlined in `snowflake` plugin to create a DataHub-specific User & Role, you'll simply need to execute the following commands in Snowflake. This will require a user with the `ACCOUNTADMIN` role (or a role granted the IMPORT SHARES global privilege). 
Please see [Snowflake docs for more details](https://docs.snowflake.com/en/user-guide/data-share-consumers.html). - -```sql -grant imported privileges on database snowflake to role datahub_role; -``` - -### Capabilities - -This plugin extracts the following: - -- Statistics on queries issued and tables and columns accessed (excludes views) -- Aggregation of these statistics into buckets, by day or hour granularity - - -:::note - -This source only does usage statistics. To get the tables, views, and schemas in your Snowflake warehouse, ingest using the `snowflake-legacy` source described above. - -::: - -### Caveats -- Some of the features are only available in the Snowflake Enterprise Edition. This docs has notes mentioning where this applies. -- The underlying Snowflake views that we use to get metadata have a [latency of 45 minutes to 3 hours](https://docs.snowflake.com/en/sql-reference/account-usage.html#differences-between-account-usage-and-information-schema). So we would not be able to get very recent metadata in some cases like queries you ran within that time period etc.. -- If there is any [incident going on for Snowflake](https://status.snowflake.com/) we will not be able to get the metadata until that incident is resolved. diff --git a/metadata-ingestion/docs/sources/snowflake/snowflake-usage-legacy_recipe.yml b/metadata-ingestion/docs/sources/snowflake/snowflake-usage-legacy_recipe.yml deleted file mode 100644 index e1eb4ca7b6784..0000000000000 --- a/metadata-ingestion/docs/sources/snowflake/snowflake-usage-legacy_recipe.yml +++ /dev/null @@ -1,27 +0,0 @@ -source: - type: snowflake-usage-legacy - config: - # Coordinates - account_id: account_name - warehouse: "COMPUTE_WH" - - # Credentials - username: "${SNOWFLAKE_USER}" - password: "${SNOWFLAKE_PASS}" - role: "datahub_role" - - # Options - top_n_queries: 10 - email_domain: mycompany.com - - database_pattern: - allow: - - "^ACCOUNTING_DB$" - - "^MARKETING_DB$" - schema_pattern: - deny: - - "information_schema.*" - -# Default sink is datahub-rest and doesn't need to be configured -# See https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for customization options - diff --git a/metadata-ingestion/docs/sources/snowflake/snowflake_recipe.yml b/metadata-ingestion/docs/sources/snowflake/snowflake_recipe.yml index 66e6524fb2aa7..7e8dbcff88e1c 100644 --- a/metadata-ingestion/docs/sources/snowflake/snowflake_recipe.yml +++ b/metadata-ingestion/docs/sources/snowflake/snowflake_recipe.yml @@ -1,11 +1,8 @@ source: type: snowflake config: - # This option is recommended to be used for the first time to ingest all lineage + # This option is recommended to be used to ingest all lineage ignore_start_time_lineage: true - # This is an alternative option to specify the start_time for lineage - # if you don't want to look back since beginning - start_time: "2022-03-01T00:00:00Z" # Coordinates account_id: "abc48144" @@ -16,25 +13,23 @@ source: password: "${SNOWFLAKE_PASS}" role: "datahub_role" - # Change these as per your database names. 
Remove to get all databases - database_pattern: - allow: - - "^ACCOUNTING_DB$" - - "^MARKETING_DB$" - - table_pattern: - allow: - # If you want to ingest only few tables with name revenue and sales - - ".*revenue" - - ".*sales" + # (Optional) Uncomment and update this section to filter ingested datasets + # database_pattern: + # allow: + # - "^ACCOUNTING_DB$" + # - "^MARKETING_DB$" profiling: # Change to false to disable profiling enabled: true + # This option is recommended to reduce profiling time and costs. turn_off_expensive_profiling_metrics: true - profile_pattern: - allow: - - "ACCOUNTING_DB.*.*" - - "MARKETING_DB.*.*" + + # (Optional) Uncomment and update this section to filter profiled tables + # profile_pattern: + # allow: + # - "ACCOUNTING_DB.*.*" + # - "MARKETING_DB.*.*" + # Default sink is datahub-rest and doesn't need to be configured # See https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for customization options diff --git a/metadata-ingestion/docs/transformer/dataset_transformer.md b/metadata-ingestion/docs/transformer/dataset_transformer.md index 86bac07410e10..8478664f31b00 100644 --- a/metadata-ingestion/docs/transformer/dataset_transformer.md +++ b/metadata-ingestion/docs/transformer/dataset_transformer.md @@ -1145,7 +1145,7 @@ def transform_one(self, mce: MetadataChangeEventClass) -> MetadataChangeEventCla Now that we've defined the transformer, we need to make it visible to DataHub. The easiest way to do this is to just place it in the same directory as your recipe, in which case the module name is the same as the file – in this case, `custom_transform_example`.
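For illustration only (this is not the registry's actual implementation): resolving a transformer that a recipe references by its fully qualified `<module>.<ClassName>` string boils down to importing the module by name and fetching the class from it, which is why the file only needs to be importable, e.g. by sitting next to the recipe. A rough sketch, assuming a `custom_transform_example.py` defining `AddCustomOwnership`:

```python
import importlib


def resolve_transformer(type_string: str):
    # "custom_transform_example.AddCustomOwnership" -> ("custom_transform_example", "AddCustomOwnership")
    module_name, class_name = type_string.rsplit(".", 1)
    module = importlib.import_module(module_name)  # succeeds if the file is on the Python path
    return getattr(module, class_name)


transformer_cls = resolve_transformer("custom_transform_example.AddCustomOwnership")
```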
- Advanced: installing as a package + Advanced: Installing as a package and enabling discoverability Alternatively, create a `setup.py` in the same directory as our transform script to make it visible globally. After installing this package (e.g. with `python setup.py` or `pip install -e .`), our module will be installed and importable as `custom_transform_example`. ```python @@ -1156,17 +1156,25 @@ setup( version="1.0", packages=find_packages(), # if you don't already have DataHub installed, add it under install_requires - # install_requires=["acryl-datahub"] + # install_requires=["acryl-datahub"], + entry_points={ + "datahub.ingestion.transformer.plugins": [ + "custom_transform_example_alias = custom_transform_example:AddCustomOwnership", + ], + }, ) ``` +Additionally, declare the transformer under the `entry_points` variable of the setup script. This enables the transformer to be +listed when running `datahub check plugins`, and sets up the transformer's shortened alias for use in recipes. +
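Not part of the patch, just a sketch of why the entry point matters: once the package is installed, anything declared under the `datahub.ingestion.transformer.plugins` group can be discovered through standard Python entry-point metadata, which is roughly what plugin listing and alias resolution rely on (the exact registry internals may differ). On Python 3.10+:

```python
import importlib.metadata

# Enumerate everything registered under the transformer plugin group ...
eps = importlib.metadata.entry_points(group="datahub.ingestion.transformer.plugins")
for ep in eps:
    print(ep.name)  # e.g. "custom_transform_example_alias"

# ... and load one alias back into its class.
transformer_cls = {ep.name: ep for ep in eps}["custom_transform_example_alias"].load()
```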
### Running the transform ```yaml transformers: - - type: "custom_transform_example.AddCustomOwnership" + - type: "custom_transform_example_alias" config: owners_json: "" # the JSON file mentioned at the start ``` diff --git a/metadata-ingestion/examples/bootstrap_data/business_glossary.yml b/metadata-ingestion/examples/bootstrap_data/business_glossary.yml index 70ea3a2ce3404..a80f2fec84924 100644 --- a/metadata-ingestion/examples/bootstrap_data/business_glossary.yml +++ b/metadata-ingestion/examples/bootstrap_data/business_glossary.yml @@ -7,6 +7,9 @@ url: "https://github.com/datahub-project/datahub/" nodes: - name: Classification description: A set of terms related to Data Classification + knowledge_links: + - label: Wiki link for classification + url: "https://en.wikipedia.org/wiki/Classification" terms: - name: Sensitive description: Sensitive Data @@ -110,3 +113,6 @@ nodes: source_url: "https://spec.edmcouncil.org/fibo/ontology/FBC/ProductsAndServices/ClientsAndAccounts/Account" related_terms: - House.Kitchen + knowledge_links: + - url: "https://en.wikipedia.org/wiki/Spoon" + label: Wiki link diff --git a/metadata-ingestion/examples/recipes/feast_to_datahub.dhub.yaml b/metadata-ingestion/examples/recipes/feast_to_datahub.dhub.yaml deleted file mode 100644 index 0efdb4c1d0d71..0000000000000 --- a/metadata-ingestion/examples/recipes/feast_to_datahub.dhub.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# see https://datahubproject.io/docs/generated/ingestion/sources/feast for complete documentation -source: - type: feast-legacy - config: - core_url: localhost:6565 # default - env: "PROD" # Optional, default is "PROD" - use_local_build: False # Whether to build Feast ingestion image locally, default is False - -# see https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for complete documentation -sink: - type: "datahub-rest" - config: - server: "http://localhost:8080" diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 5547914236de6..1496dde1102ed 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -281,7 +281,6 @@ def get_long_description(): # https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/release-notes.html#rn-7-14-0 # https://github.com/elastic/elasticsearch-py/issues/1639#issuecomment-883587433 "elasticsearch": {"elasticsearch==7.13.4"}, - "feast-legacy": {"docker"}, "feast": {"feast~=0.26.0", "flask-openid>=1.3.0"}, "glue": aws_common, # hdbcli is supported officially by SAP, sqlalchemy-hana is built on top but not officially supported @@ -330,12 +329,6 @@ def get_long_description(): "s3": {*s3_base, *data_lake_profiling}, "sagemaker": aws_common, "salesforce": {"simple-salesforce"}, - "snowflake-legacy": snowflake_common, - "snowflake-usage-legacy": snowflake_common - | usage_common - | { - "more-itertools>=8.12.0", - }, "snowflake": snowflake_common | usage_common, "snowflake-beta": ( snowflake_common | usage_common @@ -475,7 +468,6 @@ def get_long_description(): "clickhouse", "delta-lake", "druid", - "feast-legacy", "hana", "hive", "iceberg", @@ -511,7 +503,6 @@ def get_long_description(): "dbt-cloud = datahub.ingestion.source.dbt.dbt_cloud:DBTCloudSource", "druid = datahub.ingestion.source.sql.druid:DruidSource", "elasticsearch = datahub.ingestion.source.elastic_search:ElasticsearchSource", - "feast-legacy = datahub.ingestion.source.feast_legacy:FeastSource", "feast = datahub.ingestion.source.feast:FeastRepositorySource", "glue = datahub.ingestion.source.aws.glue:GlueSource", "sagemaker = 
datahub.ingestion.source.aws.sagemaker:SagemakerSource", @@ -535,8 +526,6 @@ def get_long_description(): "redash = datahub.ingestion.source.redash:RedashSource", "redshift = datahub.ingestion.source.sql.redshift:RedshiftSource", "redshift-usage = datahub.ingestion.source.usage.redshift_usage:RedshiftUsageSource", - "snowflake-legacy = datahub.ingestion.source.sql.snowflake:SnowflakeSource", - "snowflake-usage-legacy = datahub.ingestion.source.usage.snowflake_usage:SnowflakeUsageSource", "snowflake = datahub.ingestion.source.snowflake.snowflake_v2:SnowflakeV2Source", "superset = datahub.ingestion.source.superset:SupersetSource", "tableau = datahub.ingestion.source.tableau:TableauSource", @@ -553,8 +542,30 @@ def get_long_description(): "presto-on-hive = datahub.ingestion.source.sql.presto_on_hive:PrestoOnHiveSource", "pulsar = datahub.ingestion.source.pulsar:PulsarSource", "salesforce = datahub.ingestion.source.salesforce:SalesforceSource", + "demo-data = datahub.ingestion.source.demo_data:DemoDataSource", "unity-catalog = datahub.ingestion.source.unity.source:UnityCatalogSource", ], + "datahub.ingestion.transformer.plugins": [ + "simple_remove_dataset_ownership = datahub.ingestion.transformer.remove_dataset_ownership:SimpleRemoveDatasetOwnership", + "mark_dataset_status = datahub.ingestion.transformer.mark_dataset_status:MarkDatasetStatus", + "set_dataset_browse_path = datahub.ingestion.transformer.add_dataset_browse_path:AddDatasetBrowsePathTransformer", + "add_dataset_ownership = datahub.ingestion.transformer.add_dataset_ownership:AddDatasetOwnership", + "simple_add_dataset_ownership = datahub.ingestion.transformer.add_dataset_ownership:SimpleAddDatasetOwnership", + "pattern_add_dataset_ownership = datahub.ingestion.transformer.add_dataset_ownership:PatternAddDatasetOwnership", + "add_dataset_domain = datahub.ingestion.transformer.dataset_domain:AddDatasetDomain", + "simple_add_dataset_domain = datahub.ingestion.transformer.dataset_domain:SimpleAddDatasetDomain", + "pattern_add_dataset_domain = datahub.ingestion.transformer.dataset_domain:PatternAddDatasetDomain", + "add_dataset_tags = datahub.ingestion.transformer.add_dataset_tags:AddDatasetTags", + "simple_add_dataset_tags = datahub.ingestion.transformer.add_dataset_tags:SimpleAddDatasetTags", + "pattern_add_dataset_tags = datahub.ingestion.transformer.add_dataset_tags:PatternAddDatasetTags", + "add_dataset_terms = datahub.ingestion.transformer.add_dataset_terms:AddDatasetTerms", + "simple_add_dataset_terms = datahub.ingestion.transformer.add_dataset_terms:SimpleAddDatasetTerms", + "pattern_add_dataset_terms = datahub.ingestion.transformer.add_dataset_terms:PatternAddDatasetTerms", + "add_dataset_properties = datahub.ingestion.transformer.add_dataset_properties:AddDatasetProperties", + "simple_add_dataset_properties = datahub.ingestion.transformer.add_dataset_properties:SimpleAddDatasetProperties", + "pattern_add_dataset_schema_terms = datahub.ingestion.transformer.add_dataset_schema_terms:PatternAddDatasetSchemaTerms", + "pattern_add_dataset_schema_tags = datahub.ingestion.transformer.add_dataset_schema_tags:PatternAddDatasetSchemaTags", + ], "datahub.ingestion.sink.plugins": [ "file = datahub.ingestion.sink.file:FileSink", "console = datahub.ingestion.sink.console:ConsoleSink", @@ -615,7 +626,6 @@ def get_long_description(): "datahub": ["py.typed"], "datahub.metadata": ["schema.avsc"], "datahub.metadata.schemas": ["*.avsc"], - "datahub.ingestion.source.feast_image": ["Dockerfile", "requirements.txt"], }, entry_points=entry_points, #
Dependencies. diff --git a/metadata-ingestion/src/datahub/cli/docker_cli.py b/metadata-ingestion/src/datahub/cli/docker_cli.py index b8ca9f9826351..be5141f3e9744 100644 --- a/metadata-ingestion/src/datahub/cli/docker_cli.py +++ b/metadata-ingestion/src/datahub/cli/docker_cli.py @@ -29,6 +29,11 @@ from datahub.ingestion.run.pipeline import Pipeline from datahub.telemetry import telemetry from datahub.upgrade import upgrade +from datahub.utilities.sample_data import ( + BOOTSTRAP_MCES_FILE, + DOCKER_COMPOSE_BASE, + download_sample_data, +) logger = logging.getLogger(__name__) @@ -48,13 +53,6 @@ "docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml" ) -BOOTSTRAP_MCES_FILE = "metadata-ingestion/examples/mce_files/bootstrap_mce.json" - -DOCKER_COMPOSE_BASE = os.getenv( - "DOCKER_COMPOSE_BASE", - "https://raw.githubusercontent.com/datahub-project/datahub/master", -) - NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL = ( f"{DOCKER_COMPOSE_BASE}/{NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE}" ) @@ -62,7 +60,6 @@ f"{DOCKER_COMPOSE_BASE}/{ELASTIC_QUICKSTART_COMPOSE_FILE}" ) M1_QUICKSTART_COMPOSE_URL = f"{DOCKER_COMPOSE_BASE}/{M1_QUICKSTART_COMPOSE_FILE}" -BOOTSTRAP_MCES_URL = f"{DOCKER_COMPOSE_BASE}/{BOOTSTRAP_MCES_FILE}" class Architectures(Enum): @@ -885,14 +882,7 @@ def ingest_sample_data(path: Optional[str], token: Optional[str]) -> None: if path is None: click.echo("Downloading sample data...") - with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as tmp_file: - path = str(pathlib.Path(tmp_file.name)) - - # Download the bootstrap MCE file from GitHub. - mce_json_download_response = requests.get(BOOTSTRAP_MCES_URL) - mce_json_download_response.raise_for_status() - tmp_file.write(mce_json_download_response.content) - click.echo(f"Downloaded to {path}") + path = download_sample_data() # Verify that docker is up. issues = check_local_docker_containers() diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py index 95d852bbe7b60..72982bce93a6f 100644 --- a/metadata-ingestion/src/datahub/configuration/common.py +++ b/metadata-ingestion/src/datahub/configuration/common.py @@ -224,3 +224,8 @@ class LineageConfig(ConfigModel): default=True, description="When enabled, emits lineage as incremental to existing lineage already in DataHub. When disabled, re-states lineage on each run.", ) + + sql_parser_use_external_process: bool = Field( + default=False, + description="When enabled, sql parser will run in isolated in a separate process. 
This can affect processing time but can protect from sql parser's mem leak.", + ) diff --git a/metadata-ingestion/src/datahub/configuration/pattern_utils.py b/metadata-ingestion/src/datahub/configuration/pattern_utils.py new file mode 100644 index 0000000000000..313e68c41812f --- /dev/null +++ b/metadata-ingestion/src/datahub/configuration/pattern_utils.py @@ -0,0 +1,13 @@ +from datahub.configuration.common import AllowDenyPattern + + +def is_schema_allowed( + schema_pattern: AllowDenyPattern, + schema_name: str, + db_name: str, + match_fully_qualified_schema_name: bool, +) -> bool: + if match_fully_qualified_schema_name: + return schema_pattern.allowed(f"{db_name}.{schema_name}") + else: + return schema_pattern.allowed(schema_name) diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py index 98c16b69a2c5e..a74d33c9c7b9a 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_builder.py @@ -12,7 +12,10 @@ ) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.metadata.com.linkedin.pegasus2avro.common import DataPlatformInstance +from datahub.metadata.com.linkedin.pegasus2avro.common import ( + DataPlatformInstance, + TimeStamp, +) from datahub.metadata.com.linkedin.pegasus2avro.container import ContainerProperties from datahub.metadata.com.linkedin.pegasus2avro.events.metadata import ChangeType from datahub.metadata.schema_classes import ( @@ -205,6 +208,8 @@ def gen_containers( external_url: Optional[str] = None, tags: Optional[List[str]] = None, qualified_name: Optional[str] = None, + created: Optional[int] = None, + last_modified: Optional[int] = None, ) -> Iterable[MetadataWorkUnit]: container_urn = make_container_urn( guid=container_key.guid(), @@ -220,6 +225,10 @@ def gen_containers( customProperties=container_key.guid_dict(), externalUrl=external_url, qualifiedName=qualified_name, + created=TimeStamp(time=created) if created is not None else None, + lastModified=TimeStamp(time=last_modified) + if last_modified is not None + else None, ), ) wu = MetadataWorkUnit(id=f"container-info-{name}-{container_urn}", mcp=mcp) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 8066df165a01f..d50c3039c6082 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -67,7 +67,11 @@ from datahub.ingestion.source.state.stateful_ingestion_base import ( StatefulIngestionSourceBase, ) -from datahub.metadata.com.linkedin.pegasus2avro.common import Status, SubTypes +from datahub.metadata.com.linkedin.pegasus2avro.common import ( + Status, + SubTypes, + TimeStamp, +) from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( DatasetProperties, UpstreamLineage, @@ -568,6 +572,31 @@ def _process_project( ) continue + if self.config.include_table_lineage: + logger.info(f"Generate lineage for {project_id}") + for dataset in self.db_tables[project_id]: + for table in self.db_tables[project_id][dataset]: + dataset_urn = self.gen_dataset_urn(dataset, project_id, table.name) + lineage_info = self.lineage_extractor.get_upstream_lineage_info( + project_id=project_id, + dataset_name=dataset, + table=table, + platform=self.platform, + ) + if lineage_info: + yield from 
self.gen_lineage(dataset_urn, lineage_info) + + for dataset in self.db_views[project_id]: + for view in self.db_views[project_id][dataset]: + dataset_urn = self.gen_dataset_urn(dataset, project_id, view.name) + lineage_info = self.lineage_extractor.get_upstream_lineage_info( + project_id=project_id, + dataset_name=dataset, + table=view, + platform=self.platform, + ) + yield from self.gen_lineage(dataset_urn, lineage_info) + if self.config.include_usage_statistics: logger.info(f"Generate usage for {project_id}") tables: Dict[str, List[str]] = {} @@ -642,18 +671,8 @@ def _process_table( f"Table doesn't have any column or unable to get columns for table: {table_identifier}" ) - lineage_info: Optional[Tuple[UpstreamLineage, Dict[str, str]]] = None - - if self.config.include_table_lineage: - lineage_info = self.lineage_extractor.get_upstream_lineage_info( - project_id=project_id, - dataset_name=schema_name, - table=table, - platform=self.platform, - ) - table_workunits = self.gen_table_dataset_workunits( - table, project_id, schema_name, lineage_info + table, project_id, schema_name ) for wu in table_workunits: self.report.report_workunit(wu) @@ -679,18 +698,12 @@ def _process_view( conn, table_identifier, column_limit=self.config.column_limit ) - lineage_info: Optional[Tuple[UpstreamLineage, Dict[str, str]]] = None - if self.config.include_table_lineage: - lineage_info = self.lineage_extractor.get_upstream_lineage_info( - project_id=project_id, - dataset_name=dataset_name, - table=view, - platform=self.platform, - ) + if dataset_name not in self.db_views[project_id]: + self.db_views[project_id][dataset_name] = [] - view_workunits = self.gen_view_dataset_workunits( - view, project_id, dataset_name, lineage_info - ) + self.db_views[project_id][dataset_name].append(view) + + view_workunits = self.gen_view_dataset_workunits(view, project_id, dataset_name) for wu in view_workunits: self.report.report_workunit(wu) yield wu @@ -718,7 +731,6 @@ def gen_table_dataset_workunits( table: BigqueryTable, project_id: str, dataset_name: str, - lineage_info: Optional[Tuple[UpstreamLineage, Dict[str, str]]], ) -> Iterable[MetadataWorkUnit]: custom_properties: Dict[str, str] = {} if table.expires: @@ -761,7 +773,6 @@ def gen_table_dataset_workunits( project_id=project_id, dataset_name=dataset_name, sub_type="table", - lineage_info=lineage_info, tags_to_add=tags_to_add, custom_properties=custom_properties, ) @@ -771,7 +782,6 @@ def gen_view_dataset_workunits( table: BigqueryView, project_id: str, dataset_name: str, - lineage_info: Optional[Tuple[UpstreamLineage, Dict[str, str]]], ) -> Iterable[MetadataWorkUnit]: yield from self.gen_dataset_workunits( @@ -779,11 +789,10 @@ def gen_view_dataset_workunits( project_id=project_id, dataset_name=dataset_name, sub_type="view", - lineage_info=lineage_info, ) view = cast(BigqueryView, table) - view_definition_string = view.ddl + view_definition_string = view.view_definition view_properties_aspect = ViewProperties( materialized=False, viewLanguage="SQL", viewLogic=view_definition_string ) @@ -802,7 +811,6 @@ def gen_dataset_workunits( project_id: str, dataset_name: str, sub_type: str, - lineage_info: Optional[Tuple[UpstreamLineage, Dict[str, str]]] = None, tags_to_add: Optional[List[str]] = None, custom_properties: Optional[Dict[str, str]] = None, ) -> Iterable[MetadataWorkUnit]: @@ -819,43 +827,16 @@ def gen_dataset_workunits( yield self.gen_schema_metadata(dataset_urn, table, str(datahub_dataset_name)) - if lineage_info is not None: - upstream_lineage, 
upstream_column_props = lineage_info - else: - upstream_column_props = {} - upstream_lineage = None - - if upstream_lineage is not None: - if self.config.incremental_lineage: - patch_builder: DatasetPatchBuilder = DatasetPatchBuilder( - urn=dataset_urn - ) - for upstream in upstream_lineage.upstreams: - patch_builder.add_upstream_lineage(upstream) - - lineage_workunits = [ - MetadataWorkUnit( - id=f"upstreamLineage-for-{dataset_urn}", - mcp_raw=mcp, - ) - for mcp in patch_builder.build() - ] - else: - lineage_workunits = [ - wrap_aspect_as_workunit( - "dataset", dataset_urn, "upstreamLineage", upstream_lineage - ) - ] - - for wu in lineage_workunits: - yield wu - self.report.report_workunit(wu) - dataset_properties = DatasetProperties( name=datahub_dataset_name.get_table_display_name(), description=table.comment, qualifiedName=str(datahub_dataset_name), - customProperties={**upstream_column_props}, + created=TimeStamp(time=int(table.created.timestamp() * 1000)) + if table.created is not None + else None, + lastModified=TimeStamp(time=int(table.last_altered.timestamp() * 1000)) + if table.last_altered is not None + else None, ) if custom_properties: dataset_properties.customProperties.update(custom_properties) @@ -895,6 +876,41 @@ def gen_dataset_workunits( urn=dataset_urn, ) + def gen_lineage( + self, + dataset_urn: str, + lineage_info: Optional[Tuple[UpstreamLineage, Dict[str, str]]] = None, + ) -> Iterable[MetadataWorkUnit]: + if lineage_info is None: + return + + upstream_lineage, upstream_column_props = lineage_info + if upstream_lineage is not None: + if self.config.incremental_lineage: + patch_builder: DatasetPatchBuilder = DatasetPatchBuilder( + urn=dataset_urn + ) + for upstream in upstream_lineage.upstreams: + patch_builder.add_upstream_lineage(upstream) + + lineage_workunits = [ + MetadataWorkUnit( + id=f"upstreamLineage-for-{dataset_urn}", + mcp_raw=mcp, + ) + for mcp in patch_builder.build() + ] + else: + lineage_workunits = [ + wrap_aspect_as_workunit( + "dataset", dataset_urn, "upstreamLineage", upstream_lineage + ) + ] + + for wu in lineage_workunits: + yield wu + self.report.report_workunit(wu) + def gen_tags_aspect_workunit( self, dataset_urn: str, tags_to_add: List[str] ) -> MetadataWorkUnit: @@ -1133,8 +1149,6 @@ def get_views_for_dataset( views = self.db_views.get(project_id) - # get all views for database failed, - # falling back to get views for schema if not views: return BigQueryDataDictionary.get_views_for_dataset( conn, project_id, dataset_name, self.config.profiling.enabled diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py index 683d369973cd7..152394b4ebda8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py @@ -6,13 +6,13 @@ import pydantic -from datahub.ingestion.source.sql.sql_common import SQLSourceReport +from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport from datahub.utilities.lossy_collections import LossyDict, LossyList from datahub.utilities.stats_collections import TopKDict @dataclass -class BigQueryV2Report(SQLSourceReport): +class BigQueryV2Report(ProfilingSqlReport): num_total_lineage_entries: TopKDict[str, int] = field(default_factory=TopKDict) num_skipped_lineage_entries_missing_data: TopKDict[str, int] = field( default_factory=TopKDict @@ -55,7 +55,6 @@ class 
BigQueryV2Report(SQLSourceReport): upstream_lineage: LossyDict = field(default_factory=LossyDict) partition_info: Dict[str, str] = field(default_factory=TopKDict) profile_table_selection_criteria: Dict[str, str] = field(default_factory=TopKDict) - num_tables_not_eligible_profiling: Dict[str, int] = field(default_factory=TopKDict) selected_profile_tables: Dict[str, List[str]] = field(default_factory=TopKDict) invalid_partition_ids: Dict[str, str] = field(default_factory=TopKDict) allow_pattern: Optional[str] = None diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py index fa475f66d0120..3302c873bd56c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py @@ -8,49 +8,33 @@ from google.cloud.bigquery.table import RowIterator, TableListItem, TimePartitioning from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier +from datahub.ingestion.source.sql.sql_generic import BaseColumn, BaseTable, BaseView logger: logging.Logger = logging.getLogger(__name__) @dataclass(frozen=True, eq=True) -class BigqueryColumn: - name: str - ordinal_position: int +class BigqueryColumn(BaseColumn): field_path: str - is_nullable: bool is_partition_column: bool - data_type: str - comment: str @dataclass -class BigqueryTable: - name: str - created: datetime - last_altered: Optional[datetime] - size_in_bytes: Optional[int] - rows_count: Optional[int] - expires: Optional[datetime] - clustering_fields: Optional[List[str]] - labels: Optional[Dict[str, str]] - num_partitions: Optional[int] - max_partition_id: Optional[str] - max_shard_id: Optional[str] - active_billable_bytes: Optional[int] - long_term_billable_bytes: Optional[int] - comment: str - ddl: str - time_partitioning: TimePartitioning +class BigqueryTable(BaseTable): + expires: Optional[datetime] = None + clustering_fields: Optional[List[str]] = None + labels: Optional[Dict[str, str]] = None + num_partitions: Optional[int] = None + max_partition_id: Optional[str] = None + max_shard_id: Optional[str] = None + active_billable_bytes: Optional[int] = None + long_term_billable_bytes: Optional[int] = None + time_partitioning: Optional[TimePartitioning] = None columns: List[BigqueryColumn] = field(default_factory=list) @dataclass -class BigqueryView: - name: str - created: datetime - last_altered: datetime - comment: str - ddl: str +class BigqueryView(BaseView): columns: List[BigqueryColumn] = field(default_factory=list) @@ -350,7 +334,7 @@ def get_tables_for_dataset( table.last_altered / 1000, tz=timezone.utc ) if "last_altered" in table - else None, + else table.created, size_in_bytes=table.get("bytes"), rows_count=table.get("row_count"), comment=table.comment, @@ -404,9 +388,11 @@ def get_views_for_dataset( BigqueryView( name=table.table_name, created=table.created, - last_altered=table.last_altered if "last_altered" in table else None, + last_altered=table.last_altered + if "last_altered" in table + else table.created, comment=table.comment, - ddl=table.view_definition, + view_definition=table.view_definition, ) for table in cur ] diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py index 16505f4d27dc9..faf02649d5b43 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py @@ -431,7 +431,9 @@ def _create_lineage_map(self, entries: Iterable[QueryEvent]) -> Dict[str, Set[st # in the references. There is no distinction between direct/base objects accessed. So doing sql parsing # to ensure we only use direct objects accessed for lineage try: - parser = BigQuerySQLParser(e.query) + parser = BigQuerySQLParser( + e.query, self.config.sql_parser_use_external_process + ) referenced_objs = set( map(lambda x: x.split(".")[-1], parser.get_tables()) ) @@ -466,13 +468,15 @@ def parse_view_lineage( self, project: str, dataset: str, view: BigqueryView ) -> List[BigqueryTableIdentifier]: parsed_tables = set() - if view.ddl: + if view.view_definition: try: - parser = BigQuerySQLParser(view.ddl) + parser = BigQuerySQLParser( + view.view_definition, self.config.sql_parser_use_external_process + ) tables = parser.get_tables() except Exception as ex: logger.debug( - f"View {view.name} definination sql parsing failed on query: {view.ddl}. Edge from physical table to view won't be added. The error was {ex}." + f"View {view.name} definination sql parsing failed on query: {view.view_definition}. Edge from physical table to view won't be added. The error was {ex}." ) return [] diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py index d58457534977a..a83787beb84d8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py @@ -4,8 +4,6 @@ from typing import Dict, Iterable, List, Optional, Tuple, cast from dateutil.relativedelta import relativedelta -from sqlalchemy import create_engine, inspect -from sqlalchemy.engine.reflection import Inspector from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance from datahub.emitter.mcp_builder import wrap_aspect_as_workunit @@ -17,12 +15,11 @@ BigqueryColumn, BigqueryTable, ) -from datahub.ingestion.source.ge_data_profiler import ( - DatahubGEProfiler, - GEProfilerRequest, +from datahub.ingestion.source.ge_data_profiler import GEProfilerRequest +from datahub.ingestion.source.sql.sql_generic_profiler import ( + GenericProfiler, + TableProfilerRequest, ) -from datahub.metadata.com.linkedin.pegasus2avro.dataset import DatasetProfile -from datahub.metadata.schema_classes import DatasetProfileClass logger = logging.getLogger(__name__) @@ -33,11 +30,14 @@ class BigqueryProfilerRequest(GEProfilerRequest): profile_table_level_only: bool = False -class BigqueryProfiler: +class BigqueryProfiler(GenericProfiler): + config: BigQueryV2Config + report: BigQueryV2Report + def __init__(self, config: BigQueryV2Config, report: BigQueryV2Report) -> None: + super().__init__(config, report, "bigquery") self.config = config self.report = report - self.platform = "bigquery" @staticmethod def get_partition_range_from_partition_id( @@ -184,9 +184,9 @@ def get_workunits( if len(profile_requests) == 0: continue - + table_profile_requests = cast(List[TableProfilerRequest], profile_requests) for request, profile in self.generate_profiles( - profile_requests, + table_profile_requests, self.config.profiling.max_workers, platform=self.platform, profiler_args=self.get_profile_args(), @@ -231,8 +231,11 @@ def get_bigquery_profile_request( dataset_name, table.last_altered, table.size_in_bytes, 
table.rows_count ): profile_table_level_only = True - self.report.num_tables_not_eligible_profiling[dataset] = ( - self.report.num_tables_not_eligible_profiling.get(dataset, 0) + 1 + self.report.num_tables_not_eligible_profiling[f"{project}.{dataset}"] = ( + self.report.num_tables_not_eligible_profiling.get( + f"{project}.{dataset}", 0 + ) + + 1 ) if not table.columns: @@ -276,107 +279,3 @@ def get_bigquery_profile_request( profile_table_level_only=profile_table_level_only, ) return profile_request - - def is_dataset_eligible_for_profiling( - self, - dataset_name: str, - last_altered: Optional[datetime.datetime], - size_in_bytes: Optional[int], - rows_count: Optional[int], - ) -> bool: - threshold_time: Optional[datetime.datetime] = None - if self.config.profiling.profile_if_updated_since_days is not None: - threshold_time = datetime.datetime.now( - datetime.timezone.utc - ) - datetime.timedelta(self.config.profiling.profile_if_updated_since_days) - - return ( - ( - self.config.table_pattern.allowed(dataset_name) - and self.config.profile_pattern.allowed(dataset_name) - ) - and ( - (threshold_time is None) - or (last_altered is not None and last_altered >= threshold_time) - ) - and ( - self.config.profiling.profile_table_size_limit is None - or ( - size_in_bytes is not None - and size_in_bytes / (2**30) - <= self.config.profiling.profile_table_size_limit - ) # Note: Profiling is not allowed is size_in_bytes is not available - ) - and ( - self.config.profiling.profile_table_row_limit is None - or ( - rows_count is not None - and rows_count <= self.config.profiling.profile_table_row_limit - ) # Note: Profiling is not allowed is rows_count is not available - ) - ) - - def get_inspectors(self) -> Iterable[Inspector]: - # This method can be overridden in the case that you want to dynamically - # run on multiple databases. 
- - url = self.config.get_sql_alchemy_url() - logger.debug(f"sql_alchemy_url={url}") - engine = create_engine(url, **self.config.options) - with engine.connect() as conn: - inspector = inspect(conn) - yield inspector - - def get_profiler_instance(self) -> "DatahubGEProfiler": - logger.debug("Getting profiler instance from bigquery") - url = self.config.get_sql_alchemy_url() - - logger.debug(f"sql_alchemy_url={url}") - - engine = create_engine(url, **self.config.options) - with engine.connect() as conn: - inspector = inspect(conn) - - return DatahubGEProfiler( - conn=inspector.bind, - report=self.report, - config=self.config.profiling, - platform=self.platform, - ) - - def get_profile_args(self) -> Dict: - """Passed down to GE profiler""" - return {} - - def generate_profiles( - self, - requests: List[BigqueryProfilerRequest], - max_workers: int, - platform: Optional[str] = None, - profiler_args: Optional[Dict] = None, - ) -> Iterable[Tuple[GEProfilerRequest, Optional[DatasetProfileClass]]]: - - ge_profile_requests: List[GEProfilerRequest] = [ - cast(GEProfilerRequest, request) - for request in requests - if not request.profile_table_level_only - ] - table_level_profile_requests: List[BigqueryProfilerRequest] = [ - request for request in requests if request.profile_table_level_only - ] - for request in table_level_profile_requests: - profile = DatasetProfile( - timestampMillis=int(datetime.datetime.now().timestamp() * 1000), - columnCount=len(request.table.columns), - rowCount=request.table.rows_count, - sizeInBytes=request.table.size_in_bytes, - ) - yield (request, profile) - - if not ge_profile_requests: - return - - ge_profiler = self.get_profiler_instance() - yield from ge_profiler.generate_profiles( - ge_profile_requests, max_workers, platform, profiler_args - ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py index cec5f067ebbf7..5c2b01c3cf23a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py @@ -278,11 +278,11 @@ def _parse_into_dbt_node(self, node: Dict) -> DBTNode: ) # The code fields are new in dbt 1.3, and replace the sql ones. - raw_sql = node["rawCode"] or node["rawSql"] - compiled_sql = node["compiledCode"] or node["compiledSql"] + raw_code = node["rawCode"] or node["rawSql"] + compiled_code = node["compiledCode"] or node["compiledSql"] else: - raw_sql = None - compiled_sql = None + raw_code = None + compiled_code = None max_loaded_at = None if node["resourceType"] == "source": @@ -369,8 +369,9 @@ def _parse_into_dbt_node(self, node: Dict) -> DBTNode: query_tag={}, # TODO: Get this from the dbt API. 
tags=tags, owner=owner, - raw_sql=raw_sql, - compiled_sql=compiled_sql, + language="sql", # TODO: dbt Cloud doesn't surface this + raw_code=raw_code, + compiled_code=compiled_code, columns=columns, test_info=test_info, test_result=test_result, diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index a31e7a3a78029..2660bb606fbf9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -354,7 +354,8 @@ class DBTNode: alias: Optional[str] # alias if present comment: str description: str - raw_sql: Optional[str] + language: Optional[str] + raw_code: Optional[str] dbt_adapter: str dbt_name: str @@ -375,7 +376,7 @@ class DBTNode: query_tag: Dict[str, Any] = field(default_factory=dict) tags: List[str] = field(default_factory=list) - compiled_sql: Optional[str] = None + compiled_code: Optional[str] = None test_info: Optional["DBTTest"] = None # only populated if node_type == 'test' test_result: Optional["DBTTestResult"] = None @@ -410,7 +411,13 @@ def get_custom_properties(node: DBTNode) -> Dict[str, str]: custom_properties = node.meta # additional node attributes to extract to custom properties - node_attributes = ["node_type", "materialization", "dbt_file_path", "catalog_type"] + node_attributes = [ + "node_type", + "materialization", + "dbt_file_path", + "catalog_type", + "language", + ] for attribute in node_attributes: node_attribute_value = getattr(node, attribute) @@ -834,7 +841,7 @@ def _make_assertion_from_test( mce_builder.make_schema_field_urn(upstream_urn, column_name) ], nativeType=node.name, - logic=node.compiled_sql if node.compiled_sql else node.raw_sql, + logic=node.compiled_code if node.compiled_code else node.raw_code, aggregation=AssertionStdAggregationClass._NATIVE_, nativeParameters=string_map(kw_args), ), @@ -848,7 +855,7 @@ def _make_assertion_from_test( dataset=upstream_urn, scope=DatasetAssertionScopeClass.DATASET_ROWS, operator=AssertionStdOperatorClass._NATIVE_, - logic=node.compiled_sql if node.compiled_sql else node.raw_sql, + logic=node.compiled_code if node.compiled_code else node.raw_code, nativeType=node.name, aggregation=AssertionStdAggregationClass._NATIVE_, nativeParameters=string_map(kw_args), @@ -1023,7 +1030,7 @@ def create_platform_mces( aspects.append(upstream_lineage_class) # add view properties aspect - if node.raw_sql: + if node.raw_code and node.language == "sql": view_prop_aspect = self._create_view_properties_aspect(node) aspects.append(view_prop_aspect) @@ -1157,11 +1164,11 @@ def get_external_url(self, node: DBTNode) -> Optional[str]: def _create_view_properties_aspect(self, node: DBTNode) -> ViewPropertiesClass: materialized = node.materialization in {"table", "incremental"} # this function is only called when raw sql is present. 
assert is added to satisfy lint checks - assert node.raw_sql is not None + assert node.raw_code is not None view_properties = ViewPropertiesClass( materialized=materialized, viewLanguage="SQL", - viewLogic=node.raw_sql, + viewLogic=node.raw_code, ) return view_properties diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py index 0236dc7e26b79..2675c6346b4ce 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py @@ -222,7 +222,12 @@ def extract_dbt_entities( max_loaded_at=max_loaded_at, comment=comment, description=manifest_node.get("description", ""), - raw_sql=manifest_node.get("raw_sql"), + raw_code=manifest_node.get( + "raw_code", manifest_node.get("raw_sql") + ), # Backward compatibility dbt <=v1.2 + language=manifest_node.get( + "language", "sql" + ), # Backward compatibility dbt <=v1.2 upstream_nodes=upstream_nodes, materialization=materialization, catalog_type=catalog_type, @@ -230,7 +235,9 @@ def extract_dbt_entities( query_tag=query_tag_props, tags=tags, owner=owner, - compiled_sql=manifest_node.get("compiled_sql"), + compiled_code=manifest_node.get( + "compiled_code", manifest_node.get("compiled_sql") + ), # Backward compatibility dbt <=v1.2 test_info=test_info, ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/demo_data.py b/metadata-ingestion/src/datahub/ingestion/source/demo_data.py new file mode 100644 index 0000000000000..1764596bb5e8d --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/demo_data.py @@ -0,0 +1,33 @@ +from datahub.configuration.common import ConfigModel +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.api.decorators import ( + SupportStatus, + config_class, + platform_name, + support_status, +) +from datahub.ingestion.source.file import FileSourceConfig, GenericFileSource +from datahub.utilities.sample_data import download_sample_data + + +class DemoDataConfig(ConfigModel): + # The demo data source does not accept any configuration. + pass + + +@platform_name("Demo Data") +@config_class(ConfigModel) +@support_status(SupportStatus.UNKNOWN) +class DemoDataSource(GenericFileSource): + """ + This source loads sample data into DataHub. It is intended for demo and testing purposes only. 
+ """ + + def __init__(self, ctx: PipelineContext, config: DemoDataConfig): + file_config = FileSourceConfig(filename=download_sample_data()) + super().__init__(ctx, file_config) + + @classmethod + def create(cls, config_dict, ctx): + config = DemoDataConfig.parse_obj(config_dict or {}) + return cls(ctx, config) diff --git a/metadata-ingestion/src/datahub/ingestion/source/feast_legacy.py b/metadata-ingestion/src/datahub/ingestion/source/feast_legacy.py deleted file mode 100644 index 246dd2ef329bf..0000000000000 --- a/metadata-ingestion/src/datahub/ingestion/source/feast_legacy.py +++ /dev/null @@ -1,334 +0,0 @@ -import json -import os -import tempfile -from dataclasses import dataclass, field -from shlex import quote -from typing import Dict, Iterable, List - -import docker -from pydantic import Field - -import datahub.emitter.mce_builder as builder -from datahub.configuration.source_common import EnvBasedSourceConfigBase -from datahub.ingestion.api.common import PipelineContext -from datahub.ingestion.api.decorators import ( - SourceCapability, - SupportStatus, - capability, - config_class, - platform_name, - support_status, -) -from datahub.ingestion.api.source import Source, SourceReport -from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.metadata.com.linkedin.pegasus2avro.common import MLFeatureDataType -from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import ( - MLFeatureSnapshot, - MLFeatureTableSnapshot, - MLPrimaryKeySnapshot, -) -from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent -from datahub.metadata.schema_classes import ( - BrowsePathsClass, - MLFeaturePropertiesClass, - MLFeatureTablePropertiesClass, - MLPrimaryKeyPropertiesClass, -) - -# map Feast types to DataHub classes -_field_type_mapping: Dict[str, str] = { - "BYTES": MLFeatureDataType.BYTE, - "STRING": MLFeatureDataType.TEXT, - "INT32": MLFeatureDataType.ORDINAL, - "INT64": MLFeatureDataType.ORDINAL, - "DOUBLE": MLFeatureDataType.CONTINUOUS, - "FLOAT": MLFeatureDataType.CONTINUOUS, - "BOOL": MLFeatureDataType.BINARY, - "UNIX_TIMESTAMP": MLFeatureDataType.TIME, - "BYTES_LIST": MLFeatureDataType.SEQUENCE, - "STRING_LIST": MLFeatureDataType.SEQUENCE, - "INT32_LIST": MLFeatureDataType.SEQUENCE, - "INT64_LIST": MLFeatureDataType.SEQUENCE, - "DOUBLE_LIST": MLFeatureDataType.SEQUENCE, - "FLOAT_LIST": MLFeatureDataType.SEQUENCE, - "BOOL_LIST": MLFeatureDataType.SEQUENCE, - "UNIX_TIMESTAMP_LIST": MLFeatureDataType.SEQUENCE, -} - -# image to use for initial feast extraction -HOSTED_FEAST_IMAGE = "acryldata/datahub-ingestion-feast-wrapper" - - -class FeastConfig(EnvBasedSourceConfigBase): - core_url: str = Field( - default="localhost:6565", description="URL of Feast Core instance." 
- ) - - use_local_build: bool = Field( - default=False, - description="Whether to build Feast ingestion Docker image locally.", - ) - - -@dataclass -class FeastSourceReport(SourceReport): - filtered: List[str] = field(default_factory=list) - - def report_dropped(self, name: str) -> None: - self.filtered.append(name) - - -@platform_name("Feast") -@config_class(FeastConfig) -@support_status(SupportStatus.CERTIFIED) -@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") -@dataclass -class FeastSource(Source): - """ - This plugin extracts the following: - - - List of feature tables (modeled as [`MLFeatureTable`](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLFeatureTableProperties.pdl)s), - features ([`MLFeature`](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLFeatureProperties.pdl)s), - and entities ([`MLPrimaryKey`](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLPrimaryKeyProperties.pdl)s) - - Column types associated with each feature and entity - - Note: this uses a separate Docker container to extract Feast's metadata into a JSON file, which is then - parsed to DataHub's native objects. This separation was performed because of a dependency conflict in the `feast` module. - - """ - - config: FeastConfig - report: FeastSourceReport - - def __init__(self, ctx: PipelineContext, config: FeastConfig): - super().__init__(ctx) - self.config = config - self.report = FeastSourceReport() - - @classmethod - def create(cls, config_dict: dict, ctx: PipelineContext) -> "FeastSource": - config = FeastConfig.parse_obj(config_dict) - return cls(ctx, config) - - def get_field_type(self, field_type: str, parent_name: str) -> str: - """ - Maps types encountered in Feast to corresponding schema types. - - Parameters - ---------- - field_type: - type of a Feast object - parent_name: - name of table (for logging) - """ - enum_type = _field_type_mapping.get(field_type) - - if enum_type is None: - self.report.report_warning( - parent_name, f"unable to map type {field_type} to metadata schema" - ) - enum_type = MLFeatureDataType.UNKNOWN - - return enum_type - - def get_entity_wu(self, ingest_table, ingest_entity): - """ - Generate an MLPrimaryKey workunit for a Feast entity. 
- - Parameters - ---------- - ingest_table: - ingested Feast table - ingest_entity: - ingested Feast entity - """ - - # create snapshot instance for the entity - entity_snapshot = MLPrimaryKeySnapshot( - urn=builder.make_ml_primary_key_urn( - ingest_table["name"], ingest_entity["name"] - ), - aspects=[], - ) - - entity_sources = [] - - if ingest_entity["batch_source"] is not None: - entity_sources.append( - builder.make_dataset_urn( - ingest_entity["batch_source_platform"], - ingest_entity["batch_source_name"], - self.config.env, - ) - ) - - if ingest_entity["stream_source"] is not None: - entity_sources.append( - builder.make_dataset_urn( - ingest_entity["stream_source_platform"], - ingest_entity["stream_source_name"], - self.config.env, - ) - ) - - # append entity name and type - entity_snapshot.aspects.append( - MLPrimaryKeyPropertiesClass( - description=ingest_entity["description"], - dataType=self.get_field_type( - ingest_entity["type"], ingest_entity["name"] - ), - sources=entity_sources, - ) - ) - - # make the MCE and workunit - mce = MetadataChangeEvent(proposedSnapshot=entity_snapshot) - return MetadataWorkUnit(id=ingest_entity["name"], mce=mce) - - def get_feature_wu(self, ingest_table, ingest_feature): - """ - Generate an MLFeature workunit for a Feast feature. - - Parameters - ---------- - ingest_table: - ingested Feast table - ingest_feature: - ingested Feast feature - """ - - # create snapshot instance for the feature - feature_snapshot = MLFeatureSnapshot( - urn=builder.make_ml_feature_urn( - ingest_table["name"], ingest_feature["name"] - ), - aspects=[], - ) - - feature_sources = [] - - if ingest_feature["batch_source"] is not None: - feature_sources.append( - builder.make_dataset_urn( - ingest_feature["batch_source_platform"], - ingest_feature["batch_source_name"], - self.config.env, - ) - ) - - if ingest_feature["stream_source"] is not None: - feature_sources.append( - builder.make_dataset_urn( - ingest_feature["stream_source_platform"], - ingest_feature["stream_source_name"], - self.config.env, - ) - ) - - # append feature name and type - feature_snapshot.aspects.append( - MLFeaturePropertiesClass( - dataType=self.get_field_type( - ingest_feature["type"], ingest_feature["name"] - ), - sources=feature_sources, - ) - ) - - # make the MCE and workunit - mce = MetadataChangeEvent(proposedSnapshot=feature_snapshot) - return MetadataWorkUnit(id=ingest_feature["name"], mce=mce) - - def get_feature_table_wu(self, ingest_table): - """ - Generate an MLFeatureTable workunit for a Feast feature table. 
- - Parameters - ---------- - ingest_table: - ingested Feast table - """ - - featuretable_snapshot = MLFeatureTableSnapshot( - urn=builder.make_ml_feature_table_urn("feast", ingest_table["name"]), - aspects=[ - BrowsePathsClass(paths=[f"feast/{ingest_table['name']}"]), - ], - ) - - featuretable_snapshot.aspects.append( - MLFeatureTablePropertiesClass( - mlFeatures=[ - builder.make_ml_feature_urn( - ingest_table["name"], - feature["name"], - ) - for feature in ingest_table["features"] - ], - # a feature table can have multiple primary keys, which then act as a composite key - mlPrimaryKeys=[ - builder.make_ml_primary_key_urn( - ingest_table["name"], entity["name"] - ) - for entity in ingest_table["entities"] - ], - ) - ) - - # make the MCE and workunit - mce = MetadataChangeEvent(proposedSnapshot=featuretable_snapshot) - return MetadataWorkUnit(id=ingest_table["name"], mce=mce) - - def get_workunits(self) -> Iterable[MetadataWorkUnit]: - with tempfile.NamedTemporaryFile(suffix=".json") as tf: - - docker_client = docker.from_env() - - feast_image = HOSTED_FEAST_IMAGE - - # build the image locally if specified - if self.config.use_local_build: - dirname = os.path.dirname(__file__) - image_directory = os.path.join(dirname, "feast_image/") - - image, _ = docker_client.images.build(path=image_directory) - - feast_image = image.id - - docker_client.containers.run( - feast_image, - f"python3 ingest.py --core_url={quote(self.config.core_url)} --output_path=/out.json", - # allow the image to access the core URL if on host - network_mode="host", - # mount the tempfile so the Docker image has access - volumes={ - tf.name: {"bind": "/out.json", "mode": "rw"}, - }, - ) - - ingest = json.load(tf) - - # ingest tables - for ingest_table in ingest: - - # ingest entities in table - for ingest_entity in ingest_table["entities"]: - - wu = self.get_entity_wu(ingest_table, ingest_entity) - self.report.report_workunit(wu) - yield wu - - # ingest features in table - for ingest_feature in ingest_table["features"]: - - wu = self.get_feature_wu(ingest_table, ingest_feature) - self.report.report_workunit(wu) - yield wu - - wu = self.get_feature_table_wu(ingest_table) - self.report.report_workunit(wu) - yield wu - - def get_report(self) -> FeastSourceReport: - return self.report diff --git a/metadata-ingestion/src/datahub/ingestion/source/file.py b/metadata-ingestion/src/datahub/ingestion/source/file.py index d4a7ab97cea91..fa96cdcaa00bf 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/file.py +++ b/metadata-ingestion/src/datahub/ingestion/source/file.py @@ -45,7 +45,7 @@ class FileReadMode(ConfigEnum): class FileSourceConfig(ConfigModel): filename: Optional[str] = Field( - None, description="[deprecated in favor or `path`] The file to ingest." + None, description="[deprecated in favor of `path`] The file to ingest." ) path: pathlib.Path = Field( description="Path to folder or file to ingest. If pointed to a folder, all files with extension {file_extension} (default json) within that folder will be processed." 
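As an aside on the new `demo-data` source registered in `setup.py` and implemented in `demo_data.py` above: it is simply `GenericFileSource` pointed at the downloaded sample file, so it can be smoke-tested programmatically. A hypothetical sketch, not part of this patch, assuming the plugin is installed and reusing the `console` sink that is already registered:

```python
from datahub.ingestion.run.pipeline import Pipeline

# Pull the bootstrap MCE file via the demo-data source and print the events
# to the console instead of sending them to a live DataHub instance.
pipeline = Pipeline.create(
    {
        "source": {"type": "demo-data", "config": {}},
        "sink": {"type": "console", "config": {}},
    }
)
pipeline.run()
pipeline.raise_from_status()
```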
diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py index 8860162660a4d..7e9dab140465e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py @@ -568,39 +568,28 @@ def generate_dataset_profile( # noqa: C901 (complexity) or type_ == ProfilerDataType.FLOAT or type_ == ProfilerDataType.NUMERIC ): - if cardinality == Cardinality.UNIQUE: - pass - elif cardinality in [ + self._get_dataset_column_min(column_profile, column) + self._get_dataset_column_max(column_profile, column) + self._get_dataset_column_mean(column_profile, column) + self._get_dataset_column_median(column_profile, column) + self._get_dataset_column_stdev(column_profile, column) + + if cardinality in [ Cardinality.ONE, Cardinality.TWO, Cardinality.VERY_FEW, + ]: + self._get_dataset_column_distinct_value_frequencies( + column_profile, + column, + ) + if cardinality in { Cardinality.FEW, Cardinality.MANY, Cardinality.VERY_MANY, - Cardinality.UNIQUE, - ]: - self._get_dataset_column_min(column_profile, column) - self._get_dataset_column_max(column_profile, column) - self._get_dataset_column_mean(column_profile, column) - self._get_dataset_column_median(column_profile, column) - - if type_ == ProfilerDataType.INT: - self._get_dataset_column_stdev(column_profile, column) - + }: self._get_dataset_column_quantiles(column_profile, column) self._get_dataset_column_histogram(column_profile, column) - if cardinality in [ - Cardinality.ONE, - Cardinality.TWO, - Cardinality.VERY_FEW, - Cardinality.FEW, - ]: - self._get_dataset_column_distinct_value_frequencies( - column_profile, - column, - ) - else: # unknown cardinality - skip - pass elif type_ == ProfilerDataType.STRING: if cardinality in [ diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py b/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py index 576d4de8c2568..609dda0a730b7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py @@ -102,12 +102,12 @@ class GEProfilingConfig(ConfigModel): ) profile_table_size_limit: Optional[int] = Field( - default=1, + default=5, description="Profile tables only if their size is less then specified GBs. If set to `null`, no limit on the size of tables to profile. Supported only in `snowflake` and `BigQuery`", ) profile_table_row_limit: Optional[int] = Field( - default=50000, + default=5000000, description="Profile tables only if their row count is less then specified count. If set to `null`, no limit on the row count of tables to profile. Supported only in `snowflake` and `BigQuery`", ) @@ -142,7 +142,6 @@ def deprecate_bigquery_temp_table_schema(cls, values): if "bigquery_temp_table_schema" in values: logger.warning( "The bigquery_temp_table_schema config is no longer required. 
Please remove it from your config.", - DeprecationWarning, ) del values["bigquery_temp_table_schema"] return values diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py index 431f90643d017..da11d62aea692 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py @@ -75,7 +75,7 @@ class KafkaConnectSourceConfig(DatasetLineageProviderConfigBase): ) generic_connectors: List[GenericConnectorConfig] = Field( default=[], - description="Provide lineage graph for sources connectors other than Confluent JDBC Source Connector or Debezium Source Connector", + description="Provide lineage graph for sources connectors other than Confluent JDBC Source Connector, Debezium Source Connector, and Mongo Source Connector", ) @@ -558,6 +558,68 @@ def _extract_lineages(self): return +@dataclass +class MongoSourceConnector: + # https://www.mongodb.com/docs/kafka-connector/current/source-connector/ + + connector_manifest: ConnectorManifest + + def __init__( + self, connector_manifest: ConnectorManifest, config: KafkaConnectSourceConfig + ) -> None: + self.connector_manifest = connector_manifest + self.config = config + self._extract_lineages() + + @dataclass + class MongoSourceParser: + db_connection_url: Optional[str] + source_platform: str + database_name: Optional[str] + topic_prefix: Optional[str] + transforms: List[str] + + def get_parser( + self, + connector_manifest: ConnectorManifest, + ) -> MongoSourceParser: + parser = self.MongoSourceParser( + db_connection_url=connector_manifest.config.get("connection.uri"), + source_platform="mongodb", + database_name=connector_manifest.config.get("database"), + topic_prefix=connector_manifest.config.get("topic_prefix"), + transforms=connector_manifest.config["transforms"].split(",") + if "transforms" in connector_manifest.config + else [], + ) + + return parser + + def _extract_lineages(self): + lineages: List[KafkaConnectLineage] = list() + parser = self.get_parser(self.connector_manifest) + source_platform = parser.source_platform + topic_naming_pattern = r"mongodb\.(\w+)\.(\w+)" + + if not self.connector_manifest.topic_names: + return lineages + + for topic in self.connector_manifest.topic_names: + found = re.search(re.compile(topic_naming_pattern), topic) + + if found: + table_name = get_dataset_name(found.group(1), None, found.group(2)) + + lineage = KafkaConnectLineage( + source_dataset=table_name, + source_platform=source_platform, + target_dataset=topic, + target_platform="kafka", + ) + lineages.append(lineage) + self.connector_manifest.lineages = lineages + + @dataclass class DebeziumSourceConnector: connector_manifest: ConnectorManifest @@ -848,7 +910,7 @@ class KafkaConnectSource(Source): Current limitations: - works only for - - JDBC and Debezium source connectors + - JDBC, Debezium, and Mongo source connectors - Generic connectors with user-defined lineage graph - BigQuery sink connector """ @@ -943,6 +1005,13 @@ def get_connectors_manifest(self) -> List[ConnectorManifest]: connector_manifest = DebeziumSourceConnector( connector_manifest=connector_manifest, config=self.config ).connector_manifest + elif ( + connector_manifest.config.get("connector.class", "") + == "com.mongodb.kafka.connect.MongoSourceConnector" + ): + connector_manifest = MongoSourceConnector( + connector_manifest=connector_manifest, config=self.config + ).connector_manifest else: # Find the target 
connector object in the list, or log an error if unknown. target_connector = None diff --git a/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py b/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py index 9cd73057f1d31..068e107c76279 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py @@ -1,7 +1,10 @@ import logging +import pathlib +import time from dataclasses import dataclass, field from typing import Any, Dict, Iterable, List, Optional, Union +import pydantic from pydantic import validator from pydantic.fields import Field @@ -14,6 +17,7 @@ make_group_urn, make_user_urn, ) +from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.decorators import ( # SourceCapability,; capability, SupportStatus, config_class, @@ -22,6 +26,7 @@ ) from datahub.ingestion.api.source import Source, SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit, UsageStatsWorkUnit +from datahub.utilities.urn_encoder import UrnEncoder logger = logging.getLogger(__name__) @@ -40,6 +45,11 @@ class Owners(ConfigModel): groups: Optional[List[str]] +class KnowledgeCard(ConfigModel): + url: Optional[str] + label: Optional[str] + + class GlossaryTermConfig(ConfigModel): id: Optional[str] name: str @@ -53,6 +63,7 @@ class GlossaryTermConfig(ConfigModel): values: Optional[List[str]] related_terms: Optional[List[str]] custom_properties: Optional[Dict[str, str]] + knowledge_links: Optional[List[KnowledgeCard]] class GlossaryNodeConfig(ConfigModel): @@ -62,6 +73,7 @@ class GlossaryNodeConfig(ConfigModel): owners: Optional[Owners] terms: Optional[List[GlossaryTermConfig]] nodes: Optional[List["GlossaryNodeConfig"]] + knowledge_links: Optional[List[KnowledgeCard]] GlossaryNodeConfig.update_forward_refs() @@ -77,7 +89,9 @@ class DefaultConfig(ConfigModel): class BusinessGlossarySourceConfig(ConfigModel): - file: str = Field(description="Path to business glossary file to ingest.") + file: pydantic.FilePath = Field( + description="Path to business glossary file to ingest." 
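The business glossary hunks above add an optional `knowledge_links` list (`KnowledgeCard` entries with `label` and `url`) to glossary nodes and terms; later in this diff, `make_institutional_memory_mcp` turns those entries into institutional-memory links and keeps only cards that carry both fields. A small stand-alone illustration of that filtering, using an invented term entry rather than the real pydantic models:

```python
from typing import Dict, List, Optional

# Illustrative glossary term as it might look after YAML parsing; the field
# names mirror the KnowledgeCard model added above (label, url).
term = {
    "name": "Customer",
    "knowledge_links": [
        {"label": "Definition wiki", "url": "https://wiki.example.com/customer"},
        {"label": None, "url": "https://example.com/ignored"},  # skipped: no label
    ],
}

def institutional_memory_elements(
    cards: List[Dict[str, Optional[str]]]
) -> List[Dict[str, str]]:
    # Mirrors make_institutional_memory_mcp: only cards with BOTH a label and a
    # url become links; incomplete cards are silently dropped.
    return [
        {"url": card["url"], "description": card["label"]}
        for card in cards
        if card.get("label") and card.get("url")
    ]

print(institutional_memory_elements(term["knowledge_links"]))  # one element survives
```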
+ ) enable_auto_id: bool = Field( description="Generate id field from GlossaryNode and GlossaryTerm's name field", default=False, @@ -101,6 +115,10 @@ def create_id(path: List[str], default_id: Optional[str], enable_auto_id: bool) return default_id # No need to create id from path as default_id is provided id_: str = ".".join(path) + + if UrnEncoder.contains_reserved_char(id_): + enable_auto_id = True + if enable_auto_id: id_ = datahub_guid({"path": id_}) return id_ @@ -153,14 +171,13 @@ def get_owners(owners: Owners) -> models.OwnershipClass: def get_mces( glossary: BusinessGlossaryConfig, ingestion_config: BusinessGlossarySourceConfig -) -> List[models.MetadataChangeEventClass]: - events: List[models.MetadataChangeEventClass] = [] +) -> Iterable[Union[MetadataChangeProposalWrapper, models.MetadataChangeEventClass]]: path: List[str] = [] root_owners = get_owners(glossary.owners) if glossary.nodes: for node in glossary.nodes: - events += get_mces_from_node( + yield from get_mces_from_node( node, path + [node.name], parentNode=None, @@ -171,7 +188,7 @@ def get_mces( if glossary.terms: for term in glossary.terms: - events += get_mces_from_term( + yield from get_mces_from_term( term, path + [term.name], parentNode=None, @@ -180,13 +197,39 @@ def get_mces( ingestion_config=ingestion_config, ) - return events - def get_mce_from_snapshot(snapshot: Any) -> models.MetadataChangeEventClass: return models.MetadataChangeEventClass(proposedSnapshot=snapshot) +def make_institutional_memory_mcp( + urn: str, knowledge_cards: List[KnowledgeCard] +) -> Optional[MetadataChangeProposalWrapper]: + elements: List[models.InstitutionalMemoryMetadataClass] = [] + + for knowledge_card in knowledge_cards: + if knowledge_card.label and knowledge_card.url: + elements.append( + models.InstitutionalMemoryMetadataClass( + url=knowledge_card.url, + description=knowledge_card.label, + createStamp=models.AuditStampClass( + time=int(time.time() * 1000.0), + actor="urn:li:corpuser:datahub", + message="ingestion bot", + ), + ) + ) + + if elements: + return MetadataChangeProposalWrapper( + entityUrn=urn, + aspect=models.InstitutionalMemoryClass(elements=elements), + ) + + return None + + def get_mces_from_node( glossaryNode: GlossaryNodeConfig, path: List[str], @@ -194,7 +237,7 @@ def get_mces_from_node( parentOwners: models.OwnershipClass, defaults: DefaultConfig, ingestion_config: BusinessGlossarySourceConfig, -) -> List[models.MetadataChangeEventClass]: +) -> Iterable[Union[MetadataChangeProposalWrapper, models.MetadataChangeEventClass]]: node_urn = make_glossary_node_urn( path, glossaryNode.id, ingestion_config.enable_auto_id ) @@ -212,10 +255,18 @@ def get_mces_from_node( urn=node_urn, aspects=[node_info, node_owners, valid_status], ) - mces = [get_mce_from_snapshot(node_snapshot)] + yield get_mce_from_snapshot(node_snapshot) + + if glossaryNode.knowledge_links is not None: + mcp: Optional[MetadataChangeProposalWrapper] = make_institutional_memory_mcp( + node_urn, glossaryNode.knowledge_links + ) + if mcp is not None: + yield mcp + if glossaryNode.nodes: for node in glossaryNode.nodes: - mces += get_mces_from_node( + yield from get_mces_from_node( node, path + [node.name], parentNode=node_urn, @@ -226,7 +277,7 @@ def get_mces_from_node( if glossaryNode.terms: for term in glossaryNode.terms: - mces += get_mces_from_term( + yield from get_mces_from_term( glossaryTerm=term, path=path + [term.name], parentNode=node_urn, @@ -234,7 +285,6 @@ def get_mces_from_node( defaults=defaults, ingestion_config=ingestion_config, ) - 
return mces def get_mces_from_term( @@ -244,7 +294,7 @@ def get_mces_from_term( parentOwnership: models.OwnershipClass, defaults: DefaultConfig, ingestion_config: BusinessGlossarySourceConfig, -) -> List[models.MetadataChangeEventClass]: +) -> Iterable[Union[models.MetadataChangeEventClass, MetadataChangeProposalWrapper]]: term_urn = make_glossary_term_urn( path, glossaryTerm.id, ingestion_config.enable_auto_id ) @@ -338,14 +388,18 @@ def get_mces_from_term( ownership = get_owners(glossaryTerm.owners) aspects.append(ownership) - term_browse = models.BrowsePathsClass(paths=["/" + "/".join(path)]) - aspects.append(term_browse) - term_snapshot: models.GlossaryTermSnapshotClass = models.GlossaryTermSnapshotClass( urn=term_urn, aspects=aspects, ) - return [get_mce_from_snapshot(term_snapshot)] + yield get_mce_from_snapshot(term_snapshot) + + if glossaryTerm.knowledge_links: + mcp: Optional[MetadataChangeProposalWrapper] = make_institutional_memory_mcp( + term_urn, glossaryTerm.knowledge_links + ) + if mcp is not None: + yield mcp def populate_path_vs_id(glossary: BusinessGlossaryConfig) -> None: @@ -388,7 +442,7 @@ def create(cls, config_dict, ctx): config = BusinessGlossarySourceConfig.parse_obj(config_dict) return cls(ctx, config) - def load_glossary_config(self, file_name: str) -> BusinessGlossaryConfig: + def load_glossary_config(self, file_name: pathlib.Path) -> BusinessGlossaryConfig: config = load_config_file(file_name) glossary_cfg = BusinessGlossaryConfig.parse_obj(config) return glossary_cfg @@ -396,10 +450,17 @@ def load_glossary_config(self, file_name: str) -> BusinessGlossaryConfig: def get_workunits(self) -> Iterable[Union[MetadataWorkUnit, UsageStatsWorkUnit]]: glossary_config = self.load_glossary_config(self.config.file) populate_path_vs_id(glossary_config) - for mce in get_mces(glossary_config, ingestion_config=self.config): - wu = MetadataWorkUnit(f"{mce.proposedSnapshot.urn}", mce=mce) - self.report.report_workunit(wu) - yield wu + for event in get_mces(glossary_config, ingestion_config=self.config): + if isinstance(event, models.MetadataChangeEventClass): + wu = MetadataWorkUnit(f"{event.proposedSnapshot.urn}", mce=event) + self.report.report_workunit(wu) + yield wu + elif isinstance(event, MetadataChangeProposalWrapper): + wu = MetadataWorkUnit( + id=f"{event.entityType}-{event.aspectName}-{event.entityUrn}", + mcp=event, + ) + yield wu def get_report(self): return self.report diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py index 6a22b0be2f8b1..b0c9bab8adc77 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py @@ -5,7 +5,7 @@ ######################################################### import logging from dataclasses import dataclass, field as dataclass_field -from typing import Any, Dict, Iterable, List +from typing import Any, Dict, Iterable, List, Optional import pydantic import requests @@ -68,7 +68,9 @@ class PowerBiReportServerAPIConfig(EnvBasedSourceConfigBase): server_alias: str = pydantic.Field( default="", description="Alias for Power BI Report Server host URL" ) - graphql_url: str = pydantic.Field(description="GraphQL API URL") + graphql_url: Optional[str] = pydantic.Field( + default=None, description="[deprecated] Not used" + ) report_virtual_directory_name: str = pydantic.Field( 
description="Report Virtual Directory URL name" ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py index 43a14228e0efd..643ba4f1db579 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py @@ -49,6 +49,11 @@ class SnowflakeV2Config(SnowflakeConfig, SnowflakeUsageConfig): description="Whether to populate Snowsight url for Snowflake Objects", ) + match_fully_qualified_names = bool = Field( + default=False, + description="Whether `schema_pattern` is matched against fully qualified schema name `.`.", + ) + @root_validator(pre=False) def validate_unsupported_configs(cls, values: Dict) -> Dict: @@ -70,11 +75,26 @@ def validate_unsupported_configs(cls, values: Dict) -> Dict: "include_read_operational_stats is not supported. Set `include_read_operational_stats` to False.", ) + match_fully_qualified_names = values.get("match_fully_qualified_names") + + schema_pattern: Optional[AllowDenyPattern] = values.get("schema_pattern") + + if ( + schema_pattern is not None + and schema_pattern != AllowDenyPattern.allow_all() + and match_fully_qualified_names is not None + and not match_fully_qualified_names + ): + logger.warning( + "Please update `schema_pattern` to match against fully qualified schema name `.` and set config `match_fully_qualified_names : True`." + "Current default `match_fully_qualified_names: False` is only to maintain backward compatibility. " + "The config option `match_fully_qualified_names` will be deprecated in future and the default behavior will assume `match_fully_qualified_names: True`." + ) + # Always exclude reporting metadata for INFORMATION_SCHEMA schema - schema_pattern = values.get("schema_pattern") if schema_pattern is not None and schema_pattern: logger.debug("Adding deny for INFORMATION_SCHEMA to schema_pattern.") - cast(AllowDenyPattern, schema_pattern).deny.append(r"^INFORMATION_SCHEMA$") + cast(AllowDenyPattern, schema_pattern).deny.append(r".*INFORMATION_SCHEMA$") include_technical_schema = values.get("include_technical_schema") include_profiles = ( diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py index 9fe9696a0f2a0..207c2d3dec732 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py @@ -1,12 +1,12 @@ import dataclasses -import datetime import logging -from typing import Callable, Dict, Iterable, List, Optional, Tuple, cast +from typing import Callable, Iterable, List, Optional, cast from snowflake.sqlalchemy import snowdialect from sqlalchemy import create_engine, inspect from sqlalchemy.sql import sqltypes +from datahub.configuration.pattern_utils import is_schema_allowed from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance from datahub.ingestion.api.common import WorkUnit from datahub.ingestion.source.ge_data_profiler import ( @@ -20,9 +20,14 @@ SnowflakeDatabase, SnowflakeTable, ) -from datahub.ingestion.source.snowflake.snowflake_utils import SnowflakeCommonMixin -from datahub.metadata.com.linkedin.pegasus2avro.dataset import DatasetProfile -from datahub.metadata.schema_classes import DatasetProfileClass +from 
datahub.ingestion.source.snowflake.snowflake_utils import ( + SnowflakeCommonMixin, + SnowflakeCommonProtocol, +) +from datahub.ingestion.source.sql.sql_generic_profiler import ( + GenericProfiler, + TableProfilerRequest, +) snowdialect.ischema_names["GEOGRAPHY"] = sqltypes.NullType @@ -35,14 +40,17 @@ class SnowflakeProfilerRequest(GEProfilerRequest): profile_table_level_only: bool = False -class SnowflakeProfiler(SnowflakeCommonMixin): +class SnowflakeProfiler(SnowflakeCommonMixin, GenericProfiler, SnowflakeCommonProtocol): + config: SnowflakeV2Config + report: SnowflakeV2Report + def __init__(self, config: SnowflakeV2Config, report: SnowflakeV2Report) -> None: + super().__init__(config, report, self.platform) self.config = config self.report = report self.logger = logger def get_workunits(self, databases: List[SnowflakeDatabase]) -> Iterable[WorkUnit]: - # Extra default SQLAlchemy option for better connection pooling and threading. # https://docs.sqlalchemy.org/en/14/core/pooling.html#sqlalchemy.pool.QueuePool.params.max_overflow if self.config.profiling.enabled: @@ -55,7 +63,12 @@ def get_workunits(self, databases: List[SnowflakeDatabase]) -> Iterable[WorkUnit continue profile_requests = [] for schema in db.schemas: - if not self.config.schema_pattern.allowed(schema.name): + if not is_schema_allowed( + self.config.schema_pattern, + schema.name, + db.name, + self.config.match_fully_qualified_names, + ): continue for table in schema.tables: @@ -68,10 +81,13 @@ def get_workunits(self, databases: List[SnowflakeDatabase]) -> Iterable[WorkUnit if len(profile_requests) == 0: continue + + table_profile_requests = cast(List[TableProfilerRequest], profile_requests) + for request, profile in self.generate_profiles( - db.name, - profile_requests, + table_profile_requests, self.config.profiling.max_workers, + db.name, platform=self.platform, profiler_args=self.get_profile_args(), ): @@ -133,47 +149,11 @@ def get_snowflake_profile_request( ) return profile_request - def is_dataset_eligible_for_profiling( - self, - dataset_name: str, - last_altered: datetime.datetime, - size_in_bytes: int, - rows_count: Optional[int], - ) -> bool: - threshold_time: Optional[datetime.datetime] = None - if self.config.profiling.profile_if_updated_since_days is not None: - threshold_time = datetime.datetime.now( - datetime.timezone.utc - ) - datetime.timedelta(self.config.profiling.profile_if_updated_since_days) - - return ( - ( - self.config.table_pattern.allowed(dataset_name) - and self.config.profile_pattern.allowed(dataset_name) - ) - and (threshold_time is None or last_altered >= threshold_time) - and ( - self.config.profiling.profile_table_size_limit is None - or ( - size_in_bytes is not None - and size_in_bytes / (2**30) - <= self.config.profiling.profile_table_size_limit - ) - # Note: Profiling is not allowed is size_in_bytes is not available - # and self.config.profiling.profile_table_size_limit is set - ) - and ( - self.config.profiling.profile_table_row_limit is None - or ( - rows_count is not None - and rows_count <= self.config.profiling.profile_table_row_limit - ) - # Note: Profiling is not allowed is rows_count is not available - # and self.config.profiling.profile_table_row_limit is set - ) - ) + def get_profiler_instance( + self, db_name: Optional[str] = None + ) -> "DatahubGEProfiler": + assert db_name - def get_profiler_instance(self, db_name: str) -> "DatahubGEProfiler": url = self.config.get_sql_alchemy_url( database=db_name, username=self.config.username, @@ -198,10 +178,6 @@ def 
get_profiler_instance(self, db_name: str) -> "DatahubGEProfiler": platform=self.platform, ) - def get_profile_args(self) -> Dict: - """Passed down to GE profiler""" - return {} - def callable_for_db_connection(self, db_name: str) -> Callable: def get_db_connection(): conn = self.config.get_connection() @@ -209,38 +185,3 @@ def get_db_connection(): return conn return get_db_connection - - def generate_profiles( - self, - db_name: str, - requests: List[SnowflakeProfilerRequest], - max_workers: int, - platform: Optional[str] = None, - profiler_args: Optional[Dict] = None, - ) -> Iterable[Tuple[GEProfilerRequest, Optional[DatasetProfileClass]]]: - - ge_profile_requests: List[GEProfilerRequest] = [ - cast(GEProfilerRequest, request) - for request in requests - if not request.profile_table_level_only - ] - table_level_profile_requests: List[SnowflakeProfilerRequest] = [ - request for request in requests if request.profile_table_level_only - ] - for request in table_level_profile_requests: - profile = DatasetProfile( - timestampMillis=round(datetime.datetime.now().timestamp() * 1000), - columnCount=len(request.table.columns), - rowCount=request.table.rows_count, - sizeInBytes=request.table.size_in_bytes, - ) - yield (request, profile) - - if len(ge_profile_requests) == 0: - return - - # Otherwise, if column level profiling is enabled, use GE profiler. - ge_profiler = self.get_profiler_instance(db_name) - yield from ge_profiler.generate_profiles( - ge_profile_requests, max_workers, platform, profiler_args - ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py index 48299d7cc4095..1c2d780afa8fe 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py @@ -30,6 +30,17 @@ def show_databases() -> str: def use_database(db_name: str) -> str: return f'use database "{db_name}"' + @staticmethod + def get_databases(db_name: Optional[str]) -> str: + db_clause = f'"{db_name}".' if db_name is not None else "" + return f""" + SELECT database_name AS "DATABASE_NAME", + created AS "CREATED", + last_altered AS "LAST_ALTERED", + comment AS "COMMENT" + from {db_clause}information_schema.databases + order by database_name""" + @staticmethod def schemas_for_database(db_name: Optional[str]) -> str: db_clause = f'"{db_name}".' 
if db_name is not None else "" diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py index 1b4d2e4a4f23c..e70c48771c49b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py @@ -1,8 +1,9 @@ +from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport from datahub.ingestion.source_report.sql.snowflake import SnowflakeReport from datahub.ingestion.source_report.usage.snowflake_usage import SnowflakeUsageReport -class SnowflakeV2Report(SnowflakeReport, SnowflakeUsageReport): +class SnowflakeV2Report(SnowflakeReport, SnowflakeUsageReport, ProfilingSqlReport): schemas_scanned: int = 0 databases_scanned: int = 0 diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py index ab5c0799bfd76..c1d960ee297f3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py @@ -8,6 +8,7 @@ from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery from datahub.ingestion.source.snowflake.snowflake_utils import SnowflakeQueryMixin +from datahub.ingestion.source.sql.sql_generic import BaseColumn, BaseTable, BaseView logger: logging.Logger = logging.getLogger(__name__) @@ -28,13 +29,8 @@ class SnowflakeFK: referred_column_names: List[str] -@dataclass -class SnowflakeColumn: - name: str - ordinal_position: int - is_nullable: bool - data_type: str - comment: Optional[str] +@dataclass(frozen=True, eq=True) +class SnowflakeColumn(BaseColumn): character_maximum_length: Optional[int] numeric_precision: Optional[int] numeric_scale: Optional[int] @@ -60,14 +56,8 @@ def get_precise_native_type(self): @dataclass -class SnowflakeTable: - name: str - created: datetime - last_altered: datetime - size_in_bytes: int - rows_count: int - comment: Optional[str] - clustering_key: str +class SnowflakeTable(BaseTable): + clustering_key: Optional[str] = None pk: Optional[SnowflakePK] = None columns: List[SnowflakeColumn] = field(default_factory=list) foreign_keys: List[SnowflakeFK] = field(default_factory=list) @@ -75,20 +65,15 @@ class SnowflakeTable: @dataclass -class SnowflakeView: - name: str - created: datetime - comment: Optional[str] - view_definition: str - last_altered: Optional[datetime] = None +class SnowflakeView(BaseView): columns: List[SnowflakeColumn] = field(default_factory=list) @dataclass class SnowflakeSchema: name: str - created: datetime - last_altered: datetime + created: Optional[datetime] + last_altered: Optional[datetime] comment: Optional[str] tables: List[SnowflakeTable] = field(default_factory=list) views: List[SnowflakeView] = field(default_factory=list) @@ -97,8 +82,9 @@ class SnowflakeSchema: @dataclass class SnowflakeDatabase: name: str - created: datetime + created: Optional[datetime] comment: Optional[str] + last_altered: Optional[datetime] = None schemas: List[SnowflakeSchema] = field(default_factory=list) @@ -106,7 +92,7 @@ class SnowflakeDataDictionary(SnowflakeQueryMixin): def __init__(self) -> None: self.logger = logger - def get_databases(self, conn: SnowflakeConnection) -> List[SnowflakeDatabase]: + def show_databases(self, conn: SnowflakeConnection) -> List[SnowflakeDatabase]: databases: 
List[SnowflakeDatabase] = [] @@ -125,6 +111,28 @@ def get_databases(self, conn: SnowflakeConnection) -> List[SnowflakeDatabase]: return databases + def get_databases( + self, conn: SnowflakeConnection, db_name: str + ) -> List[SnowflakeDatabase]: + + databases: List[SnowflakeDatabase] = [] + + cur = self.query( + conn, + SnowflakeQuery.get_databases(db_name), + ) + + for database in cur: + snowflake_db = SnowflakeDatabase( + name=database["DATABASE_NAME"], + created=database["CREATED"], + last_altered=database["LAST_ALTERED"], + comment=database["COMMENT"], + ) + databases.append(snowflake_db) + + return databases + def get_schemas_for_database( self, conn: SnowflakeConnection, db_name: str ) -> List[SnowflakeSchema]: @@ -225,6 +233,7 @@ def get_views_for_database( # last_altered=table["last_altered"], comment=table["comment"], view_definition=table["text"], + last_altered=table["created_on"], ) ) return views @@ -245,6 +254,7 @@ def get_views_for_schema( # last_altered=table["last_altered"], comment=table["comment"], view_definition=table["text"], + last_altered=table["created_on"], ) ) return views diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py index dadae620956c7..6ea5c442dc875 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py @@ -7,6 +7,7 @@ from snowflake.connector.cursor import DictCursor from typing_extensions import Protocol +from datahub.configuration.pattern_utils import is_schema_allowed from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config @@ -30,23 +31,12 @@ class SnowflakeCloudProvider(str, Enum): # Required only for mypy, since we are using mixin classes, and not inheritance. # Reference - https://mypy.readthedocs.io/en/latest/more_types.html#mixin-classes class SnowflakeLoggingProtocol(Protocol): - @property - def logger(self) -> logging.Logger: - ... - + logger: logging.Logger -class SnowflakeCommonProtocol(Protocol): - @property - def logger(self) -> logging.Logger: - ... - @property - def config(self) -> SnowflakeV2Config: - ... - - @property - def report(self) -> SnowflakeV2Report: - ... 
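The snowflake_utils.py hunk around this point replaces the property-based Protocol members with plain class attributes, which is the lighter-weight way to type mixin methods under mypy. A self-contained sketch of the pattern with made-up names (not the real DataHub classes):

```python
import logging
import re
from typing import Protocol


class HasConfig(Protocol):
    # Plain attributes are sufficient for structural typing; @property is not needed.
    logger: logging.Logger
    schema_pattern: str


class FilterMixin:
    # Annotating `self` with the protocol lets mypy check attribute access
    # without the mixin inheriting from a concrete base class.
    def is_schema_allowed(self: HasConfig, schema_name: str) -> bool:
        allowed = re.fullmatch(self.schema_pattern, schema_name) is not None
        self.logger.debug("schema %s allowed=%s", schema_name, allowed)
        return allowed


class Source(FilterMixin):
    def __init__(self) -> None:
        self.logger = logging.getLogger(__name__)
        self.schema_pattern = r"PUBLIC|SALES_.*"


print(Source().is_schema_allowed("SALES_EU"))  # True
```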
+class SnowflakeCommonProtocol(SnowflakeLoggingProtocol, Protocol): + config: SnowflakeV2Config + report: SnowflakeV2Report def get_dataset_identifier( self, table_name: str, schema_name: str, db_name: str @@ -136,7 +126,12 @@ def _is_dataset_pattern_allowed( if not self.config.database_pattern.allowed( dataset_params[0].strip('"') - ) or not self.config.schema_pattern.allowed(dataset_params[1].strip('"')): + ) or not is_schema_allowed( + self.config.schema_pattern, + dataset_params[1].strip('"'), + dataset_params[0].strip('"'), + self.config.match_fully_qualified_names, + ): return False if dataset_type.lower() in {"table"} and not self.config.table_pattern.allowed( diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index 352da03ce83f5..60731c2d787dc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -7,6 +7,7 @@ import pydantic from snowflake.connector import SnowflakeConnection +from datahub.configuration.pattern_utils import is_schema_allowed from datahub.emitter.mce_builder import ( make_container_urn, make_data_platform_urn, @@ -80,7 +81,11 @@ from datahub.ingestion.source.state.stateful_ingestion_base import ( StatefulIngestionSourceBase, ) -from datahub.metadata.com.linkedin.pegasus2avro.common import Status, SubTypes +from datahub.metadata.com.linkedin.pegasus2avro.common import ( + Status, + SubTypes, + TimeStamp, +) from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( DatasetProperties, UpstreamLineage, @@ -437,7 +442,8 @@ def get_workunits(self) -> Iterable[WorkUnit]: self.report.include_technical_schema = self.config.include_technical_schema databases: List[SnowflakeDatabase] = [] - databases = self.data_dictionary.get_databases(conn) + databases = self.get_databases(conn) + for snowflake_db in databases: self.report.report_entity_scanned(snowflake_db.name, "database") @@ -480,6 +486,31 @@ def get_workunits(self) -> Iterable[WorkUnit]: ] yield from self.usage_extractor.get_workunits(discovered_datasets) + def get_databases(self, conn): + databases = self.data_dictionary.show_databases(conn) + + # Below code block is required to enrich database with additional + # information that is missing in `show databases` results + # For example - last modified time of database + ischema_database_map: Dict[str, SnowflakeDatabase] = {} + for database in databases: + try: + ischema_databases = self.data_dictionary.get_databases( + conn, database.name + ) + ischema_database_map = {db.name: db for db in ischema_databases} + break + except Exception: + # query fails if "USAGE" access is not granted for database + logger.debug( + f"Failed to list databases {database.name} information_schema" + ) + for database in databases: + if database.name in ischema_database_map.keys(): + database.last_altered = ischema_database_map[database.name].last_altered + + return databases + def _process_database( self, conn: SnowflakeConnection, snowflake_db: SnowflakeDatabase ) -> Iterable[MetadataWorkUnit]: @@ -508,7 +539,12 @@ def _process_database( self.report.report_entity_scanned(snowflake_schema.name, "schema") - if not self.config.schema_pattern.allowed(snowflake_schema.name): + if not is_schema_allowed( + self.config.schema_pattern, + snowflake_schema.name, + db_name, + self.config.match_fully_qualified_names, + ): 
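The new `match_fully_qualified_names` option changes which string `schema_pattern` is tested against: the bare schema name (the backward-compatible default) or the fully qualified `database.schema`. The `is_schema_allowed` helper imported from `datahub.configuration.pattern_utils` is not shown in this diff; a rough sketch of the behaviour it appears to enable, with an invented function name and simplified pattern handling, is:

```python
import re
from typing import List

def is_schema_allowed_sketch(
    allow_patterns: List[str],
    schema_name: str,
    db_name: str,
    match_fully_qualified_names: bool,
) -> bool:
    # With the flag enabled the pattern sees "database.schema"; with it
    # disabled (the legacy default) it sees only the schema name.
    target = f"{db_name}.{schema_name}" if match_fully_qualified_names else schema_name
    return any(re.fullmatch(p, target, re.IGNORECASE) for p in allow_patterns)

# Legacy behaviour: the pattern matches the bare schema name.
print(is_schema_allowed_sketch([r"PUBLIC"], "PUBLIC", "SALES_DB", False))           # True
# Fully qualified behaviour: the pattern must cover "SALES_DB.PUBLIC".
print(is_schema_allowed_sketch([r"SALES_DB\.PUBLIC"], "PUBLIC", "SALES_DB", True))  # True
```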
self.report.report_dropped(f"{db_name}.{snowflake_schema.name}.*") continue @@ -643,6 +679,14 @@ def gen_dataset_workunits( dataset_properties = DatasetProperties( name=table.name, + created=TimeStamp(time=int(table.created.timestamp() * 1000)) + if table.created is not None + else None, + lastModified=TimeStamp(time=int(table.last_altered.timestamp() * 1000)) + if table.last_altered is not None + else TimeStamp(time=int(table.created.timestamp() * 1000)) + if table.created is not None + else None, description=table.comment, qualifiedName=dataset_name, customProperties={**upstream_column_props}, @@ -900,6 +944,14 @@ def gen_database_containers( external_url=self.get_external_url_for_database(database.name) if self.config.include_external_url else None, + created=int(database.created.timestamp() * 1000) + if database.created is not None + else None, + last_modified=int(database.last_altered.timestamp() * 1000) + if database.last_altered is not None + else int(database.created.timestamp() * 1000) + if database.created is not None + else None, ) self.stale_entity_removal_handler.add_entity_to_state( @@ -916,6 +968,8 @@ def gen_database_containers( def gen_schema_containers( self, schema: SnowflakeSchema, db_name: str ) -> Iterable[MetadataWorkUnit]: + domain_urn = self._gen_domain_urn(f"{db_name}.{schema.name}") + schema_container_key = self.gen_schema_key( self.snowflake_identifier(db_name), self.snowflake_identifier(schema.name), @@ -933,9 +987,18 @@ def gen_schema_containers( description=schema.comment, sub_types=[SqlContainerSubTypes.SCHEMA], parent_container_key=database_container_key, + domain_urn=domain_urn, external_url=self.get_external_url_for_schema(schema.name, db_name) if self.config.include_external_url else None, + created=int(schema.created.timestamp() * 1000) + if schema.created is not None + else None, + last_modified=int(schema.last_altered.timestamp() * 1000) + if schema.last_altered is not None + else int(schema.created.timestamp() * 1000) + if schema.created is not None + else None, ) for wu in container_workunits: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/snowflake.py b/metadata-ingestion/src/datahub/ingestion/source/sql/snowflake.py deleted file mode 100644 index c3c0518ead788..0000000000000 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/snowflake.py +++ /dev/null @@ -1,976 +0,0 @@ -import json -import logging -from collections import defaultdict -from dataclasses import dataclass -from datetime import datetime -from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union - -import pydantic - -# This import verifies that the dependencies are available. 
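The `gen_dataset_workunits` and container hunks a little above convert optional `datetime` values into epoch milliseconds, with the last-modified time falling back to the creation time when `last_altered` is unknown. A compact restatement of that conversion (the helper name is invented for illustration):

```python
from datetime import datetime, timezone
from typing import Optional

def to_epoch_millis(dt: Optional[datetime]) -> Optional[int]:
    # DataHub aspects expect integer milliseconds since the Unix epoch.
    return int(dt.timestamp() * 1000) if dt is not None else None

created = datetime(2022, 10, 1, tzinfo=timezone.utc)
last_altered: Optional[datetime] = None

# Same fallback as in the diff: use last_altered when present, else created.
last_modified_millis = to_epoch_millis(last_altered) or to_epoch_millis(created)
print(last_modified_millis)  # 1664582400000
```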
-import snowflake.sqlalchemy # noqa: F401 -import sqlalchemy.engine -from snowflake import connector -from snowflake.sqlalchemy import custom_types, snowdialect -from sqlalchemy import create_engine, inspect -from sqlalchemy.engine.reflection import Inspector -from sqlalchemy.sql import sqltypes, text - -import datahub.emitter.mce_builder as builder -from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.ingestion.api.common import PipelineContext -from datahub.ingestion.api.decorators import ( - SourceCapability, - SupportStatus, - capability, - config_class, - platform_name, - support_status, -) -from datahub.ingestion.api.source import ( - CapabilityReport, - TestableSource, - TestConnectionReport, -) -from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.ingestion.source.aws.s3_util import make_s3_urn -from datahub.ingestion.source.sql.sql_common import ( - RecordTypeClass, - SQLAlchemySource, - SqlWorkUnit, - TimeTypeClass, - register_custom_type, -) -from datahub.ingestion.source_config.sql.snowflake import SnowflakeConfig -from datahub.ingestion.source_report.sql.snowflake import SnowflakeReport -from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( - DatasetLineageTypeClass, - UpstreamClass, - UpstreamLineage, -) -from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot -from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent -from datahub.metadata.schema_classes import ChangeTypeClass, DatasetPropertiesClass - -register_custom_type(custom_types.TIMESTAMP_TZ, TimeTypeClass) -register_custom_type(custom_types.TIMESTAMP_LTZ, TimeTypeClass) -register_custom_type(custom_types.TIMESTAMP_NTZ, TimeTypeClass) -register_custom_type(custom_types.VARIANT, RecordTypeClass) - -logger: logging.Logger = logging.getLogger(__name__) - -snowdialect.ischema_names["GEOGRAPHY"] = sqltypes.NullType - - -@platform_name("Snowflake") -@config_class(SnowflakeConfig) -@support_status(SupportStatus.CERTIFIED) -@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") -@capability(SourceCapability.DOMAINS, "Supported via the `domain` config field") -@capability(SourceCapability.CONTAINERS, "Enabled by default") -@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") -@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration") -@capability(SourceCapability.DESCRIPTIONS, "Enabled by default") -@capability(SourceCapability.LINEAGE_COARSE, "Optionally enabled via configuration") -@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion") -class SnowflakeSource(SQLAlchemySource, TestableSource): - def __init__(self, config: SnowflakeConfig, ctx: PipelineContext): - super().__init__(config, ctx, "snowflake") - self._lineage_map: Optional[Dict[str, List[Tuple[str, str, str]]]] = None - self._external_lineage_map: Optional[Dict[str, Set[str]]] = None - self.report: SnowflakeReport = SnowflakeReport() - self.config: SnowflakeConfig = config - self.provision_role_in_progress: bool = False - self.profile_candidates: Dict[str, List[str]] = {} - - @staticmethod - def check_capabilities( - conn: connector.SnowflakeConnection, connection_conf: SnowflakeConfig - ) -> Dict[Union[SourceCapability, str], CapabilityReport]: - - # Currently only overall capabilities are reported. - # Resource level variations in capabilities are not considered. 
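The deleted `check_capabilities` helper, which continues below, infers source capabilities by walking Snowflake role grants transitively: any role the current roles have `USAGE` on is appended to the worklist, so the entire role hierarchy is inspected. The traversal in isolation, run against a fabricated grant table instead of a live connection:

```python
from typing import Dict, List, Tuple

# role -> list of (privilege, object_type, object_name) granted to it (fake data)
FAKE_GRANTS: Dict[str, List[Tuple[str, str, str]]] = {
    "ANALYST": [("USAGE", "ROLE", "READER"), ("USAGE", "DATABASE", "SALES_DB")],
    "READER": [("SELECT", "TABLE", "SALES_DB.PUBLIC.ORDERS")],
    "PUBLIC": [],
}

def collect_grants(start_roles: List[str]) -> List[Tuple[str, str, str]]:
    roles = list(start_roles)
    if "PUBLIC" not in roles:           # PUBLIC is implicitly granted to every role
        roles.append("PUBLIC")
    grants: List[Tuple[str, str, str]] = []
    i = 0
    while i < len(roles):               # the roles list may grow while iterating
        for priv, obj_type, obj_name in FAKE_GRANTS.get(roles[i], []):
            grants.append((priv, obj_type, obj_name))
            if obj_type == "ROLE" and priv == "USAGE" and obj_name not in roles:
                roles.append(obj_name)  # follow the role hierarchy transitively
        i += 1
    return grants

print(collect_grants(["ANALYST"]))
```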
- - @dataclass - class SnowflakePrivilege: - privilege: str - object_name: str - object_type: str - - def query(query): - logger.info("Query : {}".format(query)) - resp = conn.cursor().execute(query) - return resp - - _report: Dict[Union[SourceCapability, str], CapabilityReport] = dict() - privileges: List[SnowflakePrivilege] = [] - capabilities: List[SourceCapability] = [c.capability for c in SnowflakeSource.get_capabilities() if c.capability not in (SourceCapability.PLATFORM_INSTANCE, SourceCapability.DOMAINS, SourceCapability.DELETION_DETECTION)] # type: ignore - - cur = query("select current_role()") - current_role = [row[0] for row in cur][0] - - cur = query("select current_secondary_roles()") - secondary_roles_str = json.loads([row[0] for row in cur][0])["roles"] - secondary_roles = ( - [] if secondary_roles_str == "" else secondary_roles_str.split(",") - ) - - roles = [current_role] + secondary_roles - - # PUBLIC role is automatically granted to every role - if "PUBLIC" not in roles: - roles.append("PUBLIC") - i = 0 - - while i < len(roles): - role = roles[i] - i = i + 1 - # for some roles, quoting is necessary. for example test-role - cur = query(f'show grants to role "{role}"') - for row in cur: - privilege = SnowflakePrivilege( - privilege=row[1], object_type=row[2], object_name=row[3] - ) - privileges.append(privilege) - - if privilege.object_type in ( - "DATABASE", - "SCHEMA", - ) and privilege.privilege in ("OWNERSHIP", "USAGE"): - _report[SourceCapability.CONTAINERS] = CapabilityReport( - capable=True - ) - elif privilege.object_type in ( - "TABLE", - "VIEW", - "MATERIALIZED_VIEW", - ): - _report[SourceCapability.SCHEMA_METADATA] = CapabilityReport( - capable=True - ) - _report[SourceCapability.DESCRIPTIONS] = CapabilityReport( - capable=True - ) - - if privilege.privilege in ("SELECT", "OWNERSHIP"): - _report[SourceCapability.DATA_PROFILING] = CapabilityReport( - capable=True - ) - - if privilege.object_name.startswith("SNOWFLAKE.ACCOUNT_USAGE."): - # if access to "snowflake" shared database, access to all account_usage views is automatically granted - # Finer access control is not yet supported for shares - # https://community.snowflake.com/s/article/Error-Granting-individual-privileges-on-imported-database-is-not-allowed-Use-GRANT-IMPORTED-PRIVILEGES-instead - _report[SourceCapability.LINEAGE_COARSE] = CapabilityReport( - capable=True - ) - # If all capabilities supported, no need to continue - if set(capabilities) == set(_report.keys()): - break - - # Due to this, entire role hierarchy is considered - if ( - privilege.object_type == "ROLE" - and privilege.privilege == "USAGE" - and privilege.object_name not in roles - ): - roles.append(privilege.object_name) - - cur = query("select current_warehouse()") - current_warehouse = [row[0] for row in cur][0] - - default_failure_messages = { - SourceCapability.SCHEMA_METADATA: "Either no tables exist or current role does not have permissions to access them", - SourceCapability.DESCRIPTIONS: "Either no tables exist or current role does not have permissions to access them", - SourceCapability.DATA_PROFILING: "Either no tables exist or current role does not have permissions to access them", - SourceCapability.CONTAINERS: "Current role does not have permissions to use any database", - SourceCapability.LINEAGE_COARSE: "Current role does not have permissions to snowflake account usage views", - } - - for c in capabilities: # type:ignore - - # These capabilities do not work without active warehouse - if current_warehouse is None and c 
in ( - SourceCapability.SCHEMA_METADATA, - SourceCapability.DESCRIPTIONS, - SourceCapability.DATA_PROFILING, - SourceCapability.LINEAGE_COARSE, - ): - failure_message = ( - f"Current role does not have permissions to use warehouse {connection_conf.warehouse}" - if connection_conf.warehouse is not None - else "No default warehouse set for user. Either set default warehouse for user or configure warehouse in recipe" - ) - _report[c] = CapabilityReport( - capable=False, - failure_reason=failure_message, - ) - - if c in _report.keys(): - continue - - # If some capabilities are missing, then mark them as not capable - _report[c] = CapabilityReport( - capable=False, - failure_reason=default_failure_messages[c], - ) - - return _report - - @classmethod - def create(cls, config_dict, ctx): - config = SnowflakeConfig.parse_obj(config_dict) - return cls(config, ctx) - - @staticmethod - def test_connection(config_dict: dict) -> TestConnectionReport: - test_report = TestConnectionReport() - - try: - SnowflakeConfig.Config.extra = ( - pydantic.Extra.allow - ) # we are okay with extra fields during this stage - connection_conf = SnowflakeConfig.parse_obj(config_dict) - - connection: connector.SnowflakeConnection = connection_conf.get_connection() - assert connection - - test_report.basic_connectivity = CapabilityReport(capable=True) - - test_report.capability_report = SnowflakeSource.check_capabilities( - connection, connection_conf - ) - - except Exception as e: - logger.error(f"Failed to test connection due to {e}", exc_info=e) - if test_report.basic_connectivity is None: - test_report.basic_connectivity = CapabilityReport( - capable=False, failure_reason=f"{e}" - ) - else: - test_report.internal_failure = True - test_report.internal_failure_reason = f"{e}" - finally: - SnowflakeConfig.Config.extra = ( - pydantic.Extra.forbid - ) # set config flexibility back to strict - return test_report - - def get_metadata_engine( - self, database: Optional[str] = None - ) -> sqlalchemy.engine.Engine: - if self.provision_role_in_progress and self.config.provision_role is not None: - username: Optional[str] = self.config.provision_role.admin_username - password: Optional[ - pydantic.SecretStr - ] = self.config.provision_role.admin_password - role: Optional[str] = self.config.provision_role.admin_role - else: - username = self.config.username - password = self.config.password - role = self.config.role - - url = self.config.get_sql_alchemy_url( - database=database, username=username, password=password, role=role - ) - logger.debug(f"sql_alchemy_url={url}") - if self.config.authentication_type == "OAUTH_AUTHENTICATOR": - return create_engine( - url, - creator=self.config.get_oauth_connection, - **self.config.get_options(), - ) - else: - return create_engine( - url, - **self.config.get_options(), - ) - - def inspect_session_metadata(self) -> Any: - db_engine = self.get_metadata_engine() - try: - logger.info("Checking current version") - for db_row in db_engine.execute("select CURRENT_VERSION()"): - self.report.saas_version = db_row[0] - except Exception as e: - self.report.report_failure("version", f"Error: {e}") - try: - logger.info("Checking current warehouse") - for db_row in db_engine.execute("select current_warehouse()"): - self.report.default_warehouse = db_row[0] - except Exception as e: - self.report.report_failure("current_warehouse", f"Error: {e}") - try: - logger.info("Checking current database") - for db_row in db_engine.execute("select current_database()"): - self.report.default_db = db_row[0] - except 
Exception as e: - self.report.report_failure("current_database", f"Error: {e}") - try: - logger.info("Checking current schema") - for db_row in db_engine.execute("select current_schema()"): - self.report.default_schema = db_row[0] - except Exception as e: - self.report.report_failure("current_schema", f"Error: {e}") - - def inspect_role_grants(self) -> Any: - db_engine = self.get_metadata_engine() - cur_role = None - if self.config.role is None: - for db_row in db_engine.execute("select CURRENT_ROLE()"): - cur_role = db_row[0] - else: - cur_role = self.config.role - - if cur_role is None: - return - - self.report.role = cur_role - logger.info(f"Current role is {cur_role}") - if cur_role.lower() == "accountadmin" or not self.config.check_role_grants: - return - - logger.info(f"Checking grants for role {cur_role}") - for db_row in db_engine.execute(text(f"show grants to role {cur_role}")): - privilege = db_row["privilege"] - granted_on = db_row["granted_on"] - name = db_row["name"] - self.report.role_grants.append( - f"{privilege} granted on {granted_on} {name}" - ) - - def get_inspectors(self) -> Iterable[Inspector]: - db_listing_engine = self.get_metadata_engine(database=None) - - for db_row in db_listing_engine.execute(text("SHOW DATABASES")): - db = db_row.name - if self.config.database_pattern.allowed(db): - # We create a separate engine for each database in order to ensure that - # they are isolated from each other. - self.current_database = db - engine = self.get_metadata_engine(database=db) - - with engine.connect() as conn: - inspector = inspect(conn) - yield inspector - else: - self.report.report_dropped(db) - - def get_identifier( - self, *, schema: str, entity: str, inspector: Inspector, **kwargs: Any - ) -> str: - regular = super().get_identifier( - schema=schema, entity=entity, inspector=inspector, **kwargs - ) - return f"{self.current_database.lower()}.{regular}" - - def _populate_view_upstream_lineage(self, engine: sqlalchemy.engine.Engine) -> None: - # NOTE: This query captures only the upstream lineage of a view (with no column lineage). - # For more details see: https://docs.snowflake.com/en/user-guide/object-dependencies.html#object-dependencies - # and also https://docs.snowflake.com/en/sql-reference/account-usage/access_history.html#usage-notes for current limitations on capturing the lineage for views. - view_upstream_lineage_query: str = """ -SELECT - concat( - referenced_database, '.', referenced_schema, - '.', referenced_object_name - ) AS view_upstream, - concat( - referencing_database, '.', referencing_schema, - '.', referencing_object_name - ) AS downstream_view -FROM - snowflake.account_usage.object_dependencies -WHERE - referencing_object_domain in ('VIEW', 'MATERIALIZED VIEW') - """ - - assert self._lineage_map is not None - num_edges: int = 0 - - try: - for db_row in engine.execute(view_upstream_lineage_query): - # Process UpstreamTable/View/ExternalTable/Materialized View->View edge. - view_upstream: str = db_row["view_upstream"].lower() - view_name: str = db_row["downstream_view"].lower() - if not self._is_dataset_allowed(dataset_name=view_name, is_view=True): - continue - # key is the downstream view name - self._lineage_map[view_name].append( - # (, , ) - (view_upstream, "[]", "[]") - ) - num_edges += 1 - logger.debug( - f"Upstream->View: Lineage[View(Down)={view_name}]:Upstream={view_upstream}" - ) - except Exception as e: - self.warn( - logger, - "view_upstream_lineage", - "Extracting the upstream view lineage from Snowflake failed." 
- + f"Please check your permissions. Continuing...\nError was {e}.", - ) - logger.info(f"A total of {num_edges} View upstream edges found.") - self.report.num_table_to_view_edges_scanned = num_edges - - def _populate_view_downstream_lineage( - self, engine: sqlalchemy.engine.Engine - ) -> None: - # This query captures the downstream table lineage for views. - # See https://docs.snowflake.com/en/sql-reference/account-usage/access_history.html#usage-notes for current limitations on capturing the lineage for views. - # Eg: For viewA->viewB->ViewC->TableD, snowflake does not yet log intermediate view logs, resulting in only the viewA->TableD edge. - view_lineage_query: str = """ -WITH view_lineage_history AS ( - SELECT - vu.value : "objectName" AS view_name, - vu.value : "objectDomain" AS view_domain, - vu.value : "columns" AS view_columns, - w.value : "objectName" AS downstream_table_name, - w.value : "objectDomain" AS downstream_table_domain, - w.value : "columns" AS downstream_table_columns, - t.query_start_time AS query_start_time - FROM - ( - SELECT - * - FROM - snowflake.account_usage.access_history - ) t, - lateral flatten(input => t.DIRECT_OBJECTS_ACCESSED) vu, - lateral flatten(input => t.OBJECTS_MODIFIED) w - WHERE - vu.value : "objectId" IS NOT NULL - AND w.value : "objectId" IS NOT NULL - AND w.value : "objectName" NOT LIKE '%.GE_TMP_%' - AND w.value : "objectName" NOT LIKE '%.GE_TEMP_%' - AND t.query_start_time >= to_timestamp_ltz({start_time_millis}, 3) - AND t.query_start_time < to_timestamp_ltz({end_time_millis}, 3) -) -SELECT - view_name, - view_columns, - downstream_table_name, - downstream_table_columns -FROM - view_lineage_history -WHERE - view_domain in ('View', 'Materialized view') - QUALIFY ROW_NUMBER() OVER ( - PARTITION BY view_name, - downstream_table_name - ORDER BY - query_start_time DESC - ) = 1 - """.format( - start_time_millis=int(self.config.start_time.timestamp() * 1000) - if not self.config.ignore_start_time_lineage - else 0, - end_time_millis=int(self.config.end_time.timestamp() * 1000), - ) - - assert self._lineage_map is not None - self.report.num_view_to_table_edges_scanned = 0 - - try: - db_rows = engine.execute(view_lineage_query) - except Exception as e: - self.warn( - logger, - "view_downstream_lineage", - f"Extracting the view lineage from Snowflake failed." - f"Please check your permissions. Continuing...\nError was {e}.", - ) - else: - for db_row in db_rows: - view_name: str = db_row["view_name"].lower().replace('"', "") - if not self._is_dataset_allowed(dataset_name=view_name, is_view=True): - continue - downstream_table: str = ( - db_row["downstream_table_name"].lower().replace('"', "") - ) - # Capture view->downstream table lineage. - self._lineage_map[downstream_table].append( - # (, , ) - ( - view_name, - db_row["view_columns"], - db_row["downstream_table_columns"], - ) - ) - self.report.num_view_to_table_edges_scanned += 1 - - logger.debug( - f"View->Table: Lineage[Table(Down)={downstream_table}]:View(Up)={self._lineage_map[downstream_table]}" - ) - - logger.info( - f"Found {self.report.num_view_to_table_edges_scanned} View->Table edges." 
- ) - - def _populate_view_lineage(self) -> None: - if not self.config.include_view_lineage: - return - engine = self.get_metadata_engine(database=None) - self._populate_view_upstream_lineage(engine) - self._populate_view_downstream_lineage(engine) - - def _populate_external_lineage(self) -> None: - engine = self.get_metadata_engine(database=None) - # Handles the case where a table is populated from an external location via copy. - # Eg: copy into category_english from 's3://acryl-snow-demo-olist/olist_raw_data/category_english'credentials=(aws_key_id='...' aws_secret_key='...') pattern='.*.csv'; - query: str = """ - WITH external_table_lineage_history AS ( - SELECT - r.value:"locations" as upstream_locations, - w.value:"objectName" AS downstream_table_name, - w.value:"objectDomain" AS downstream_table_domain, - w.value:"columns" AS downstream_table_columns, - t.query_start_time AS query_start_time - FROM - (SELECT * from snowflake.account_usage.access_history) t, - lateral flatten(input => t.BASE_OBJECTS_ACCESSED) r, - lateral flatten(input => t.OBJECTS_MODIFIED) w - WHERE r.value:"locations" IS NOT NULL - AND w.value:"objectId" IS NOT NULL - AND t.query_start_time >= to_timestamp_ltz({start_time_millis}, 3) - AND t.query_start_time < to_timestamp_ltz({end_time_millis}, 3)) - SELECT upstream_locations, downstream_table_name, downstream_table_columns - FROM external_table_lineage_history - WHERE downstream_table_domain = 'Table' - QUALIFY ROW_NUMBER() OVER (PARTITION BY downstream_table_name ORDER BY query_start_time DESC) = 1""".format( - start_time_millis=int(self.config.start_time.timestamp() * 1000) - if not self.config.ignore_start_time_lineage - else 0, - end_time_millis=int(self.config.end_time.timestamp() * 1000), - ) - - num_edges: int = 0 - self._external_lineage_map = defaultdict(set) - try: - for db_row in engine.execute(query): - # key is the down-stream table name - key: str = db_row[1].lower().replace('"', "") - if not self._is_dataset_allowed(key): - continue - self._external_lineage_map[key] |= {*json.loads(db_row[0])} - logger.debug( - f"ExternalLineage[Table(Down)={key}]:External(Up)={self._external_lineage_map[key]} via access_history" - ) - except Exception as e: - logger.warning( - f"Populating table external lineage from Snowflake failed." - f"Please check your premissions. Continuing...\nError was {e}." - ) - # Handles the case for explicitly created external tables. - # NOTE: Snowflake does not log this information to the access_history table. - external_tables_query: str = "show external tables in account" - try: - for db_row in engine.execute(external_tables_query): - key = ( - f"{db_row.database_name}.{db_row.schema_name}.{db_row.name}".lower() - ) - if not self._is_dataset_allowed(dataset_name=key): - continue - self._external_lineage_map[key].add(db_row.location) - logger.debug( - f"ExternalLineage[Table(Down)={key}]:External(Up)={self._external_lineage_map[key]} via show external tables" - ) - num_edges += 1 - except Exception as e: - self.warn( - logger, - "external_lineage", - f"Populating external table lineage from Snowflake failed." - f"Please check your premissions. 
Continuing...\nError was {e}.", - ) - logger.info(f"Found {num_edges} external lineage edges.") - self.report.num_external_table_edges_scanned = num_edges - - def _populate_lineage(self) -> None: - engine = self.get_metadata_engine(database=None) - query: str = """ -WITH table_lineage_history AS ( - SELECT - r.value:"objectName" AS upstream_table_name, - r.value:"objectDomain" AS upstream_table_domain, - r.value:"columns" AS upstream_table_columns, - w.value:"objectName" AS downstream_table_name, - w.value:"objectDomain" AS downstream_table_domain, - w.value:"columns" AS downstream_table_columns, - t.query_start_time AS query_start_time - FROM - (SELECT * from snowflake.account_usage.access_history) t, - lateral flatten(input => t.DIRECT_OBJECTS_ACCESSED) r, - lateral flatten(input => t.OBJECTS_MODIFIED) w - WHERE r.value:"objectId" IS NOT NULL - AND w.value:"objectId" IS NOT NULL - AND w.value:"objectName" NOT LIKE '%.GE_TMP_%' - AND w.value:"objectName" NOT LIKE '%.GE_TEMP_%' - AND t.query_start_time >= to_timestamp_ltz({start_time_millis}, 3) - AND t.query_start_time < to_timestamp_ltz({end_time_millis}, 3)) -SELECT upstream_table_name, downstream_table_name, upstream_table_columns, downstream_table_columns -FROM table_lineage_history -WHERE upstream_table_domain in ('Table', 'External table') and downstream_table_domain = 'Table' -QUALIFY ROW_NUMBER() OVER (PARTITION BY downstream_table_name, upstream_table_name ORDER BY query_start_time DESC) = 1 """.format( - start_time_millis=int(self.config.start_time.timestamp() * 1000) - if not self.config.ignore_start_time_lineage - else 0, - end_time_millis=int(self.config.end_time.timestamp() * 1000), - ) - num_edges: int = 0 - self._lineage_map = defaultdict(list) - try: - for db_row in engine.execute(query): - # key is the down-stream table name - key: str = db_row[1].lower().replace('"', "") - upstream_table_name = db_row[0].lower().replace('"', "") - if not ( - self._is_dataset_allowed(key) - or self._is_dataset_allowed(upstream_table_name) - ): - continue - self._lineage_map[key].append( - # (, , ) - (upstream_table_name, db_row[2], db_row[3]) - ) - num_edges += 1 - logger.debug( - f"Lineage[Table(Down)={key}]:Table(Up)={self._lineage_map[key]}" - ) - except Exception as e: - self.warn( - logger, - "lineage", - f"Extracting lineage from Snowflake failed." - f"Please check your premissions. Continuing...\nError was {e}.", - ) - logger.info( - f"A total of {num_edges} Table->Table edges found" - f" for {len(self._lineage_map)} downstream tables.", - ) - self.report.num_table_to_table_edges_scanned = num_edges - - def _get_upstream_lineage_info( - self, dataset_urn: str - ) -> Optional[Tuple[UpstreamLineage, Dict[str, str]]]: - dataset_key = builder.dataset_urn_to_key(dataset_urn) - if dataset_key is None: - logger.warning(f"Invalid dataset urn {dataset_urn}. 
Could not get key!") - return None - - if self._lineage_map is None: - self._populate_lineage() - self._populate_view_lineage() - if self._external_lineage_map is None: - self._populate_external_lineage() - - assert self._lineage_map is not None - assert self._external_lineage_map is not None - dataset_name = dataset_key.name - lineage = self._lineage_map[dataset_name] - external_lineage = self._external_lineage_map[dataset_name] - if not (lineage or external_lineage): - logger.debug(f"No lineage found for {dataset_name}") - return None - upstream_tables: List[UpstreamClass] = [] - column_lineage: Dict[str, str] = {} - for lineage_entry in lineage: - # Update the table-lineage - upstream_table_name = lineage_entry[0] - if not self._is_dataset_allowed(upstream_table_name): - continue - upstream_table = UpstreamClass( - dataset=builder.make_dataset_urn_with_platform_instance( - self.platform, - upstream_table_name, - self.config.platform_instance, - self.config.env, - ), - type=DatasetLineageTypeClass.TRANSFORMED, - ) - upstream_tables.append(upstream_table) - # Update column-lineage for each down-stream column. - upstream_columns = [ - d["columnName"].lower() for d in json.loads(lineage_entry[1]) - ] - downstream_columns = [ - d["columnName"].lower() for d in json.loads(lineage_entry[2]) - ] - upstream_column_str = ( - f"{upstream_table_name}({', '.join(sorted(upstream_columns))})" - ) - downstream_column_str = ( - f"{dataset_name}({', '.join(sorted(downstream_columns))})" - ) - column_lineage_key = f"column_lineage[{upstream_table_name}]" - column_lineage_value = ( - f"{{{upstream_column_str} -> {downstream_column_str}}}" - ) - column_lineage[column_lineage_key] = column_lineage_value - logger.debug(f"{column_lineage_key}:{column_lineage_value}") - - for external_lineage_entry in external_lineage: - # For now, populate only for S3 - if external_lineage_entry.startswith("s3://"): - external_upstream_table = UpstreamClass( - dataset=make_s3_urn(external_lineage_entry, self.config.env), - type=DatasetLineageTypeClass.COPY, - ) - upstream_tables.append(external_upstream_table) - - if upstream_tables: - logger.debug( - f"Upstream lineage of '{dataset_name}': {[u.dataset for u in upstream_tables]}" - ) - if self.config.upstream_lineage_in_report: - self.report.upstream_lineage[dataset_name] = [ - u.dataset for u in upstream_tables - ] - return UpstreamLineage(upstreams=upstream_tables), column_lineage - return None - - def add_config_to_report(self): - self.report.cleaned_account_id = self.config.get_account() - self.report.ignore_start_time_lineage = self.config.ignore_start_time_lineage - self.report.upstream_lineage_in_report = self.config.upstream_lineage_in_report - if not self.report.ignore_start_time_lineage: - self.report.lineage_start_time = self.config.start_time - self.report.lineage_end_time = self.config.end_time - self.report.check_role_grants = self.config.check_role_grants - if self.config.provision_role is not None: - self.report.run_ingestion = self.config.provision_role.run_ingestion - - def do_provision_role_internal(self): - provision_role_block = self.config.provision_role - if provision_role_block is None: - return - self.report.provision_role_done = not provision_role_block.dry_run - - role = self.config.role - if role is None: - role = "datahub_role" - self.warn( - logger, - "role-grant", - f"role not specified during provision role using {role} as default", - ) - self.report.role = role - - warehouse = self.config.warehouse - - logger.info("Creating connection for 
provision_role") - engine = self.get_metadata_engine(database=None) - - sqls: List[str] = [] - if provision_role_block.drop_role_if_exists: - sqls.append(f"DROP ROLE IF EXISTS {role}") - - sqls.append(f"CREATE ROLE IF NOT EXISTS {role}") - - if warehouse is None: - self.warn( - logger, "role-grant", "warehouse not specified during provision role" - ) - else: - sqls.append(f"grant operate, usage on warehouse {warehouse} to role {role}") - - for inspector in self.get_inspectors(): - db_name = self.get_db_name(inspector) - sqls.extend( - [ - f"grant usage on DATABASE {db_name} to role {role}", - f"grant usage on all schemas in database {db_name} to role {role}", - f"grant usage on future schemas in database {db_name} to role {role}", - ] - ) - if self.config.profiling.enabled: - sqls.extend( - [ - f"grant select on all tables in database {db_name} to role {role}", - f"grant select on future tables in database {db_name} to role {role}", - f"grant select on all external tables in database {db_name} to role {role}", - f"grant select on future external tables in database {db_name} to role {role}", - f"grant select on all views in database {db_name} to role {role}", - f"grant select on future views in database {db_name} to role {role}", - ] - ) - else: - sqls.extend( - [ - f"grant references on all tables in database {db_name} to role {role}", - f"grant references on future tables in database {db_name} to role {role}", - f"grant references on all external tables in database {db_name} to role {role}", - f"grant references on future external tables in database {db_name} to role {role}", - f"grant references on all views in database {db_name} to role {role}", - f"grant references on future views in database {db_name} to role {role}", - ] - ) - if self.config.username is not None: - sqls.append(f"grant role {role} to user {self.config.username}") - - if self.config.include_table_lineage or self.config.include_view_lineage: - sqls.append( - f"grant imported privileges on database snowflake to role {role}" - ) - - dry_run_str = "[DRY RUN] " if provision_role_block.dry_run else "" - for sql in sqls: - logger.info(f"{dry_run_str} Attempting to run sql {sql}") - if provision_role_block.dry_run: - continue - try: - engine.execute(sql) - except Exception as e: - self.error(logger, "role-grant", f"Exception: {e}") - - self.report.provision_role_success = not provision_role_block.dry_run - - def do_provision_role(self): - if ( - self.config.provision_role is None - or self.config.provision_role.enabled is False - ): - return - - try: - self.provision_role_in_progress = True - self.do_provision_role_internal() - finally: - self.provision_role_in_progress = False - - def should_run_ingestion(self) -> bool: - return ( - self.config.provision_role is None - or self.config.provision_role.enabled is False - or self.config.provision_role.run_ingestion - ) - - # Override the base class method. 
- def get_workunits(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]: - self.add_config_to_report() - - self.do_provision_role() - if not self.should_run_ingestion(): - return - - self.inspect_session_metadata() - - self.inspect_role_grants() - for wu in super().get_workunits(): - if ( - self.config.include_table_lineage - and isinstance(wu, MetadataWorkUnit) - and isinstance(wu.metadata, MetadataChangeEvent) - and isinstance(wu.metadata.proposedSnapshot, DatasetSnapshot) - ): - dataset_snapshot: DatasetSnapshot = wu.metadata.proposedSnapshot - assert dataset_snapshot - # Join the workunit stream from super with the lineage info using the urn. - lineage_info = self._get_upstream_lineage_info(dataset_snapshot.urn) - if lineage_info is not None: - # Emit the lineage work unit - upstream_lineage, upstream_column_props = lineage_info - lineage_mcpw = MetadataChangeProposalWrapper( - entityType="dataset", - changeType=ChangeTypeClass.UPSERT, - entityUrn=dataset_snapshot.urn, - aspectName="upstreamLineage", - aspect=upstream_lineage, - ) - lineage_wu = MetadataWorkUnit( - id=f"{self.platform}-{lineage_mcpw.entityUrn}-{lineage_mcpw.aspectName}", - mcp=lineage_mcpw, - ) - self.report.report_workunit(lineage_wu) - yield lineage_wu - - # Update the super's workunit to include the column-lineage in the custom properties. We need to follow - # the RCU semantics for both the aspects & customProperties in order to preserve the changes made by super. - aspects = dataset_snapshot.aspects - if aspects is None: - aspects = [] - dataset_properties_aspect: Optional[DatasetPropertiesClass] = None - for aspect in aspects: - if isinstance(aspect, DatasetPropertiesClass): - dataset_properties_aspect = aspect - if dataset_properties_aspect is None: - dataset_properties_aspect = DatasetPropertiesClass() - aspects.append(dataset_properties_aspect) - - custom_properties = ( - { - **dataset_properties_aspect.customProperties, - **upstream_column_props, - } - if dataset_properties_aspect.customProperties - else upstream_column_props - ) - dataset_properties_aspect.customProperties = custom_properties - dataset_snapshot.aspects = aspects - - # Emit the work unit from super. - yield wu - - def _is_dataset_allowed( - self, dataset_name: Optional[str], is_view: bool = False - ) -> bool: - # View lineages is not supported. Add the allow/deny pattern for that when it is supported. 
- if dataset_name is None: - return True - dataset_params = dataset_name.split(".") - if len(dataset_params) != 3: - return True - if ( - not self.config.database_pattern.allowed(dataset_params[0]) - or not self.config.schema_pattern.allowed(dataset_params[1]) - or ( - not is_view and not self.config.table_pattern.allowed(dataset_params[2]) - ) - or (is_view and not self.config.view_pattern.allowed(dataset_params[2])) - ): - return False - return True - - def generate_profile_candidates( - self, inspector: Inspector, threshold_time: Optional[datetime], schema: str - ) -> Optional[List[str]]: - if threshold_time is None: - return None - db_name = self.current_database - if self.profile_candidates.get(db_name) is not None: - # snowflake profile candidates are available at database level, - # no need to regenerate for every schema - return self.profile_candidates[db_name] - self.report.profile_if_updated_since = threshold_time - _profile_candidates = [] - logger.debug(f"Generating profiling candidates for db {db_name}") - db_rows = inspector.engine.execute( - text( - """ -select table_catalog, table_schema, table_name -from information_schema.tables -where last_altered >= to_timestamp_ltz({timestamp}, 3) and table_type= 'BASE TABLE' - """.format( - timestamp=round(threshold_time.timestamp() * 1000) - ) - ) - ) - - for db_row in db_rows: - _profile_candidates.append( - self.get_identifier( - schema=db_row.table_schema, - entity=db_row.table_name, - inspector=inspector, - ).lower() - ) - - self.report.profile_candidates[db_name] = _profile_candidates - self.profile_candidates[db_name] = _profile_candidates - return _profile_candidates - - # Stateful Ingestion specific overrides - # NOTE: There is no special state associated with this source yet than what is provided by sql_common. 
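`_is_dataset_allowed` above only filters names that split cleanly into `database.schema.object`, checks the first two parts against the database and schema patterns, and routes the last part to either the table or the view pattern. A plain-regex sketch of that three-part check, with hypothetical patterns standing in for the recipe's allow/deny configuration:

import re
from typing import List


def _matches_any(name: str, patterns: List[str]) -> bool:
    """True if any regex in `patterns` matches the whole (lowercased) name."""
    return any(re.fullmatch(p, name.lower()) for p in patterns)


def is_dataset_allowed(dataset_name: str, is_view: bool = False) -> bool:
    # Hypothetical patterns; the real source takes these from the recipe's
    # database_pattern / schema_pattern / table_pattern / view_pattern config.
    database_patterns = [r".*"]
    schema_patterns = [r"(?!information_schema).*"]  # skip INFORMATION_SCHEMA
    table_patterns = [r"(?!ge_tmp_).*"]              # skip profiling temp tables
    view_patterns = [r".*"]

    parts = dataset_name.split(".")
    if len(parts) != 3:
        # Anything that is not db.schema.object is passed through unfiltered.
        return True
    db, schema, obj = parts
    return (
        _matches_any(db, database_patterns)
        and _matches_any(schema, schema_patterns)
        and _matches_any(obj, view_patterns if is_view else table_patterns)
    )


print(is_dataset_allowed("analytics.public.orders"))     # True
print(is_dataset_allowed("analytics.public.ge_tmp_x1"))  # False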
- def get_platform_instance_id(self) -> str: - """Overrides the source identifier for stateful ingestion.""" - return self.config.get_account() diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic.py index 01fd5d1ebbb03..69e4c97479006 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic.py @@ -1,3 +1,7 @@ +from dataclasses import dataclass, field +from datetime import datetime +from typing import Generic, List, Optional, TypeVar + from pydantic.fields import Field from datahub.ingestion.api.common import PipelineContext @@ -12,6 +16,42 @@ from datahub.ingestion.source.sql.sql_common import SQLAlchemyConfig, SQLAlchemySource +@dataclass(frozen=True, eq=True) +class BaseColumn: + name: str + ordinal_position: int + is_nullable: bool + data_type: str + comment: Optional[str] + + +SqlTableColumn = TypeVar("SqlTableColumn", bound="BaseColumn") + + +@dataclass +class BaseTable(Generic[SqlTableColumn]): + name: str + comment: Optional[str] + created: datetime + last_altered: Optional[datetime] + size_in_bytes: Optional[int] + rows_count: Optional[int] + columns: List[SqlTableColumn] = field(default_factory=list) + ddl: Optional[str] = None + + +@dataclass +class BaseView(Generic[SqlTableColumn]): + name: str + comment: Optional[str] + created: Optional[datetime] + last_altered: Optional[datetime] + view_definition: str + size_in_bytes: Optional[int] = None + rows_count: Optional[int] = None + columns: List[SqlTableColumn] = field(default_factory=list) + + class SQLAlchemyGenericConfig(SQLAlchemyConfig): platform: str = Field( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py new file mode 100644 index 0000000000000..13d6febf9d422 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic_profiler.py @@ -0,0 +1,165 @@ +import logging +from dataclasses import dataclass, field +from datetime import datetime, timedelta, timezone +from typing import Dict, Iterable, List, Optional, Tuple, Union, cast + +from sqlalchemy import create_engine, inspect +from sqlalchemy.engine.reflection import Inspector + +from datahub.ingestion.source.ge_data_profiler import ( + DatahubGEProfiler, + GEProfilerRequest, +) +from datahub.ingestion.source.sql.sql_common import SQLAlchemyConfig, SQLSourceReport +from datahub.ingestion.source.sql.sql_generic import BaseTable, BaseView +from datahub.metadata.com.linkedin.pegasus2avro.dataset import DatasetProfile +from datahub.metadata.schema_classes import DatasetProfileClass +from datahub.utilities.stats_collections import TopKDict + + +@dataclass +class DetailedProfilerReportMixin: + profiling_skipped_not_updated: TopKDict[str, int] = field(default_factory=TopKDict) + profiling_skipped_size_limit: TopKDict[str, int] = field(default_factory=TopKDict) + + profiling_skipped_row_limit: TopKDict[str, int] = field(default_factory=TopKDict) + num_tables_not_eligible_profiling: Dict[str, int] = field(default_factory=TopKDict) + + +class ProfilingSqlReport(DetailedProfilerReportMixin, SQLSourceReport): + pass + + +@dataclass +class TableProfilerRequest(GEProfilerRequest): + table: Union[BaseTable, BaseView] + profile_table_level_only: bool = False + + +logger = logging.getLogger(__name__) + + +class GenericProfiler: + def __init__( + self, config: 
SQLAlchemyConfig, report: ProfilingSqlReport, platform: str + ) -> None: + self.config = config + self.report = report + self.platform = platform + + def generate_profiles( + self, + requests: List[TableProfilerRequest], + max_workers: int, + db_name: Optional[str] = None, + platform: Optional[str] = None, + profiler_args: Optional[Dict] = None, + ) -> Iterable[Tuple[GEProfilerRequest, Optional[DatasetProfileClass]]]: + + ge_profile_requests: List[GEProfilerRequest] = [ + cast(GEProfilerRequest, request) + for request in requests + if not request.profile_table_level_only + ] + table_level_profile_requests: List[TableProfilerRequest] = [ + request for request in requests if request.profile_table_level_only + ] + for request in table_level_profile_requests: + profile = DatasetProfile( + timestampMillis=int(datetime.now().timestamp() * 1000), + columnCount=len(request.table.columns), + rowCount=request.table.rows_count, + sizeInBytes=request.table.size_in_bytes, + ) + yield (request, profile) + + if not ge_profile_requests: + return + + # Otherwise, if column level profiling is enabled, use GE profiler. + ge_profiler = self.get_profiler_instance(db_name) + yield from ge_profiler.generate_profiles( + ge_profile_requests, max_workers, platform, profiler_args + ) + + def get_inspectors(self) -> Iterable[Inspector]: + # This method can be overridden in the case that you want to dynamically + # run on multiple databases. + + url = self.config.get_sql_alchemy_url() + logger.debug(f"sql_alchemy_url={url}") + engine = create_engine(url, **self.config.options) + with engine.connect() as conn: + inspector = inspect(conn) + yield inspector + + def get_profiler_instance( + self, db_name: Optional[str] = None + ) -> "DatahubGEProfiler": + logger.debug(f"Getting profiler instance from {self.platform}") + url = self.config.get_sql_alchemy_url() + + logger.debug(f"sql_alchemy_url={url}") + + engine = create_engine(url, **self.config.options) + with engine.connect() as conn: + inspector = inspect(conn) + + return DatahubGEProfiler( + conn=inspector.bind, + report=self.report, + config=self.config.profiling, + platform=self.platform, + ) + + def is_dataset_eligible_for_profiling( + self, + dataset_name: str, + last_altered: Optional[datetime], + size_in_bytes: Optional[int], + rows_count: Optional[int], + ) -> bool: + threshold_time: Optional[datetime] = None + if self.config.profiling.profile_if_updated_since_days is not None: + threshold_time = datetime.now(timezone.utc) - timedelta( + self.config.profiling.profile_if_updated_since_days + ) + + if not self.config.table_pattern.allowed( + dataset_name + ) or not self.config.profile_pattern.allowed(dataset_name): + return False + + schema_name = dataset_name.rsplit(".", 1)[0] + if (threshold_time is not None) and ( + last_altered is not None and last_altered < threshold_time + ): + self.report.profiling_skipped_not_updated[schema_name] = ( + self.report.profiling_skipped_not_updated.get(schema_name, 0) + 1 + ) + return False + + if self.config.profiling.profile_table_size_limit is not None and ( + size_in_bytes is None + or size_in_bytes / (2**30) + > self.config.profiling.profile_table_size_limit + ): + self.report.profiling_skipped_size_limit[schema_name] = ( + self.report.profiling_skipped_size_limit.get(schema_name, 0) + 1 + ) + return False + + if self.config.profiling.profile_table_row_limit is not None and ( + rows_count is None + or rows_count > self.config.profiling.profile_table_row_limit + ): + self.report.profiling_skipped_row_limit[schema_name] = 
( + self.report.profiling_skipped_row_limit.get(schema_name, 0) + 1 + ) + return False + + return True + + def get_profile_args(self) -> Dict: + """Passed down to GE profiler""" + return {} diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py b/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py index 86d5943f54264..11b771cbe0209 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py @@ -31,9 +31,37 @@ class UUID(String): __visit_name__ = "UUID" +class TIMESTAMP_WITH_PRECISION(TIMESTAMP): + """The SQL TIMESTAMP With Precision type. + + Since Vertica supports precision values for timestamp this allows ingestion + of timestamp fields with precision values. + PS: THIS DATA IS CURRENTLY UNUSED, IT JUST FIXES INGESTION PROBLEMS + TODO: Should research the possibility of reflecting the precision in the schema + + """ + + __visit_name__ = "TIMESTAMP" + + def __init__(self, timezone=False, precision=None): + """Construct a new :class:`_types.TIMESTAMP_WITH_PRECISION`. + + :param timezone: boolean. Indicates that the TIMESTAMP type should + enable timezone support, if available on the target database. + On a per-dialect basis is similar to "TIMESTAMP WITH TIMEZONE". + If the target database does not support timezones, this flag is + ignored. + :param precision: integer. Indicates the PRECISION field when provided + + + """ + super(TIMESTAMP, self).__init__(timezone=timezone) + self.precision = precision + + def TIMESTAMP_WITH_TIMEZONE(*args, **kwargs): kwargs["timezone"] = True - return TIMESTAMP(*args, **kwargs) + return TIMESTAMP_WITH_PRECISION(*args, **kwargs) def TIME_WITH_TIMEZONE(*args, **kwargs): @@ -175,6 +203,7 @@ def _get_column_info( # noqa: C901 break self.ischema_names["UUID"] = UUID + self.ischema_names["TIMESTAMP"] = TIMESTAMP_WITH_PRECISION self.ischema_names["TIMESTAMPTZ"] = TIMESTAMP_WITH_TIMEZONE self.ischema_names["TIMETZ"] = TIME_WITH_TIMEZONE diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/snowflake_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/snowflake_usage.py deleted file mode 100644 index 3bff04234e9c2..0000000000000 --- a/metadata-ingestion/src/datahub/ingestion/source/usage/snowflake_usage.py +++ /dev/null @@ -1,464 +0,0 @@ -import collections -import json -import logging -import time -from datetime import datetime, timezone -from typing import Any, Dict, Iterable, List, Optional, Union, cast - -import pydantic.dataclasses -from more_itertools import partition -from pydantic import BaseModel -from sqlalchemy import create_engine -from sqlalchemy.engine import Engine - -import datahub.emitter.mce_builder as builder -from datahub.configuration.time_window_config import get_time_bucket -from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.ingestion.api.common import PipelineContext -from datahub.ingestion.api.decorators import ( - SupportStatus, - config_class, - platform_name, - support_status, -) -from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.ingestion.source.state.redundant_run_skip_handler import ( - RedundantRunSkipHandler, -) -from datahub.ingestion.source.state.stateful_ingestion_base import ( - StatefulIngestionSourceBase, -) -from datahub.ingestion.source.usage.usage_common import GenericAggregatedDataset -from datahub.ingestion.source_config.usage.snowflake_usage import SnowflakeUsageConfig -from datahub.ingestion.source_report.usage.snowflake_usage 
import SnowflakeUsageReport -from datahub.metadata.schema_classes import ( - ChangeTypeClass, - OperationClass, - OperationTypeClass, -) -from datahub.utilities.perf_timer import PerfTimer -from datahub.utilities.time import datetime_to_ts_millis - -logger = logging.getLogger(__name__) - -SnowflakeTableRef = str -AggregatedDataset = GenericAggregatedDataset[SnowflakeTableRef] -AggregatedAccessEvents = Dict[datetime, Dict[SnowflakeTableRef, AggregatedDataset]] - -SNOWFLAKE_USAGE_SQL_TEMPLATE = """ -SELECT - -- access_history.query_id, -- only for debugging purposes - access_history.query_start_time, - query_history.query_text, - query_history.query_type, - query_history.rows_inserted, - query_history.rows_updated, - query_history.rows_deleted, - access_history.base_objects_accessed, - access_history.direct_objects_accessed, -- when dealing with views, direct objects will show the view while base will show the underlying table - -- query_history.execution_status, -- not really necessary, but should equal "SUCCESS" - -- query_history.warehouse_name, - access_history.user_name, - users.first_name, - users.last_name, - users.display_name, - users.email, - query_history.role_name -FROM - snowflake.account_usage.access_history access_history -LEFT JOIN - snowflake.account_usage.query_history query_history - ON access_history.query_id = query_history.query_id -LEFT JOIN - snowflake.account_usage.users users - ON access_history.user_name = users.name -WHERE query_start_time >= to_timestamp_ltz({start_time_millis}, 3) - AND query_start_time < to_timestamp_ltz({end_time_millis}, 3) -ORDER BY query_start_time DESC -; -""".strip() - -OPERATION_STATEMENT_TYPES = { - "INSERT": OperationTypeClass.INSERT, - "UPDATE": OperationTypeClass.UPDATE, - "DELETE": OperationTypeClass.DELETE, - "CREATE": OperationTypeClass.CREATE, - "CREATE_TABLE": OperationTypeClass.CREATE, - "CREATE_TABLE_AS_SELECT": OperationTypeClass.CREATE, - "CREATE_SCHEMA": OperationTypeClass.CREATE, -} - - -@pydantic.dataclasses.dataclass -class SnowflakeColumnReference: - columnId: int - columnName: str - - -class PermissiveModel(BaseModel): - class Config: - extra = "allow" - - -class SnowflakeObjectAccessEntry(PermissiveModel): - columns: Optional[List[SnowflakeColumnReference]] - objectDomain: str - objectId: int - objectName: str - stageKind: Optional[str] - - -class SnowflakeJoinedAccessEvent(PermissiveModel): - query_start_time: datetime - query_text: str - query_type: str - rows_inserted: Optional[int] - rows_updated: Optional[int] - rows_deleted: Optional[int] - base_objects_accessed: List[SnowflakeObjectAccessEntry] - direct_objects_accessed: List[SnowflakeObjectAccessEntry] - - user_name: str - first_name: Optional[str] - last_name: Optional[str] - display_name: Optional[str] - email: str - role_name: str - - -@platform_name("Snowflake") -@support_status(SupportStatus.CERTIFIED) -@config_class(SnowflakeUsageConfig) -class SnowflakeUsageSource(StatefulIngestionSourceBase): - def __init__(self, config: SnowflakeUsageConfig, ctx: PipelineContext): - super(SnowflakeUsageSource, self).__init__(config, ctx) - self.config: SnowflakeUsageConfig = config - self.report: SnowflakeUsageReport = SnowflakeUsageReport() - # Create and register the stateful ingestion use-case handlers. 
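The access-history rows above are validated with pydantic models that inherit from `PermissiveModel`, whose `extra = "allow"` config keeps any columns the model does not declare instead of raising a validation error. A small sketch of that behaviour with a pared-down event model (pydantic v1 style; the field names are a subset of `SnowflakeJoinedAccessEvent`):

from datetime import datetime
from typing import Optional

from pydantic import BaseModel


class PermissiveModel(BaseModel):
    class Config:
        extra = "allow"  # keep unknown keys instead of raising a ValidationError


class AccessEvent(PermissiveModel):
    # A pared-down subset of SnowflakeJoinedAccessEvent's fields.
    query_start_time: datetime
    query_text: str
    query_type: str
    user_name: str
    email: Optional[str] = None


row = {
    "query_start_time": "2022-06-01T10:15:00+00:00",
    "query_text": "insert into db.sc.t select * from db.sc.s",
    "query_type": "INSERT",
    "user_name": "etl_user",
    "email": "etl_user@example.com",
    "warehouse_name": "COMPUTE_WH",  # not declared above; kept because extra="allow"
}

event = AccessEvent(**row)
print(event.query_type)      # INSERT
print(event.warehouse_name)  # COMPUTE_WH, accessible despite not being declared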
- self.redundant_run_skip_handler = RedundantRunSkipHandler( - source=self, - config=self.config, - pipeline_name=self.ctx.pipeline_name, - run_id=self.ctx.run_id, - ) - - @classmethod - def create(cls, config_dict, ctx): - config = SnowflakeUsageConfig.parse_obj(config_dict) - return cls(config, ctx) - - def get_platform_instance_id(self) -> str: - return self.config.get_account() - - def check_email_domain_missing(self) -> Any: - if self.config.email_domain is not None and self.config.email_domain != "": - return - - self.warn( - logger, - "missing-email-domain", - "User's without email address will be ignored from usage if you don't set email_domain property", - ) - - def add_config_to_report(self): - self.report.window_start_time = self.config.start_time - self.report.window_end_time = self.config.end_time - - def get_workunits(self) -> Iterable[MetadataWorkUnit]: - self.add_config_to_report() - self.check_email_domain_missing() - if not self.redundant_run_skip_handler.should_skip_this_run( - cur_start_time_millis=datetime_to_ts_millis(self.config.start_time) - ): - # Generate the workunits. - access_events = self._get_snowflake_history() - aggregated_info_items_raw, operation_aspect_work_units_raw = partition( - lambda x: isinstance(x, MetadataWorkUnit), - self._aggregate_access_events(access_events), - ) - for wu in cast(Iterable[MetadataWorkUnit], operation_aspect_work_units_raw): - self.report.report_workunit(wu) - yield wu - aggregated_info_items = list(aggregated_info_items_raw) - assert len(aggregated_info_items) == 1 - - for time_bucket in cast( - AggregatedAccessEvents, aggregated_info_items[0] - ).values(): - for aggregate in time_bucket.values(): - wu = self._make_usage_stat(aggregate) - self.report.report_workunit(wu) - yield wu - # Update checkpoint state for this run. 
- self.redundant_run_skip_handler.update_state( - start_time_millis=datetime_to_ts_millis(self.config.start_time), - end_time_millis=datetime_to_ts_millis(self.config.end_time), - ) - - def _make_usage_query(self) -> str: - start_time = datetime_to_ts_millis(self.config.start_time) - end_time = datetime_to_ts_millis(self.config.end_time) - return SNOWFLAKE_USAGE_SQL_TEMPLATE.format( - start_time_millis=start_time, - end_time_millis=end_time, - ) - - def _make_sql_engine(self) -> Engine: - url = self.config.get_sql_alchemy_url() - logger.debug(f"sql_alchemy_url={url}") - engine = create_engine( - url, - **self.config.get_options(), - ) - return engine - - def _check_usage_date_ranges(self, engine: Engine) -> Any: - - query = """ - select - min(query_start_time) as min_time, - max(query_start_time) as max_time - from snowflake.account_usage.access_history - """ - with PerfTimer() as timer: - try: - for db_row in engine.execute(query): - if len(db_row) < 2 or db_row[0] is None or db_row[1] is None: - self.warn( - logger, - "check-usage-data", - f"Missing data for access_history {db_row} - Check if using Enterprise edition of Snowflake", - ) - continue - self.report.min_access_history_time = db_row[0].astimezone( - tz=timezone.utc - ) - self.report.max_access_history_time = db_row[1].astimezone( - tz=timezone.utc - ) - self.report.access_history_range_query_secs = round( - timer.elapsed_seconds(), 2 - ) - except Exception as e: - self.error(logger, "check-usage-data", f"Error was {e}") - - def _is_unsupported_object_accessed(self, obj: Dict[str, Any]) -> bool: - unsupported_keys = ["locations"] - - if obj.get("objectDomain") in ["Stage"]: - return True - - return any([obj.get(key) is not None for key in unsupported_keys]) - - def _is_object_valid(self, obj: Dict[str, Any]) -> bool: - if self._is_unsupported_object_accessed( - obj - ) or not self._is_dataset_pattern_allowed( - obj.get("objectName"), obj.get("objectDomain") - ): - return False - return True - - def _is_dataset_pattern_allowed( - self, dataset_name: Optional[Any], dataset_type: Optional[Any] - ) -> bool: - if not dataset_type or not dataset_name: - return True - dataset_params = dataset_name.split(".") - if len(dataset_params) != 3: - self.warn( - logger, - "invalid-dataset-pattern", - f"Found {dataset_params} of type {dataset_type}", - ) - return False - if not self.config.database_pattern.allowed( - dataset_params[0] - ) or not self.config.schema_pattern.allowed(dataset_params[1]): - return False - - if dataset_type.lower() in {"table"} and not self.config.table_pattern.allowed( - dataset_params[2] - ): - return False - - if dataset_type.lower() in { - "view", - "materialized_view", - } and not self.config.view_pattern.allowed(dataset_params[2]): - return False - - return True - - def _process_snowflake_history_row( - self, row: Any - ) -> Iterable[SnowflakeJoinedAccessEvent]: - self.report.rows_processed += 1 - # Make some minor type conversions. - if hasattr(row, "_asdict"): - # Compat with SQLAlchemy 1.3 and 1.4 - # See https://docs.sqlalchemy.org/en/14/changelog/migration_14.html#rowproxy-is-no-longer-a-proxy-is-now-called-row-and-behaves-like-an-enhanced-named-tuple. 
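`_process_snowflake_history_row` first normalises the result row, since SQLAlchemy 1.4 rows expose `_asdict()` while 1.3 `RowProxy` objects are simply passed to `dict()`. A tiny illustration of the same `hasattr` shim, with a namedtuple standing in for a 1.4-style row:

from collections import namedtuple
from typing import Any, Dict


def row_to_dict(row: Any) -> Dict[str, Any]:
    # SQLAlchemy 1.4 Row objects (like namedtuples) provide _asdict();
    # older 1.3 RowProxy objects can be passed straight to dict().
    return dict(row._asdict()) if hasattr(row, "_asdict") else dict(row)


NewStyleRow = namedtuple("NewStyleRow", ["query_type", "user_name"])
print(row_to_dict(NewStyleRow("INSERT", "etl_user")))
# {'query_type': 'INSERT', 'user_name': 'etl_user'}
print(row_to_dict([("query_type", "DELETE"), ("user_name", "etl_user")]))
# {'query_type': 'DELETE', 'user_name': 'etl_user'}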
- event_dict = row._asdict() - else: - event_dict = dict(row) - - # no use processing events that don't have a query text - if not event_dict["query_text"]: - self.report.rows_missing_query_text += 1 - return - - event_dict["base_objects_accessed"] = [ - obj - for obj in json.loads(event_dict["base_objects_accessed"]) - if self._is_object_valid(obj) - ] - if len(event_dict["base_objects_accessed"]) == 0: - self.report.rows_zero_base_objects_accessed += 1 - - event_dict["direct_objects_accessed"] = [ - obj - for obj in json.loads(event_dict["direct_objects_accessed"]) - if self._is_object_valid(obj) - ] - if len(event_dict["direct_objects_accessed"]) == 0: - self.report.rows_zero_direct_objects_accessed += 1 - - event_dict["query_start_time"] = (event_dict["query_start_time"]).astimezone( - tz=timezone.utc - ) - - if not event_dict["email"] and self.config.email_domain: - if not event_dict["user_name"]: - self.report.report_warning("user-name-miss", f"Missing in {event_dict}") - logger.warning( - f"The user_name is missing from {event_dict}. Skipping ...." - ) - self.report.rows_missing_email += 1 - return - - event_dict[ - "email" - ] = f'{event_dict["user_name"]}@{self.config.email_domain}'.lower() - - try: # big hammer try block to ensure we don't fail on parsing events - event = SnowflakeJoinedAccessEvent(**event_dict) - yield event - except Exception as e: - self.report.rows_parsing_error += 1 - self.warn(logger, "usage", f"Failed to parse usage line {event_dict}, {e}") - - def _get_snowflake_history(self) -> Iterable[SnowflakeJoinedAccessEvent]: - engine = self._make_sql_engine() - - logger.info("Checking usage date ranges") - self._check_usage_date_ranges(engine) - - if ( - self.report.min_access_history_time is None - or self.report.max_access_history_time is None - ): - return - - logger.info("Getting usage history") - with PerfTimer() as timer: - query = self._make_usage_query() - results = engine.execute(query) - self.report.access_history_query_secs = round(timer.elapsed_seconds(), 2) - - for row in results: - yield from self._process_snowflake_history_row(row) - - def _get_operation_aspect_work_unit( - self, event: SnowflakeJoinedAccessEvent - ) -> Iterable[MetadataWorkUnit]: - if event.query_start_time and event.query_type in OPERATION_STATEMENT_TYPES: - start_time = event.query_start_time - query_type = event.query_type - user_email = event.email - operation_type = OPERATION_STATEMENT_TYPES[query_type] - reported_time: int = int(time.time() * 1000) - last_updated_timestamp: int = datetime_to_ts_millis(start_time) - user_urn = builder.make_user_urn(user_email.split("@")[0]) - for obj in event.base_objects_accessed: - resource = obj.objectName - dataset_urn = builder.make_dataset_urn_with_platform_instance( - "snowflake", - resource.lower(), - self.config.platform_instance, - self.config.env, - ) - operation_aspect = OperationClass( - timestampMillis=reported_time, - lastUpdatedTimestamp=last_updated_timestamp, - actor=user_urn, - operationType=operation_type, - ) - mcp = MetadataChangeProposalWrapper( - entityType="dataset", - aspectName="operation", - changeType=ChangeTypeClass.UPSERT, - entityUrn=dataset_urn, - aspect=operation_aspect, - ) - wu = MetadataWorkUnit( - id=f"{start_time.isoformat()}-operation-aspect-{resource}", - mcp=mcp, - ) - yield wu - - def _aggregate_access_events( - self, events: Iterable[SnowflakeJoinedAccessEvent] - ) -> Iterable[Union[AggregatedAccessEvents, MetadataWorkUnit]]: - """ - Emits aggregated access events combined with operational workunits 
from the events. - """ - datasets: AggregatedAccessEvents = collections.defaultdict(dict) - - for event in events: - floored_ts = get_time_bucket( - event.query_start_time, self.config.bucket_duration - ) - - accessed_data = ( - event.base_objects_accessed - if self.config.apply_view_usage_to_tables - else event.direct_objects_accessed - ) - for object in accessed_data: - resource = object.objectName - agg_bucket = datasets[floored_ts].setdefault( - resource, - AggregatedDataset( - bucket_start_time=floored_ts, - resource=resource, - user_email_pattern=self.config.user_email_pattern, - ), - ) - agg_bucket.add_read_entry( - event.email, - event.query_text, - [colRef.columnName.lower() for colRef in object.columns] - if object.columns is not None - else [], - ) - if self.config.include_operational_stats: - yield from self._get_operation_aspect_work_unit(event) - - yield datasets - - def _make_usage_stat(self, agg: AggregatedDataset) -> MetadataWorkUnit: - return agg.make_usage_workunit( - self.config.bucket_duration, - lambda resource: builder.make_dataset_urn_with_platform_instance( - "snowflake", - resource.lower(), - self.config.platform_instance, - self.config.env, - ), - self.config.top_n_queries, - self.config.format_sql_queries, - self.config.include_top_n_queries, - ) - - def get_report(self): - return self.report diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py b/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py index 46a40cb9902c4..fa1e7462f5515 100644 --- a/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py +++ b/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py @@ -41,6 +41,8 @@ "OAUTH_AUTHENTICATOR": OAUTH_AUTHENTICATOR, } +SNOWFLAKE_HOST_SUFFIX = ".snowflakecomputing.com" + class SnowflakeProvisionRoleConfig(ConfigModel): enabled: bool = pydantic.Field( @@ -105,15 +107,21 @@ class BaseSnowflakeConfig(BaseTimeWindowConfig): password: Optional[pydantic.SecretStr] = pydantic.Field( default=None, exclude=True, description="Snowflake password." ) + private_key: Optional[str] = pydantic.Field( + default=None, + description="Private key in a form of '-----BEGIN PRIVATE KEY-----\\nprivate-key\\n-----END PRIVATE KEY-----\\n' if using key pair authentication. Encrypted version of private key will be in a form of '-----BEGIN ENCRYPTED PRIVATE KEY-----\\nencrypted-private-key\\n-----END ECNCRYPTED PRIVATE KEY-----\\n' See: https://docs.snowflake.com/en/user-guide/key-pair-auth.html", + ) + private_key_path: Optional[str] = pydantic.Field( default=None, - description="The path to the private key if using key pair authentication. See: https://docs.snowflake.com/en/user-guide/key-pair-auth.html", + description="The path to the private key if using key pair authentication. Ignored if `private_key` is set. See: https://docs.snowflake.com/en/user-guide/key-pair-auth.html", ) private_key_password: Optional[pydantic.SecretStr] = pydantic.Field( default=None, exclude=True, - description="Password for your private key if using key pair authentication.", + description="Password for your private key. Required if using key pair authentication with encrypted private key.", ) + oauth_config: Optional[OauthConfiguration] = pydantic.Field( default=None, description="oauth configuration - https://docs.snowflake.com/en/user-guide/python-connector-example.html#connecting-with-oauth", @@ -126,7 +134,7 @@ class BaseSnowflakeConfig(BaseTimeWindowConfig): description="DEPRECATED: Snowflake account. e.g. 
abc48144" ) # Deprecated account_id: Optional[str] = pydantic.Field( - description="Snowflake account identifier. e.g. xy12345, xy12345.us-east-2.aws, xy12345.us-central1.gcp, xy12345.central-us.azure. Refer [Account Identifiers](https://docs.snowflake.com/en/user-guide/admin-account-identifier.html#format-2-legacy-account-locator-in-a-region) for more details." + description="Snowflake account identifier. e.g. xy12345, xy12345.us-east-2.aws, xy12345.us-central1.gcp, xy12345.central-us.azure, xy12345.us-west-2.privatelink. Refer [Account Identifiers](https://docs.snowflake.com/en/user-guide/admin-account-identifier.html#format-2-legacy-account-locator-in-a-region) for more details." ) # Once host_port is removed this will be made mandatory warehouse: Optional[str] = pydantic.Field(description="Snowflake warehouse.") role: Optional[str] = pydantic.Field(description="Snowflake role.") @@ -161,9 +169,9 @@ def one_of_host_port_or_account_id_is_required(cls, values): ) host_port = remove_protocol(host_port) host_port = remove_trailing_slashes(host_port) - host_port = remove_suffix(host_port, ".snowflakecomputing.com") + host_port = remove_suffix(host_port, SNOWFLAKE_HOST_SUFFIX) values["host_port"] = host_port - account_id = values.get("account_id") + account_id: Optional[str] = values.get("account_id") if account_id is None: if host_port is None: raise ConfigurationError( @@ -171,6 +179,14 @@ def one_of_host_port_or_account_id_is_required(cls, values): ) else: values["account_id"] = host_port + else: + account_id = remove_protocol(account_id) + account_id = remove_trailing_slashes(account_id) + account_id = remove_suffix(account_id, SNOWFLAKE_HOST_SUFFIX) + if account_id != values["account_id"]: + logger.info(f"Using {account_id} as `account_id`.") + values["account_id"] = account_id + return values @pydantic.validator("authentication_type", always=True) @@ -182,10 +198,13 @@ def authenticator_type_is_valid(cls, v, values, field): ) if v == "KEY_PAIR_AUTHENTICATOR": # If we are using key pair auth, we need the private key path and password to be set - if values.get("private_key_path") is None: + if ( + values.get("private_key") is None + and values.get("private_key_path") is None + ): raise ValueError( - f"'private_key_path' was none " - f"but should be set when using {v} authentication" + f"Both `private_key` and `private_key_path` are none. 
" + f"At least one should be set when using {v} authentication" ) elif v == "OAUTH_AUTHENTICATOR": if values.get("oauth_config") is None: @@ -275,16 +294,22 @@ def get_sql_alchemy_connect_args(self) -> dict: if self.authentication_type != "KEY_PAIR_AUTHENTICATOR": return {} if self.connect_args is None: - if self.private_key_path is None: - raise ValueError("missing required private key path to read key from") - if self.private_key_password is None: - raise ValueError("missing required private key password") - with open(self.private_key_path, "rb") as key: - p_key = serialization.load_pem_private_key( - key.read(), - password=self.private_key_password.get_secret_value().encode(), - backend=default_backend(), - ) + if self.private_key is not None: + pkey_bytes = self.private_key.replace("\\n", "\n").encode() + else: + assert ( + self.private_key_path + ), "missing required private key path to read key from" + with open(self.private_key_path, "rb") as key: + pkey_bytes = key.read() + + p_key = serialization.load_pem_private_key( + pkey_bytes, + password=self.private_key_password.get_secret_value().encode() + if self.private_key_password is not None + else None, + backend=default_backend(), + ) pkb = p_key.private_bytes( encoding=serialization.Encoding.DER, diff --git a/metadata-ingestion/src/datahub/ingestion/source_report/sql/snowflake.py b/metadata-ingestion/src/datahub/ingestion/source_report/sql/snowflake.py index 89c715efb1130..7862e0780a9e7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source_report/sql/snowflake.py +++ b/metadata-ingestion/src/datahub/ingestion/source_report/sql/snowflake.py @@ -2,7 +2,7 @@ from datetime import datetime from typing import Dict, List, Optional -from datahub.ingestion.source.sql.sql_common import SQLSourceReport +from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport from datahub.ingestion.source_report.time_window import BaseTimeWindowReport @@ -12,7 +12,7 @@ class BaseSnowflakeReport(BaseTimeWindowReport): @dataclass -class SnowflakeReport(BaseSnowflakeReport, SQLSourceReport): +class SnowflakeReport(BaseSnowflakeReport, ProfilingSqlReport): num_table_to_table_edges_scanned: int = 0 num_table_to_view_edges_scanned: int = 0 num_view_to_table_edges_scanned: int = 0 diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/transform_registry.py b/metadata-ingestion/src/datahub/ingestion/transformer/transform_registry.py index 12662f55e0399..ab24b7f19ae5c 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/transform_registry.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/transform_registry.py @@ -1,74 +1,26 @@ from datahub.ingestion.api.registry import PluginRegistry from datahub.ingestion.api.transform import Transformer -from datahub.ingestion.transformer import dataset_domain -from datahub.ingestion.transformer.add_dataset_browse_path import ( - AddDatasetBrowsePathTransformer, -) -from datahub.ingestion.transformer.add_dataset_ownership import ( - AddDatasetOwnership, - PatternAddDatasetOwnership, - SimpleAddDatasetOwnership, -) -from datahub.ingestion.transformer.add_dataset_properties import ( - AddDatasetProperties, - SimpleAddDatasetProperties, -) -from datahub.ingestion.transformer.add_dataset_schema_tags import ( - PatternAddDatasetSchemaTags, -) -from datahub.ingestion.transformer.add_dataset_schema_terms import ( - PatternAddDatasetSchemaTerms, -) -from datahub.ingestion.transformer.add_dataset_tags import ( - AddDatasetTags, - PatternAddDatasetTags, - SimpleAddDatasetTags, 
-) -from datahub.ingestion.transformer.add_dataset_terms import ( - AddDatasetTerms, - PatternAddDatasetTerms, - SimpleAddDatasetTerms, -) -from datahub.ingestion.transformer.mark_dataset_status import MarkDatasetStatus -from datahub.ingestion.transformer.remove_dataset_ownership import ( - SimpleRemoveDatasetOwnership, -) transform_registry = PluginRegistry[Transformer]() - -transform_registry.register( - "simple_remove_dataset_ownership", SimpleRemoveDatasetOwnership -) -transform_registry.register("mark_dataset_status", MarkDatasetStatus) -transform_registry.register("set_dataset_browse_path", AddDatasetBrowsePathTransformer) - -transform_registry.register("add_dataset_ownership", AddDatasetOwnership) -transform_registry.register("simple_add_dataset_ownership", SimpleAddDatasetOwnership) -transform_registry.register("pattern_add_dataset_ownership", PatternAddDatasetOwnership) - -transform_registry.register("add_dataset_domain", dataset_domain.AddDatasetDomain) -transform_registry.register( - "simple_add_dataset_domain", dataset_domain.SimpleAddDatasetDomain -) -transform_registry.register( - "pattern_add_dataset_domain", dataset_domain.PatternAddDatasetDomain -) - - -transform_registry.register("add_dataset_tags", AddDatasetTags) -transform_registry.register("simple_add_dataset_tags", SimpleAddDatasetTags) -transform_registry.register("pattern_add_dataset_tags", PatternAddDatasetTags) - -transform_registry.register("add_dataset_terms", AddDatasetTerms) -transform_registry.register("simple_add_dataset_terms", SimpleAddDatasetTerms) -transform_registry.register("pattern_add_dataset_terms", PatternAddDatasetTerms) - -transform_registry.register("add_dataset_properties", AddDatasetProperties) -transform_registry.register("simple_add_dataset_properties", SimpleAddDatasetProperties) - -transform_registry.register( - "pattern_add_dataset_schema_terms", PatternAddDatasetSchemaTerms -) -transform_registry.register( - "pattern_add_dataset_schema_tags", PatternAddDatasetSchemaTags -) +transform_registry.register_from_entrypoint("datahub.ingestion.transformer.plugins") + +# These transformers are always enabled +assert transform_registry.get("simple_remove_dataset_ownership") +assert transform_registry.get("mark_dataset_status") +assert transform_registry.get("set_dataset_browse_path") +assert transform_registry.get("add_dataset_ownership") +assert transform_registry.get("simple_add_dataset_ownership") +assert transform_registry.get("pattern_add_dataset_ownership") +assert transform_registry.get("add_dataset_domain") +assert transform_registry.get("simple_add_dataset_domain") +assert transform_registry.get("pattern_add_dataset_domain") +assert transform_registry.get("add_dataset_tags") +assert transform_registry.get("simple_add_dataset_tags") +assert transform_registry.get("pattern_add_dataset_tags") +assert transform_registry.get("add_dataset_terms") +assert transform_registry.get("simple_add_dataset_terms") +assert transform_registry.get("pattern_add_dataset_terms") +assert transform_registry.get("add_dataset_properties") +assert transform_registry.get("simple_add_dataset_properties") +assert transform_registry.get("pattern_add_dataset_schema_terms") +assert transform_registry.get("pattern_add_dataset_schema_tags") diff --git a/metadata-ingestion/src/datahub/utilities/bigquery_sql_parser.py b/metadata-ingestion/src/datahub/utilities/bigquery_sql_parser.py index f84fe6cd7cb96..ca23a60fab8ae 100644 --- a/metadata-ingestion/src/datahub/utilities/bigquery_sql_parser.py +++ 
b/metadata-ingestion/src/datahub/utilities/bigquery_sql_parser.py @@ -9,11 +9,11 @@ class BigQuerySQLParser(SQLParser): parser: SQLParser - def __init__(self, sql_query: str) -> None: + def __init__(self, sql_query: str, use_external_process: bool = False) -> None: super().__init__(sql_query) self._parsed_sql_query = self.parse_sql_query(sql_query) - self.parser = SqlLineageSQLParser(self._parsed_sql_query) + self.parser = SqlLineageSQLParser(self._parsed_sql_query, use_external_process) def parse_sql_query(self, sql_query: str) -> str: sql_query = BigQuerySQLParser._parse_bigquery_comment_sign(sql_query) diff --git a/metadata-ingestion/src/datahub/utilities/sample_data.py b/metadata-ingestion/src/datahub/utilities/sample_data.py new file mode 100644 index 0000000000000..12810d23f9bff --- /dev/null +++ b/metadata-ingestion/src/datahub/utilities/sample_data.py @@ -0,0 +1,23 @@ +import os +import pathlib +import tempfile + +import requests + +DOCKER_COMPOSE_BASE = os.getenv( + "DOCKER_COMPOSE_BASE", + "https://raw.githubusercontent.com/datahub-project/datahub/master", +) +BOOTSTRAP_MCES_FILE = "metadata-ingestion/examples/mce_files/bootstrap_mce.json" +BOOTSTRAP_MCES_URL = f"{DOCKER_COMPOSE_BASE}/{BOOTSTRAP_MCES_FILE}" + + +def download_sample_data() -> str: + with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as tmp_file: + path = str(pathlib.Path(tmp_file.name)) + + # Download the bootstrap MCE file from GitHub. + mce_json_download_response = requests.get(BOOTSTRAP_MCES_URL) + mce_json_download_response.raise_for_status() + tmp_file.write(mce_json_download_response.content) + return path diff --git a/metadata-ingestion/src/datahub/utilities/urn_encoder.py b/metadata-ingestion/src/datahub/utilities/urn_encoder.py index 68212784da33c..706d50d942055 100644 --- a/metadata-ingestion/src/datahub/utilities/urn_encoder.py +++ b/metadata-ingestion/src/datahub/utilities/urn_encoder.py @@ -3,7 +3,8 @@ # NOTE: Frontend relies on encoding these three characters. Specifically, we decode and encode schema fields for column level lineage. 
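`UrnEncoder.encode_string` above percent-encodes only a small reserved set, and the new `contains_reserved_char` helper (which adds `%` to an extended set) lets callers such as the business-glossary source fall back to a generated id when a display name is not URN-safe. A standalone sketch of both checks, reusing the same character sets:

import urllib.parse

RESERVED_CHARS = {",", "(", ")"}
RESERVED_CHARS_EXTENDED = RESERVED_CHARS.union({"%"})


def encode_string(s: str) -> str:
    # Percent-encode only the reserved characters; leave everything else alone.
    return "".join(urllib.parse.quote(c) if c in RESERVED_CHARS else c for c in s)


def contains_reserved_char(value: str) -> bool:
    return bool(set(value).intersection(RESERVED_CHARS_EXTENDED))


print(encode_string("revenue(usd),monthly"))  # revenue%28usd%29%2Cmonthly
print(contains_reserved_char("CSAT %"))       # True -> caller falls back to a generated id

The `test_auto_id_creation_on_reserved_char` test added later in this patch exercises that fallback for a glossary term path containing `%`.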
# If this changes, make appropriate changes to datahub-web-react/src/app/lineage/utils/columnLineageUtils.ts -RESERVED_CHARS = [",", "(", ")"] +RESERVED_CHARS = {",", "(", ")"} +RESERVED_CHARS_EXTENDED = RESERVED_CHARS.union({"%"}) class UrnEncoder: @@ -19,3 +20,7 @@ def encode_string(s: str) -> str: def encode_char(c: str) -> str: assert len(c) == 1, "Invalid input, Expected single character" return urllib.parse.quote(c) if c in RESERVED_CHARS else c + + @staticmethod + def contains_reserved_char(value: str) -> bool: + return bool(set(value).intersection(RESERVED_CHARS_EXTENDED)) diff --git a/metadata-ingestion/tests/integration/business-glossary/business_glossary.yml b/metadata-ingestion/tests/integration/business-glossary/business_glossary.yml new file mode 100644 index 0000000000000..9550960282872 --- /dev/null +++ b/metadata-ingestion/tests/integration/business-glossary/business_glossary.yml @@ -0,0 +1,72 @@ +version: 1 +source: DataHub +owners: + users: + - mjames +url: "https://github.com/datahub-project/datahub/" +nodes: + - name: Classification + description: A set of terms related to Data Classification + knowledge_links: + - label: Wiki link for classification + url: "https://en.wikipedia.org/wiki/Classification" + terms: + - name: Sensitive + description: Sensitive Data + custom_properties: + is_confidential: false + knowledge_links: + - label: Google Link + url: "https://www.google.com" + - name: Confidential + description: Confidential Data + custom_properties: + is_confidential: true + - name: Highly Confidential + description: Highly Confidential Data + custom_properties: + is_confidential: true + - name: Personal Information + description: All terms related to personal information + owners: + users: + - mjames + terms: + - name: Email + description: An individual's email address + inherits: + - Classification.Confidential + owners: + groups: + - Trust and Safety + - name: Address + description: A physical address + - name: Gender + description: The gender identity of the individual + inherits: + - Classification.Sensitive + - name: Clients And Accounts + description: Provides basic concepts such as account, account holder, account provider, relationship manager that are commonly used by financial services providers to describe customers and to determine counterparty identities + owners: + groups: + - finance + terms: + - name: Account + description: Container for records associated with a business arrangement for regular transactions and services + term_source: "EXTERNAL" + source_ref: FIBO + source_url: "https://spec.edmcouncil.org/fibo/ontology/FBC/ProductsAndServices/ClientsAndAccounts/Account" + inherits: + - Classification.Highly Confidential + contains: + - Clients And Accounts.Balance + - name: Balance + description: Amount of money available or owed + term_source: "EXTERNAL" + source_ref: FIBO + source_url: "https://spec.edmcouncil.org/fibo/ontology/FBC/ProductsAndServices/ClientsAndAccounts/Balance" + - name: KPIs + description: Common Business KPIs + terms: + - name: CSAT % + description: Customer Satisfaction Score diff --git a/metadata-ingestion/tests/integration/business-glossary/glossary_events_golden.json b/metadata-ingestion/tests/integration/business-glossary/glossary_events_golden.json new file mode 100644 index 0000000000000..8bd977993dca7 --- /dev/null +++ b/metadata-ingestion/tests/integration/business-glossary/glossary_events_golden.json @@ -0,0 +1,554 @@ +[ +{ + "proposedSnapshot": { + 
"com.linkedin.pegasus2avro.metadata.snapshot.GlossaryNodeSnapshot": { + "urn": "urn:li:glossaryNode:Classification", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryNodeInfo": { + "definition": "A set of terms related to Data Classification", + "name": "Classification" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:mjames", + "type": "DEVELOPER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00" + } +}, +{ + "entityType": "glossaryNode", + "entityUrn": "urn:li:glossaryNode:Classification", + "changeType": "UPSERT", + "aspectName": "institutionalMemory", + "aspect": { + "value": "{\"elements\": [{\"url\": \"https://en.wikipedia.org/wiki/Classification\", \"description\": \"Wiki link for classification\", \"createStamp\": {\"time\": 1586847600000, \"actor\": \"urn:li:corpuser:datahub\", \"message\": \"ingestion bot\"}}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": { + "urn": "urn:li:glossaryTerm:Classification.Sensitive", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": { + "customProperties": { + "is_confidential": "False" + }, + "name": "Sensitive", + "definition": "Sensitive Data", + "parentNode": "urn:li:glossaryNode:Classification", + "termSource": "INTERNAL", + "sourceRef": "DataHub", + "sourceUrl": "https://github.com/datahub-project/datahub/" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:mjames", + "type": "DEVELOPER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00" + } +}, +{ + "entityType": "glossaryTerm", + "entityUrn": "urn:li:glossaryTerm:Classification.Sensitive", + "changeType": "UPSERT", + "aspectName": "institutionalMemory", + "aspect": { + "value": "{\"elements\": [{\"url\": \"https://www.google.com\", \"description\": \"Google Link\", \"createStamp\": {\"time\": 1586847600000, \"actor\": \"urn:li:corpuser:datahub\", \"message\": \"ingestion bot\"}}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": { + "urn": "urn:li:glossaryTerm:Classification.Confidential", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": { + "customProperties": { + "is_confidential": "True" + }, + "name": "Confidential", + "definition": "Confidential Data", + "parentNode": "urn:li:glossaryNode:Classification", + "termSource": "INTERNAL", + "sourceRef": "DataHub", + "sourceUrl": "https://github.com/datahub-project/datahub/" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:mjames", + "type": "DEVELOPER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + }, + "systemMetadata": 
{ + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": { + "urn": "urn:li:glossaryTerm:Classification.Highly Confidential", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": { + "customProperties": { + "is_confidential": "True" + }, + "name": "Highly Confidential", + "definition": "Highly Confidential Data", + "parentNode": "urn:li:glossaryNode:Classification", + "termSource": "INTERNAL", + "sourceRef": "DataHub", + "sourceUrl": "https://github.com/datahub-project/datahub/" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:mjames", + "type": "DEVELOPER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryNodeSnapshot": { + "urn": "urn:li:glossaryNode:Personal Information", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryNodeInfo": { + "definition": "All terms related to personal information", + "name": "Personal Information" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:mjames", + "type": "DEVELOPER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": { + "urn": "urn:li:glossaryTerm:Personal Information.Email", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": { + "customProperties": {}, + "name": "Email", + "definition": "An individual's email address", + "parentNode": "urn:li:glossaryNode:Personal Information", + "termSource": "INTERNAL", + "sourceRef": "DataHub", + "sourceUrl": "https://github.com/datahub-project/datahub/" + } + }, + { + "com.linkedin.pegasus2avro.glossary.GlossaryRelatedTerms": { + "isRelatedTerms": [ + "urn:li:glossaryTerm:Classification.Confidential" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpGroup:Trust and Safety", + "type": "DEVELOPER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": { + "urn": "urn:li:glossaryTerm:Personal Information.Address", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": { + "customProperties": {}, + "name": "Address", + "definition": "A physical address", + "parentNode": "urn:li:glossaryNode:Personal Information", + "termSource": "INTERNAL", + "sourceRef": "DataHub", + "sourceUrl": "https://github.com/datahub-project/datahub/" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:mjames", + "type": "DEVELOPER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + 
} + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": { + "urn": "urn:li:glossaryTerm:Personal Information.Gender", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": { + "customProperties": {}, + "name": "Gender", + "definition": "The gender identity of the individual", + "parentNode": "urn:li:glossaryNode:Personal Information", + "termSource": "INTERNAL", + "sourceRef": "DataHub", + "sourceUrl": "https://github.com/datahub-project/datahub/" + } + }, + { + "com.linkedin.pegasus2avro.glossary.GlossaryRelatedTerms": { + "isRelatedTerms": [ + "urn:li:glossaryTerm:Classification.Sensitive" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:mjames", + "type": "DEVELOPER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryNodeSnapshot": { + "urn": "urn:li:glossaryNode:Clients And Accounts", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryNodeInfo": { + "definition": "Provides basic concepts such as account, account holder, account provider, relationship manager that are commonly used by financial services providers to describe customers and to determine counterparty identities", + "name": "Clients And Accounts" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpGroup:finance", + "type": "DEVELOPER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": { + "urn": "urn:li:glossaryTerm:Clients And Accounts.Account", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": { + "customProperties": {}, + "name": "Account", + "definition": "Container for records associated with a business arrangement for regular transactions and services", + "parentNode": "urn:li:glossaryNode:Clients And Accounts", + "termSource": "EXTERNAL", + "sourceRef": "FIBO", + "sourceUrl": "https://spec.edmcouncil.org/fibo/ontology/FBC/ProductsAndServices/ClientsAndAccounts/Account" + } + }, + { + "com.linkedin.pegasus2avro.glossary.GlossaryRelatedTerms": { + "isRelatedTerms": [ + "urn:li:glossaryTerm:Classification.Highly Confidential" + ], + "hasRelatedTerms": [ + "urn:li:glossaryTerm:Clients And Accounts.Balance" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpGroup:finance", + "type": "DEVELOPER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": { + "urn": "urn:li:glossaryTerm:Clients And Accounts.Balance", + "aspects": [ + { + 
"com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": { + "customProperties": {}, + "name": "Balance", + "definition": "Amount of money available or owed", + "parentNode": "urn:li:glossaryNode:Clients And Accounts", + "termSource": "EXTERNAL", + "sourceRef": "FIBO", + "sourceUrl": "https://spec.edmcouncil.org/fibo/ontology/FBC/ProductsAndServices/ClientsAndAccounts/Balance" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpGroup:finance", + "type": "DEVELOPER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryNodeSnapshot": { + "urn": "urn:li:glossaryNode:KPIs", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryNodeInfo": { + "definition": "Common Business KPIs", + "name": "KPIs" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:mjames", + "type": "DEVELOPER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": { + "urn": "urn:li:glossaryTerm:4faf1eed790370f65942f2998a7993d6", + "aspects": [ + { + "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": { + "customProperties": {}, + "name": "CSAT %", + "definition": "Customer Satisfaction Score", + "parentNode": "urn:li:glossaryNode:KPIs", + "termSource": "INTERNAL", + "sourceRef": "DataHub", + "sourceUrl": "https://github.com/datahub-project/datahub/" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:mjames", + "type": "DEVELOPER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "datahub-business-glossary-2020_04_14-07_00_00" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/business-glossary/glossary_to_file.yml b/metadata-ingestion/tests/integration/business-glossary/glossary_to_file.yml new file mode 100644 index 0000000000000..4e7c4977f073f --- /dev/null +++ b/metadata-ingestion/tests/integration/business-glossary/glossary_to_file.yml @@ -0,0 +1,10 @@ +source: + type: datahub-business-glossary + config: + # Coordinates + file: ./business_glossary.yml + +sink: + type: file + config: + filename: glossary_events.json diff --git a/metadata-ingestion/tests/integration/business-glossary/test_business_glossary.py b/metadata-ingestion/tests/integration/business-glossary/test_business_glossary.py new file mode 100644 index 0000000000000..2ac9cca972bad --- /dev/null +++ b/metadata-ingestion/tests/integration/business-glossary/test_business_glossary.py @@ -0,0 +1,43 @@ +import shutil +from typing import List + +import pytest +from freezegun import freeze_time + +from datahub.ingestion.source.metadata import business_glossary +from tests.test_helpers import mce_helpers +from tests.test_helpers.click_helpers import run_datahub_cmd + +FROZEN_TIME = "2020-04-14 07:00:00" + + +@freeze_time(FROZEN_TIME) 
+@pytest.mark.integration +def test_glossary_ingest(docker_compose_runner, pytestconfig, tmp_path, mock_time): + test_resources_dir = pytestconfig.rootpath / "tests/integration/business-glossary" + + # Run the metadata ingestion pipeline. + config_file = (test_resources_dir / "glossary_to_file.yml").resolve() + shutil.copy(test_resources_dir / "business_glossary.yml", tmp_path) + run_datahub_cmd( + ["ingest", "--strict-warnings", "-c", f"{config_file}"], tmp_path=tmp_path + ) + # These paths change from one instance run of the clickhouse docker to the other, and the FROZEN_TIME does not apply to these. + ignore_paths: List[str] = [ + r"root\[\d+\]\['proposedSnapshot'\].+\['aspects'\].+\['customProperties'\]\['metadata_modification_time'\]", + r"root\[\d+\]\['proposedSnapshot'\].+\['aspects'\].+\['customProperties'\]\['data_paths'\]", + r"root\[\d+\]\['proposedSnapshot'\].+\['aspects'\].+\['customProperties'\]\['metadata_path'\]", + ] + # Verify the output. + mce_helpers.check_golden_file( + pytestconfig, + ignore_paths=ignore_paths, + output_path=tmp_path / "glossary_events.json", + golden_path=test_resources_dir / "glossary_events_golden.json", + ) + + +@freeze_time(FROZEN_TIME) +def test_auto_id_creation_on_reserved_char(): + id_: str = business_glossary.create_id(["pii", "secure % password"], None, False) + assert id_ == "24baf9389cc05c162c7148c96314d733" diff --git a/metadata-ingestion/tests/integration/dbt/dbt_deleted_actor_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_deleted_actor_mces_golden.json index 64aa7f6233f39..6e3167c5d860f 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_deleted_actor_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_deleted_actor_mces_golden.json @@ -25,6 +25,7 @@ "materialization": "table", "dbt_file_path": "models/transform/payments_by_customer_by_month.sql", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", "manifest_version": "1.0.3", "manifest_adapter": "postgres", @@ -154,6 +155,7 @@ "node_type": "model", "materialization": "ephemeral", "dbt_file_path": "models/transform/customer_details.sql", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", "manifest_version": "1.0.3", "manifest_adapter": "postgres", @@ -263,6 +265,7 @@ "materialization": "table", "dbt_file_path": "models/billing/monthly_billing_with_cust.sql", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", "manifest_version": "1.0.3", "manifest_adapter": "postgres", @@ -413,6 +416,7 @@ "materialization": "view", "dbt_file_path": "models/base/payments_base.sql", "catalog_type": "VIEW", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", "manifest_version": "1.0.3", "manifest_adapter": "postgres", @@ -618,6 +622,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", "manifest_version": "1.0.3", "manifest_adapter": "postgres", @@ -800,6 +805,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", "manifest_version": "1.0.3", "manifest_adapter": "postgres", @@ -922,6 +928,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": 
"sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", "manifest_version": "1.0.3", "manifest_adapter": "postgres", @@ -1059,6 +1066,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", "manifest_version": "1.0.3", "manifest_adapter": "postgres", @@ -1195,6 +1203,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", "manifest_version": "1.0.3", "manifest_adapter": "postgres", @@ -1401,6 +1410,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", "manifest_version": "1.0.3", "manifest_adapter": "postgres", @@ -1563,6 +1573,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", "manifest_version": "1.0.3", "manifest_adapter": "postgres", @@ -1735,6 +1746,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", "manifest_version": "1.0.3", "manifest_adapter": "postgres", @@ -1893,6 +1905,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", "manifest_version": "1.0.3", "manifest_adapter": "postgres", @@ -2051,6 +2064,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", "manifest_version": "1.0.3", "manifest_adapter": "postgres", @@ -2209,6 +2223,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", "manifest_version": "1.0.3", "manifest_adapter": "postgres", diff --git a/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json index 48961e13a59f4..27244ff889c4f 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json @@ -30,6 +30,7 @@ "node_type": "model", "materialization": "ephemeral", "dbt_file_path": "models/transform/customer_details.sql", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -204,6 +205,7 @@ "materialization": "table", "dbt_file_path": "models/billing/monthly_billing_with_cust.sql", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -371,6 +373,7 @@ "materialization": "view", "dbt_file_path": "models/base/payments_base.sql", "catalog_type": "VIEW", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -585,6 +588,7 @@ "materialization": "table", 
"dbt_file_path": "models/transform/payments_by_customer_by_month.sql", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -717,6 +721,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -878,6 +883,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1060,6 +1066,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1182,6 +1189,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1319,6 +1327,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1458,6 +1467,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1664,6 +1674,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1826,6 +1837,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -2001,6 +2013,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -2159,6 +2172,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -2317,6 +2331,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -2475,6 +2490,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", diff --git a/metadata-ingestion/tests/integration/dbt/dbt_stateful_tests_golden.json 
b/metadata-ingestion/tests/integration/dbt/dbt_stateful_tests_golden.json index 12d10453946f5..6b587e025b854 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_stateful_tests_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_stateful_tests_golden.json @@ -25,6 +25,7 @@ "materialization": "view", "dbt_file_path": "models/staging/stg_customers.sql", "catalog_type": "view", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -155,6 +156,7 @@ "materialization": "view", "dbt_file_path": "models/staging/stg_payments.sql", "catalog_type": "view", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -297,6 +299,7 @@ "materialization": "view", "dbt_file_path": "models/staging/stg_orders.sql", "catalog_type": "view", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -439,6 +442,7 @@ "materialization": "seed", "dbt_file_path": "seeds/raw_customers.csv", "catalog_type": "table", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -548,6 +552,7 @@ "materialization": "seed", "dbt_file_path": "seeds/raw_orders.csv", "catalog_type": "table", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -669,6 +674,7 @@ "materialization": "seed", "dbt_file_path": "seeds/raw_payments.csv", "catalog_type": "table", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -790,6 +796,7 @@ "materialization": "table", "dbt_file_path": "models/customers.sql", "catalog_type": "table", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -990,6 +997,7 @@ "materialization": "table", "dbt_file_path": "models/orders.sql", "catalog_type": "table", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json index 36e185ff049c4..42dcf8e359181 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json @@ -24,11 +24,12 @@ "node_type": "model", "materialization": "ephemeral", "dbt_file_path": "models/transform/customer_details.sql", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v6.json", - "manifest_version": "1.2.1", + "language": "sql", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v7.json", + "manifest_version": "1.3.0", "manifest_adapter": "postgres", "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.2.1" + "catalog_version": "1.3.0" }, "name": "customer_details", "description": "", @@ -133,11 +134,12 @@ "materialization": "table", "dbt_file_path": "models/billing/monthly_billing_with_cust.sql", "catalog_type": "BASE TABLE", - "manifest_schema": 
"https://schemas.getdbt.com/dbt/manifest/v6.json", - "manifest_version": "1.2.1", + "language": "sql", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v7.json", + "manifest_version": "1.3.0", "manifest_adapter": "postgres", "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.2.1" + "catalog_version": "1.3.0" }, "name": "an-aliased-view-for-monthly-billing", "description": "", @@ -283,11 +285,12 @@ "materialization": "view", "dbt_file_path": "models/base/payments_base.sql", "catalog_type": "VIEW", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v6.json", - "manifest_version": "1.2.1", + "language": "sql", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v7.json", + "manifest_version": "1.3.0", "manifest_adapter": "postgres", "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.2.1" + "catalog_version": "1.3.0" }, "name": "an-aliased-view-for-payments", "description": "", @@ -489,11 +492,12 @@ "materialization": "table", "dbt_file_path": "models/transform/payments_by_customer_by_month.sql", "catalog_type": "BASE TABLE", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v6.json", - "manifest_version": "1.2.1", + "language": "sql", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v7.json", + "manifest_version": "1.3.0", "manifest_adapter": "postgres", "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.2.1" + "catalog_version": "1.3.0" }, "name": "payments_by_customer_by_month", "description": "", @@ -551,8 +555,8 @@ }, { "fieldPath": "customer_id", - "nullable": false, "description": "description for customer_id from dbt", + "nullable": false, "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -646,11 +650,12 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v6.json", - "manifest_version": "1.2.1", + "language": "sql", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v7.json", + "manifest_version": "1.3.0", "manifest_adapter": "postgres", "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.2.1" + "catalog_version": "1.3.0" }, "name": "actor", "description": "description for actor table from dbt", @@ -800,11 +805,12 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v6.json", - "manifest_version": "1.2.1", + "language": "sql", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v7.json", + "manifest_version": "1.3.0", "manifest_adapter": "postgres", "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.2.1" + "catalog_version": "1.3.0" }, "name": "address", "description": "a user's address", @@ -982,11 +988,12 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v6.json", - "manifest_version": "1.2.1", + "language": "sql", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v7.json", + "manifest_version": "1.3.0", "manifest_adapter": "postgres", "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.2.1" + "catalog_version": "1.3.0" }, "name": "category", "description": "a user's category", @@ -1104,11 +1111,12 @@ "node_type": "source", 
"dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v6.json", - "manifest_version": "1.2.1", + "language": "sql", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v7.json", + "manifest_version": "1.3.0", "manifest_adapter": "postgres", "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.2.1" + "catalog_version": "1.3.0" }, "name": "city", "description": "", @@ -1241,11 +1249,12 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v6.json", - "manifest_version": "1.2.1", + "language": "sql", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v7.json", + "manifest_version": "1.3.0", "manifest_adapter": "postgres", "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.2.1" + "catalog_version": "1.3.0" }, "name": "country", "description": "", @@ -1380,11 +1389,12 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v6.json", - "manifest_version": "1.2.1", + "language": "sql", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v7.json", + "manifest_version": "1.3.0", "manifest_adapter": "postgres", "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.2.1" + "catalog_version": "1.3.0" }, "name": "customer", "description": "description for customer table from dbt", @@ -1586,11 +1596,12 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v6.json", - "manifest_version": "1.2.1", + "language": "sql", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v7.json", + "manifest_version": "1.3.0", "manifest_adapter": "postgres", "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.2.1" + "catalog_version": "1.3.0" }, "name": "payment_p2020_01", "description": "", @@ -1748,11 +1759,12 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v6.json", - "manifest_version": "1.2.1", + "language": "sql", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v7.json", + "manifest_version": "1.3.0", "manifest_adapter": "postgres", "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.2.1" + "catalog_version": "1.3.0" }, "name": "payment_p2020_02", "description": "", @@ -1923,11 +1935,12 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v6.json", - "manifest_version": "1.2.1", + "language": "sql", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v7.json", + "manifest_version": "1.3.0", "manifest_adapter": "postgres", "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.2.1" + "catalog_version": "1.3.0" }, "name": "payment_p2020_03", "description": "", @@ -2081,11 +2094,12 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v6.json", - "manifest_version": "1.2.1", + "language": "sql", + "manifest_schema": 
"https://schemas.getdbt.com/dbt/manifest/v7.json", + "manifest_version": "1.3.0", "manifest_adapter": "postgres", "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.2.1" + "catalog_version": "1.3.0" }, "name": "payment_p2020_04", "description": "", @@ -2239,11 +2253,12 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v6.json", - "manifest_version": "1.2.1", + "language": "sql", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v7.json", + "manifest_version": "1.3.0", "manifest_adapter": "postgres", "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.2.1" + "catalog_version": "1.3.0" }, "name": "payment_p2020_05", "description": "a payment", @@ -2397,11 +2412,12 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v6.json", - "manifest_version": "1.2.1", + "language": "sql", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v7.json", + "manifest_version": "1.3.0", "manifest_adapter": "postgres", "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.2.1" + "catalog_version": "1.3.0" }, "name": "payment_p2020_06", "description": "", diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_events_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_events_golden.json index a214aff0f9e7f..3098e9693202d 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_events_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_events_golden.json @@ -25,6 +25,7 @@ "materialization": "view", "dbt_file_path": "models/staging/stg_customers.sql", "catalog_type": "view", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -155,6 +156,7 @@ "materialization": "view", "dbt_file_path": "models/staging/stg_payments.sql", "catalog_type": "view", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -297,6 +299,7 @@ "materialization": "view", "dbt_file_path": "models/staging/stg_orders.sql", "catalog_type": "view", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -439,6 +442,7 @@ "materialization": "seed", "dbt_file_path": "seeds/raw_customers.csv", "catalog_type": "table", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -548,6 +552,7 @@ "materialization": "seed", "dbt_file_path": "seeds/raw_orders.csv", "catalog_type": "table", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -669,6 +674,7 @@ "materialization": "seed", "dbt_file_path": "seeds/raw_payments.csv", "catalog_type": "table", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -790,6 +796,7 @@ "materialization": "table", "dbt_file_path": "models/customers.sql", "catalog_type": "table", + "language": "sql", "manifest_schema": 
"https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -990,6 +997,7 @@ "materialization": "table", "dbt_file_path": "models/orders.sql", "catalog_type": "table", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json index b6dbcc82fb49c..b9615c1f63dd5 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json @@ -29,6 +29,7 @@ "node_type": "model", "materialization": "ephemeral", "dbt_file_path": "models/transform/customer_details.sql", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -168,6 +169,7 @@ "materialization": "table", "dbt_file_path": "models/billing/monthly_billing_with_cust.sql", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -318,6 +320,7 @@ "materialization": "view", "dbt_file_path": "models/base/payments_base.sql", "catalog_type": "VIEW", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -532,6 +535,7 @@ "materialization": "table", "dbt_file_path": "models/transform/payments_by_customer_by_month.sql", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -664,6 +668,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -822,6 +827,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1004,6 +1010,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1126,6 +1133,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1263,6 +1271,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1399,6 +1408,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": 
"postgres", @@ -1605,6 +1615,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1767,6 +1778,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1939,6 +1951,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -2097,6 +2110,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -2255,6 +2269,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json index cfb3f37085f2b..b3ab9597fcee8 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json @@ -30,6 +30,7 @@ "node_type": "model", "materialization": "ephemeral", "dbt_file_path": "models/transform/customer_details.sql", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -169,6 +170,7 @@ "materialization": "table", "dbt_file_path": "models/billing/monthly_billing_with_cust.sql", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -319,6 +321,7 @@ "materialization": "view", "dbt_file_path": "models/base/payments_base.sql", "catalog_type": "VIEW", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -533,6 +536,7 @@ "materialization": "table", "dbt_file_path": "models/transform/payments_by_customer_by_month.sql", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -665,6 +669,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -823,6 +828,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1005,6 +1011,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", 
"catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1127,6 +1134,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1264,6 +1272,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1400,6 +1409,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1606,6 +1616,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1768,6 +1779,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1940,6 +1952,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -2098,6 +2111,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -2256,6 +2270,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -2414,6 +2429,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json index 1ebf225898505..9f418aba5a40e 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json @@ -30,6 +30,7 @@ "node_type": "model", "materialization": "ephemeral", "dbt_file_path": "models/transform/customer_details.sql", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -169,6 +170,7 @@ "materialization": "table", "dbt_file_path": "models/billing/monthly_billing_with_cust.sql", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": 
"0.19.1", "manifest_adapter": "postgres", @@ -319,6 +321,7 @@ "materialization": "view", "dbt_file_path": "models/base/payments_base.sql", "catalog_type": "VIEW", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -533,6 +536,7 @@ "materialization": "table", "dbt_file_path": "models/transform/payments_by_customer_by_month.sql", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -665,6 +669,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -823,6 +828,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1005,6 +1011,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1127,6 +1134,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1264,6 +1272,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1400,6 +1409,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1606,6 +1616,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1768,6 +1779,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1940,6 +1952,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -2098,6 +2111,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -2256,6 +2270,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -2414,6 +2429,7 @@ "node_type": 
"source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json index 0de55b45c78de..a07b04dd8dda3 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json @@ -30,6 +30,7 @@ "node_type": "model", "materialization": "ephemeral", "dbt_file_path": "models/transform/customer_details.sql", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -169,6 +170,7 @@ "materialization": "table", "dbt_file_path": "models/billing/monthly_billing_with_cust.sql", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -319,6 +321,7 @@ "materialization": "view", "dbt_file_path": "models/base/payments_base.sql", "catalog_type": "VIEW", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -533,6 +536,7 @@ "materialization": "table", "dbt_file_path": "models/transform/payments_by_customer_by_month.sql", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -665,6 +669,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -823,6 +828,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1005,6 +1011,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1127,6 +1134,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1264,6 +1272,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1400,6 +1409,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1606,6 +1616,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", 
"manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1768,6 +1779,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -1940,6 +1952,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -2098,6 +2111,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -2256,6 +2270,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", @@ -2414,6 +2429,7 @@ "node_type": "source", "dbt_file_path": "models/base.yml", "catalog_type": "BASE TABLE", + "language": "sql", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v1.json", "manifest_version": "0.19.1", "manifest_adapter": "postgres", diff --git a/metadata-ingestion/tests/integration/dbt/sample_dbt_catalog.json b/metadata-ingestion/tests/integration/dbt/sample_dbt_catalog.json index ac6984cbc2d06..4ffa72c47de27 100644 --- a/metadata-ingestion/tests/integration/dbt/sample_dbt_catalog.json +++ b/metadata-ingestion/tests/integration/dbt/sample_dbt_catalog.json @@ -2,7 +2,7 @@ "errors": null, "metadata": { "dbt_schema_version": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "dbt_version": "1.2.1", + "dbt_version": "1.3.0", "env": {}, "generated_at": "2021-06-19T21:38:36.384613Z", "invocation_id": "just-some-random-id-2" diff --git a/metadata-ingestion/tests/integration/dbt/sample_dbt_manifest.json b/metadata-ingestion/tests/integration/dbt/sample_dbt_manifest.json index 47c2fbbd54cd9..1ecfca1579c58 100644 --- a/metadata-ingestion/tests/integration/dbt/sample_dbt_manifest.json +++ b/metadata-ingestion/tests/integration/dbt/sample_dbt_manifest.json @@ -57,12 +57,13 @@ "macros": { "macro.dbt._split_part_negative": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.466659, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro _split_part_negative(string_text, delimiter_text, part_number) %}\n\n split_part(\n {{ string_text }},\n {{ delimiter_text }},\n length({{ string_text }})\n - length(\n replace({{ string_text }}, {{ delimiter_text }}, '')\n ) + 2 {{ part_number }}\n )\n\n{% endmacro %}", @@ -74,12 +75,13 @@ "path": "macros/utils/split_part.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt._split_part_negative" }, "macro.dbt.after_commit": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.242806, "depends_on": { "macros": [ "macro.dbt.make_hook_config" @@ -87,6 +89,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro after_commit(sql) %}\n {{ make_hook_config(sql, inside_transaction=False) }}\n{% endmacro %}", @@ -98,12 +101,13 @@ 
"path": "macros/materializations/hooks.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.after_commit" }, "macro.dbt.alter_column_comment": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.512034, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__alter_column_comment" @@ -111,6 +115,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro alter_column_comment(relation, column_dict) -%}\n {{ return(adapter.dispatch('alter_column_comment', 'dbt')(relation, column_dict)) }}\n{% endmacro %}", @@ -122,12 +127,13 @@ "path": "macros/adapters/persist_docs.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.alter_column_comment" }, "macro.dbt.alter_column_type": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.528278, "depends_on": { "macros": [ "macro.dbt.default__alter_column_type" @@ -135,6 +141,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro alter_column_type(relation, column_name, new_column_type) -%}\n {{ return(adapter.dispatch('alter_column_type', 'dbt')(relation, column_name, new_column_type)) }}\n{% endmacro %}", @@ -146,12 +153,13 @@ "path": "macros/adapters/columns.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.alter_column_type" }, "macro.dbt.alter_relation_add_remove_columns": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.529997, "depends_on": { "macros": [ "macro.dbt.default__alter_relation_add_remove_columns" @@ -159,6 +167,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro alter_relation_add_remove_columns(relation, add_columns = none, remove_columns = none) -%}\n {{ return(adapter.dispatch('alter_relation_add_remove_columns', 'dbt')(relation, add_columns, remove_columns)) }}\n{% endmacro %}", @@ -170,12 +179,13 @@ "path": "macros/adapters/columns.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.alter_relation_add_remove_columns" }, "macro.dbt.alter_relation_comment": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.5129092, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__alter_relation_comment" @@ -183,6 +193,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro alter_relation_comment(relation, relation_comment) -%}\n {{ return(adapter.dispatch('alter_relation_comment', 'dbt')(relation, relation_comment)) }}\n{% endmacro %}", @@ -194,12 +205,13 @@ "path": "macros/adapters/persist_docs.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.alter_relation_comment" }, "macro.dbt.any_value": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4485052, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__any_value" @@ -207,6 +219,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro any_value(expression) -%}\n {{ return(adapter.dispatch('any_value', 'dbt') (expression)) }}\n{% endmacro %}", @@ -218,12 +231,13 @@ "path": "macros/utils/any_value.sql", 
"resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.any_value" }, "macro.dbt.apply_grants": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.50767, "depends_on": { "macros": [ "macro.dbt.default__apply_grants" @@ -231,6 +245,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro apply_grants(relation, grant_config, should_revoke) %}\n {{ return(adapter.dispatch(\"apply_grants\", \"dbt\")(relation, grant_config, should_revoke)) }}\n{% endmacro %}", @@ -242,12 +257,91 @@ "path": "macros/adapters/apply_grants.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.apply_grants" }, + "macro.dbt.array_append": { + "arguments": [], + "created_at": 1670298551.470447, + "depends_on": { + "macros": [ + "macro.dbt.default__array_append" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro array_append(array, new_element) -%}\n {{ return(adapter.dispatch('array_append', 'dbt')(array, new_element)) }}\n{%- endmacro %}", + "meta": {}, + "name": "array_append", + "original_file_path": "macros/utils/array_append.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/utils/array_append.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.array_append" + }, + "macro.dbt.array_concat": { + "arguments": [], + "created_at": 1670298551.4605079, + "depends_on": { + "macros": [ + "macro.dbt.default__array_concat" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro array_concat(array_1, array_2) -%}\n {{ return(adapter.dispatch('array_concat', 'dbt')(array_1, array_2)) }}\n{%- endmacro %}", + "meta": {}, + "name": "array_concat", + "original_file_path": "macros/utils/array_concat.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/utils/array_concat.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.array_concat" + }, + "macro.dbt.array_construct": { + "arguments": [], + "created_at": 1670298551.46897, + "depends_on": { + "macros": [ + "macro.dbt.default__array_construct" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro array_construct(inputs=[], data_type=api.Column.translate_type('integer')) -%}\n {{ return(adapter.dispatch('array_construct', 'dbt')(inputs, data_type)) }}\n{%- endmacro %}", + "meta": {}, + "name": "array_construct", + "original_file_path": "macros/utils/array_construct.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/utils/array_construct.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.array_construct" + }, "macro.dbt.before_begin": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.242221, "depends_on": { "macros": [ "macro.dbt.make_hook_config" @@ -255,6 +349,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro before_begin(sql) %}\n {{ make_hook_config(sql, inside_transaction=False) }}\n{% endmacro %}", @@ -266,12 +361,13 @@ "path": "macros/materializations/hooks.sql", 
"resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.before_begin" }, "macro.dbt.bool_or": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.461714, "depends_on": { "macros": [ "macro.dbt.default__bool_or" @@ -279,6 +375,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro bool_or(expression) -%}\n {{ return(adapter.dispatch('bool_or', 'dbt') (expression)) }}\n{% endmacro %}", @@ -290,12 +387,61 @@ "path": "macros/utils/bool_or.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.bool_or" }, + "macro.dbt.build_config_dict": { + "arguments": [], + "created_at": 1670298551.5360699, + "depends_on": { + "macros": [] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro build_config_dict(model) %}\n {%- set config_dict = {} -%}\n {%- for key in model.config.config_keys_used -%}\n {# weird type testing with enum, would be much easier to write this logic in Python! #}\n {%- if key == 'language' -%}\n {%- set value = 'python' -%}\n {%- endif -%}\n {%- set value = model.config[key] -%}\n {%- do config_dict.update({key: value}) -%}\n {%- endfor -%}\nconfig_dict = {{ config_dict }}\n{% endmacro %}", + "meta": {}, + "name": "build_config_dict", + "original_file_path": "macros/python_model/python.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/python_model/python.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.build_config_dict" + }, + "macro.dbt.build_ref_function": { + "arguments": [], + "created_at": 1670298551.5341718, + "depends_on": { + "macros": [] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro build_ref_function(model) %}\n\n {%- set ref_dict = {} -%}\n {%- for _ref in model.refs -%}\n {%- set resolved = ref(*_ref) -%}\n {%- do ref_dict.update({_ref | join(\".\"): resolved.quote(database=False, schema=False, identifier=False) | string}) -%}\n {%- endfor -%}\n\ndef ref(*args,dbt_load_df_function):\n refs = {{ ref_dict | tojson }}\n key = \".\".join(args)\n return dbt_load_df_function(refs[key])\n\n{% endmacro %}", + "meta": {}, + "name": "build_ref_function", + "original_file_path": "macros/python_model/python.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/python_model/python.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.build_ref_function" + }, "macro.dbt.build_snapshot_staging_table": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.27379, "depends_on": { "macros": [ "macro.dbt.make_temp_relation", @@ -306,6 +452,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro build_snapshot_staging_table(strategy, sql, target_relation) %}\n {% set temp_relation = make_temp_relation(target_relation) %}\n\n {% set select = snapshot_staging_table(strategy, sql, target_relation) %}\n\n {% call statement('build_snapshot_staging_relation') %}\n {{ create_table_as(True, temp_relation, select) }}\n {% endcall %}\n\n {% do return(temp_relation) %}\n{% endmacro %}", @@ -317,12 +464,13 @@ "path": "macros/materializations/snapshots/helpers.sql", 
"resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.build_snapshot_staging_table" }, "macro.dbt.build_snapshot_table": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.2724018, "depends_on": { "macros": [ "macro.dbt.default__build_snapshot_table" @@ -330,6 +478,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro build_snapshot_table(strategy, sql) -%}\n {{ adapter.dispatch('build_snapshot_table', 'dbt')(strategy, sql) }}\n{% endmacro %}", @@ -341,12 +490,37 @@ "path": "macros/materializations/snapshots/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.build_snapshot_table" }, + "macro.dbt.build_source_function": { + "arguments": [], + "created_at": 1670298551.5351338, + "depends_on": { + "macros": [] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro build_source_function(model) %}\n\n {%- set source_dict = {} -%}\n {%- for _source in model.sources -%}\n {%- set resolved = source(*_source) -%}\n {%- do source_dict.update({_source | join(\".\"): resolved.quote(database=False, schema=False, identifier=False) | string}) -%}\n {%- endfor -%}\n\ndef source(*args, dbt_load_df_function):\n sources = {{ source_dict | tojson }}\n key = \".\".join(args)\n return dbt_load_df_function(sources[key])\n\n{% endmacro %}", + "meta": {}, + "name": "build_source_function", + "original_file_path": "macros/python_model/python.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/python_model/python.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.build_source_function" + }, "macro.dbt.call_dcl_statements": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.5065012, "depends_on": { "macros": [ "macro.dbt.default__call_dcl_statements" @@ -354,6 +528,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro call_dcl_statements(dcl_statement_list) %}\n {{ return(adapter.dispatch(\"call_dcl_statements\", \"dbt\")(dcl_statement_list)) }}\n{% endmacro %}", @@ -365,12 +540,13 @@ "path": "macros/adapters/apply_grants.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.call_dcl_statements" }, "macro.dbt.cast_bool_to_text": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.44711, "depends_on": { "macros": [ "macro.dbt.default__cast_bool_to_text" @@ -378,6 +554,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro cast_bool_to_text(field) %}\n {{ adapter.dispatch('cast_bool_to_text', 'dbt') (field) }}\n{% endmacro %}", @@ -389,12 +566,13 @@ "path": "macros/utils/cast_bool_to_text.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.cast_bool_to_text" }, "macro.dbt.check_for_schema_changes": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.339218, "depends_on": { "macros": [ "macro.dbt.diff_columns", @@ -403,6 +581,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro check_for_schema_changes(source_relation, target_relation) 
%}\n\n {% set schema_changed = False %}\n\n {%- set source_columns = adapter.get_columns_in_relation(source_relation) -%}\n {%- set target_columns = adapter.get_columns_in_relation(target_relation) -%}\n {%- set source_not_in_target = diff_columns(source_columns, target_columns) -%}\n {%- set target_not_in_source = diff_columns(target_columns, source_columns) -%}\n\n {% set new_target_types = diff_column_data_types(source_columns, target_columns) %}\n\n {% if source_not_in_target != [] %}\n {% set schema_changed = True %}\n {% elif target_not_in_source != [] or new_target_types != [] %}\n {% set schema_changed = True %}\n {% elif new_target_types != [] %}\n {% set schema_changed = True %}\n {% endif %}\n\n {% set changes_dict = {\n 'schema_changed': schema_changed,\n 'source_not_in_target': source_not_in_target,\n 'target_not_in_source': target_not_in_source,\n 'source_columns': source_columns,\n 'target_columns': target_columns,\n 'new_target_types': new_target_types\n } %}\n\n {% set msg %}\n In {{ target_relation }}:\n Schema changed: {{ schema_changed }}\n Source columns not in target: {{ source_not_in_target }}\n Target columns not in source: {{ target_not_in_source }}\n New column types: {{ new_target_types }}\n {% endset %}\n\n {% do log(msg) %}\n\n {{ return(changes_dict) }}\n\n{% endmacro %}", @@ -414,12 +593,13 @@ "path": "macros/materializations/models/incremental/on_schema_change.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.check_for_schema_changes" }, "macro.dbt.check_schema_exists": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.521215, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__check_schema_exists" @@ -427,6 +607,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro check_schema_exists(information_schema, schema) -%}\n {{ return(adapter.dispatch('check_schema_exists', 'dbt')(information_schema, schema)) }}\n{% endmacro %}", @@ -438,12 +619,13 @@ "path": "macros/adapters/metadata.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.check_schema_exists" }, "macro.dbt.collect_freshness": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.495685, "depends_on": { "macros": [ "macro.dbt.default__collect_freshness" @@ -451,6 +633,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro collect_freshness(source, loaded_at_field, filter) %}\n {{ return(adapter.dispatch('collect_freshness', 'dbt')(source, loaded_at_field, filter))}}\n{% endmacro %}", @@ -462,12 +645,13 @@ "path": "macros/adapters/freshness.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.collect_freshness" }, "macro.dbt.concat": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.407477, "depends_on": { "macros": [ "macro.dbt.default__concat" @@ -475,6 +659,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro concat(fields) -%}\n {{ return(adapter.dispatch('concat', 'dbt')(fields)) }}\n{%- endmacro %}", @@ -486,17 +671,19 @@ "path": "macros/utils/concat.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.concat" }, 
"macro.dbt.convert_datetime": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4006772, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro convert_datetime(date_str, date_fmt) %}\n\n {% set error_msg -%}\n The provided partition date '{{ date_str }}' does not match the expected format '{{ date_fmt }}'\n {%- endset %}\n\n {% set res = try_or_compiler_error(error_msg, modules.datetime.datetime.strptime, date_str.strip(), date_fmt) %}\n {{ return(res) }}\n\n{% endmacro %}", @@ -508,12 +695,13 @@ "path": "macros/etc/datetime.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.convert_datetime" }, "macro.dbt.copy_grants": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.499706, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__copy_grants" @@ -521,6 +709,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro copy_grants() %}\n {{ return(adapter.dispatch('copy_grants', 'dbt')()) }}\n{% endmacro %}", @@ -532,12 +721,13 @@ "path": "macros/adapters/apply_grants.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.copy_grants" }, "macro.dbt.create_columns": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.268071, "depends_on": { "macros": [ "macro.dbt.default__create_columns" @@ -545,6 +735,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro create_columns(relation, columns) %}\n {{ adapter.dispatch('create_columns', 'dbt')(relation, columns) }}\n{% endmacro %}", @@ -556,12 +747,13 @@ "path": "macros/materializations/snapshots/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.create_columns" }, "macro.dbt.create_csv_table": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.3752189, "depends_on": { "macros": [ "macro.dbt.default__create_csv_table" @@ -569,6 +761,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro create_csv_table(model, agate_table) -%}\n {{ adapter.dispatch('create_csv_table', 'dbt')(model, agate_table) }}\n{%- endmacro %}", @@ -580,12 +773,13 @@ "path": "macros/materializations/seeds/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.create_csv_table" }, "macro.dbt.create_indexes": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.478535, "depends_on": { "macros": [ "macro.dbt.default__create_indexes" @@ -593,6 +787,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro create_indexes(relation) -%}\n {{ adapter.dispatch('create_indexes', 'dbt')(relation) }}\n{%- endmacro %}", @@ -604,12 +799,13 @@ "path": "macros/adapters/indexes.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.create_indexes" }, "macro.dbt.create_or_replace_view": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.359894, "depends_on": { "macros": [ "macro.dbt.run_hooks", @@ -623,6 +819,7 @@ }, "description": "", "docs": { + 
"node_color": null, "show": true }, "macro_sql": "{% macro create_or_replace_view() %}\n {%- set identifier = model['alias'] -%}\n\n {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}\n {%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%}\n\n {%- set target_relation = api.Relation.create(\n identifier=identifier, schema=schema, database=database,\n type='view') -%}\n {% set grant_config = config.get('grants') %}\n\n {{ run_hooks(pre_hooks) }}\n\n -- If there's a table with the same name and we weren't told to full refresh,\n -- that's an error. If we were told to full refresh, drop it. This behavior differs\n -- for Snowflake and BigQuery, so multiple dispatch is used.\n {%- if old_relation is not none and old_relation.is_table -%}\n {{ handle_existing_table(should_full_refresh(), old_relation) }}\n {%- endif -%}\n\n -- build model\n {% call statement('main') -%}\n {{ get_create_view_as_sql(target_relation, sql) }}\n {%- endcall %}\n\n {% set should_revoke = should_revoke(exists_as_view, full_refresh_mode=True) %}\n {% do apply_grants(target_relation, grant_config, should_revoke=True) %}\n\n {{ run_hooks(post_hooks) }}\n\n {{ return({'relations': [target_relation]}) }}\n\n{% endmacro %}", @@ -634,12 +831,13 @@ "path": "macros/materializations/models/view/create_or_replace_view.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.create_or_replace_view" }, "macro.dbt.create_schema": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.471772, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__create_schema" @@ -647,6 +845,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro create_schema(relation) -%}\n {{ adapter.dispatch('create_schema', 'dbt')(relation) }}\n{% endmacro %}", @@ -658,12 +857,13 @@ "path": "macros/adapters/schema.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.create_schema" }, "macro.dbt.create_table_as": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.350219, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__create_table_as" @@ -671,9 +871,10 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, - "macro_sql": "{% macro create_table_as(temporary, relation, sql) -%}\n {{ adapter.dispatch('create_table_as', 'dbt')(temporary, relation, sql) }}\n{%- endmacro %}", + "macro_sql": "{% macro create_table_as(temporary, relation, compiled_code, language='sql') -%}\n {# backward compatibility for create_table_as that does not support language #}\n {% if language == \"sql\" %}\n {{ adapter.dispatch('create_table_as', 'dbt')(temporary, relation, compiled_code)}}\n {% else %}\n {{ adapter.dispatch('create_table_as', 'dbt')(temporary, relation, compiled_code, language) }}\n {% endif %}\n\n{%- endmacro %}", "meta": {}, "name": "create_table_as", "original_file_path": "macros/materializations/models/table/create_table_as.sql", @@ -682,12 +883,13 @@ "path": "macros/materializations/models/table/create_table_as.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.create_table_as" }, "macro.dbt.create_view_as": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.36138, "depends_on": { 
"macros": [ "macro.dbt.default__create_view_as" @@ -695,6 +897,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro create_view_as(relation, sql) -%}\n {{ adapter.dispatch('create_view_as', 'dbt')(relation, sql) }}\n{%- endmacro %}", @@ -706,12 +909,13 @@ "path": "macros/materializations/models/view/create_view_as.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.create_view_as" }, "macro.dbt.current_timestamp": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.474187, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__current_timestamp" @@ -719,23 +923,77 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, - "macro_sql": "{% macro current_timestamp() -%}\n {{ adapter.dispatch('current_timestamp', 'dbt')() }}\n{%- endmacro %}", + "macro_sql": "{%- macro current_timestamp() -%}\n {{ adapter.dispatch('current_timestamp', 'dbt')() }}\n{%- endmacro -%}\n\n", "meta": {}, "name": "current_timestamp", - "original_file_path": "macros/adapters/freshness.sql", + "original_file_path": "macros/adapters/timestamps.sql", "package_name": "dbt", "patch_path": null, - "path": "macros/adapters/freshness.sql", + "path": "macros/adapters/timestamps.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.current_timestamp" }, + "macro.dbt.current_timestamp_backcompat": { + "arguments": [], + "created_at": 1670298551.47562, + "depends_on": { + "macros": [ + "macro.dbt_postgres.postgres__current_timestamp_backcompat" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro current_timestamp_backcompat() %}\n {{ return(adapter.dispatch('current_timestamp_backcompat', 'dbt')()) }}\n{% endmacro %}", + "meta": {}, + "name": "current_timestamp_backcompat", + "original_file_path": "macros/adapters/timestamps.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/adapters/timestamps.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.current_timestamp_backcompat" + }, + "macro.dbt.current_timestamp_in_utc_backcompat": { + "arguments": [], + "created_at": 1670298551.4763389, + "depends_on": { + "macros": [ + "macro.dbt_postgres.postgres__current_timestamp_in_utc_backcompat" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro current_timestamp_in_utc_backcompat() %}\n {{ return(adapter.dispatch('current_timestamp_in_utc_backcompat', 'dbt')()) }}\n{% endmacro %}", + "meta": {}, + "name": "current_timestamp_in_utc_backcompat", + "original_file_path": "macros/adapters/timestamps.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/adapters/timestamps.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.current_timestamp_in_utc_backcompat" + }, "macro.dbt.date_trunc": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4675012, "depends_on": { "macros": [ "macro.dbt.default__date_trunc" @@ -743,6 +1001,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro date_trunc(datepart, date) -%}\n {{ return(adapter.dispatch('date_trunc', 'dbt') (datepart, date)) }}\n{%- 
endmacro %}", @@ -754,12 +1013,13 @@ "path": "macros/utils/date_trunc.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.date_trunc" }, "macro.dbt.dateadd": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.409394, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__dateadd" @@ -767,6 +1027,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro dateadd(datepart, interval, from_date_or_timestamp) %}\n {{ return(adapter.dispatch('dateadd', 'dbt')(datepart, interval, from_date_or_timestamp)) }}\n{% endmacro %}", @@ -778,12 +1039,13 @@ "path": "macros/utils/dateadd.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.dateadd" }, "macro.dbt.datediff": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.442951, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__datediff" @@ -791,6 +1053,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro datediff(first_date, second_date, datepart) %}\n {{ return(adapter.dispatch('datediff', 'dbt')(first_date, second_date, datepart)) }}\n{% endmacro %}", @@ -802,12 +1065,13 @@ "path": "macros/utils/datediff.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.datediff" }, "macro.dbt.dates_in_range": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.402984, "depends_on": { "macros": [ "macro.dbt.convert_datetime" @@ -815,6 +1079,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro dates_in_range(start_date_str, end_date_str=none, in_fmt=\"%Y%m%d\", out_fmt=\"%Y%m%d\") %}\n {% set end_date_str = start_date_str if end_date_str is none else end_date_str %}\n\n {% set start_date = convert_datetime(start_date_str, in_fmt) %}\n {% set end_date = convert_datetime(end_date_str, in_fmt) %}\n\n {% set day_count = (end_date - start_date).days %}\n {% if day_count < 0 %}\n {% set msg -%}\n Partiton start date is after the end date ({{ start_date }}, {{ end_date }})\n {%- endset %}\n\n {{ exceptions.raise_compiler_error(msg, model) }}\n {% endif %}\n\n {% set date_list = [] %}\n {% for i in range(0, day_count + 1) %}\n {% set the_date = (modules.datetime.timedelta(days=i) + start_date) %}\n {% if not out_fmt %}\n {% set _ = date_list.append(the_date) %}\n {% else %}\n {% set _ = date_list.append(the_date.strftime(out_fmt)) %}\n {% endif %}\n {% endfor %}\n\n {{ return(date_list) }}\n{% endmacro %}", @@ -826,17 +1091,19 @@ "path": "macros/etc/datetime.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.dates_in_range" }, "macro.dbt.default__alter_column_comment": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.5124328, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__alter_column_comment(relation, column_dict) -%}\n {{ exceptions.raise_not_implemented(\n 'alter_column_comment macro not implemented for adapter '+adapter.type()) }}\n{% endmacro %}", @@ -848,12 +1115,13 @@ "path": "macros/adapters/persist_docs.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + 
"supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__alter_column_comment" }, "macro.dbt.default__alter_column_type": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.52948, "depends_on": { "macros": [ "macro.dbt.statement" @@ -861,6 +1129,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__alter_column_type(relation, column_name, new_column_type) -%}\n {#\n 1. Create a new column (w/ temp name and correct type)\n 2. Copy data over to it\n 3. Drop the existing column (cascade!)\n 4. Rename the new column to existing column\n #}\n {%- set tmp_column = column_name + \"__dbt_alter\" -%}\n\n {% call statement('alter_column_type') %}\n alter table {{ relation }} add column {{ adapter.quote(tmp_column) }} {{ new_column_type }};\n update {{ relation }} set {{ adapter.quote(tmp_column) }} = {{ adapter.quote(column_name) }};\n alter table {{ relation }} drop column {{ adapter.quote(column_name) }} cascade;\n alter table {{ relation }} rename column {{ adapter.quote(tmp_column) }} to {{ adapter.quote(column_name) }}\n {% endcall %}\n\n{% endmacro %}", @@ -872,12 +1141,13 @@ "path": "macros/adapters/columns.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__alter_column_type" }, "macro.dbt.default__alter_relation_add_remove_columns": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.53156, "depends_on": { "macros": [ "macro.dbt.run_query" @@ -885,6 +1155,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__alter_relation_add_remove_columns(relation, add_columns, remove_columns) %}\n\n {% if add_columns is none %}\n {% set add_columns = [] %}\n {% endif %}\n {% if remove_columns is none %}\n {% set remove_columns = [] %}\n {% endif %}\n\n {% set sql -%}\n\n alter {{ relation.type }} {{ relation }}\n\n {% for column in add_columns %}\n add column {{ column.name }} {{ column.data_type }}{{ ',' if not loop.last }}\n {% endfor %}{{ ',' if add_columns and remove_columns }}\n\n {% for column in remove_columns %}\n drop column {{ column.name }}{{ ',' if not loop.last }}\n {% endfor %}\n\n {%- endset -%}\n\n {% do run_query(sql) %}\n\n{% endmacro %}", @@ -896,17 +1167,19 @@ "path": "macros/adapters/columns.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__alter_relation_add_remove_columns" }, "macro.dbt.default__alter_relation_comment": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.513366, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__alter_relation_comment(relation, relation_comment) -%}\n {{ exceptions.raise_not_implemented(\n 'alter_relation_comment macro not implemented for adapter '+adapter.type()) }}\n{% endmacro %}", @@ -918,17 +1191,19 @@ "path": "macros/adapters/persist_docs.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__alter_relation_comment" }, "macro.dbt.default__any_value": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.448807, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro 
default__any_value(expression) -%}\n\n any_value({{ expression }})\n\n{%- endmacro %}", @@ -940,12 +1215,13 @@ "path": "macros/utils/any_value.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__any_value" }, "macro.dbt.default__apply_grants": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.510319, "depends_on": { "macros": [ "macro.dbt.run_query", @@ -956,6 +1232,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__apply_grants(relation, grant_config, should_revoke=True) %}\n {#-- If grant_config is {} or None, this is a no-op --#}\n {% if grant_config %}\n {% if should_revoke %}\n {#-- We think previous grants may have carried over --#}\n {#-- Show current grants and calculate diffs --#}\n {% set current_grants_table = run_query(get_show_grant_sql(relation)) %}\n {% set current_grants_dict = adapter.standardize_grants_dict(current_grants_table) %}\n {% set needs_granting = diff_of_two_dicts(grant_config, current_grants_dict) %}\n {% set needs_revoking = diff_of_two_dicts(current_grants_dict, grant_config) %}\n {% if not (needs_granting or needs_revoking) %}\n {{ log('On ' ~ relation ~': All grants are in place, no revocation or granting needed.')}}\n {% endif %}\n {% else %}\n {#-- We don't think there's any chance of previous grants having carried over. --#}\n {#-- Jump straight to granting what the user has configured. --#}\n {% set needs_revoking = {} %}\n {% set needs_granting = grant_config %}\n {% endif %}\n {% if needs_granting or needs_revoking %}\n {% set revoke_statement_list = get_dcl_statement_list(relation, needs_revoking, get_revoke_sql) %}\n {% set grant_statement_list = get_dcl_statement_list(relation, needs_granting, get_grant_sql) %}\n {% set dcl_statement_list = revoke_statement_list + grant_statement_list %}\n {% if dcl_statement_list %}\n {{ call_dcl_statements(dcl_statement_list) }}\n {% endif %}\n {% endif %}\n {% endif %}\n{% endmacro %}", @@ -967,17 +1244,91 @@ "path": "macros/adapters/apply_grants.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__apply_grants" }, + "macro.dbt.default__array_append": { + "arguments": [], + "created_at": 1670298551.470784, + "depends_on": { + "macros": [] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro default__array_append(array, new_element) -%}\n array_append({{ array }}, {{ new_element }})\n{%- endmacro %}", + "meta": {}, + "name": "default__array_append", + "original_file_path": "macros/utils/array_append.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/utils/array_append.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.default__array_append" + }, + "macro.dbt.default__array_concat": { + "arguments": [], + "created_at": 1670298551.460879, + "depends_on": { + "macros": [] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro default__array_concat(array_1, array_2) -%}\n array_cat({{ array_1 }}, {{ array_2 }})\n{%- endmacro %}", + "meta": {}, + "name": "default__array_concat", + "original_file_path": "macros/utils/array_concat.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/utils/array_concat.sql", + 
"resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.default__array_concat" + }, + "macro.dbt.default__array_construct": { + "arguments": [], + "created_at": 1670298551.469611, + "depends_on": { + "macros": [] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro default__array_construct(inputs, data_type) -%}\n {% if inputs|length > 0 %}\n array[ {{ inputs|join(' , ') }} ]\n {% else %}\n array[]::{{data_type}}[]\n {% endif %}\n{%- endmacro %}", + "meta": {}, + "name": "default__array_construct", + "original_file_path": "macros/utils/array_construct.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/utils/array_construct.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.default__array_construct" + }, "macro.dbt.default__bool_or": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4620101, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__bool_or(expression) -%}\n\n bool_or({{ expression }})\n\n{%- endmacro %}", @@ -989,17 +1340,19 @@ "path": "macros/utils/bool_or.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__bool_or" }, "macro.dbt.default__build_snapshot_table": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.272919, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__build_snapshot_table(strategy, sql) %}\n\n select *,\n {{ strategy.scd_id }} as dbt_scd_id,\n {{ strategy.updated_at }} as dbt_updated_at,\n {{ strategy.updated_at }} as dbt_valid_from,\n nullif({{ strategy.updated_at }}, {{ strategy.updated_at }}) as dbt_valid_to\n from (\n {{ sql }}\n ) sbq\n\n{% endmacro %}", @@ -1011,12 +1364,13 @@ "path": "macros/materializations/snapshots/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__build_snapshot_table" }, "macro.dbt.default__call_dcl_statements": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.507085, "depends_on": { "macros": [ "macro.dbt.statement" @@ -1024,6 +1378,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__call_dcl_statements(dcl_statement_list) %}\n {#\n -- By default, supply all grant + revoke statements in a single semicolon-separated block,\n -- so that they're all processed together.\n\n -- Some databases do not support this. 
Those adapters will need to override this macro\n -- to run each statement individually.\n #}\n {% call statement('grants') %}\n {% for dcl_statement in dcl_statement_list %}\n {{ dcl_statement }};\n {% endfor %}\n {% endcall %}\n{% endmacro %}", @@ -1035,17 +1390,19 @@ "path": "macros/adapters/apply_grants.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__call_dcl_statements" }, "macro.dbt.default__cast_bool_to_text": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4475598, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__cast_bool_to_text(field) %}\n cast({{ field }} as {{ api.Column.translate_type('string') }})\n{% endmacro %}", @@ -1057,12 +1414,13 @@ "path": "macros/utils/cast_bool_to_text.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__cast_bool_to_text" }, "macro.dbt.default__check_schema_exists": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.521954, "depends_on": { "macros": [ "macro.dbt.replace", @@ -1071,6 +1429,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__check_schema_exists(information_schema, schema) -%}\n {% set sql -%}\n select count(*)\n from {{ information_schema.replace(information_schema_view='SCHEMATA') }}\n where catalog_name='{{ information_schema.database }}'\n and schema_name='{{ schema }}'\n {%- endset %}\n {{ return(run_query(sql)) }}\n{% endmacro %}", @@ -1082,12 +1441,13 @@ "path": "macros/adapters/metadata.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__check_schema_exists" }, "macro.dbt.default__collect_freshness": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.496642, "depends_on": { "macros": [ "macro.dbt.statement", @@ -1096,6 +1456,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__collect_freshness(source, loaded_at_field, filter) %}\n {% call statement('collect_freshness', fetch_result=True, auto_begin=False) -%}\n select\n max({{ loaded_at_field }}) as max_loaded_at,\n {{ current_timestamp() }} as snapshotted_at\n from {{ source }}\n {% if filter %}\n where {{ filter }}\n {% endif %}\n {% endcall %}\n {{ return(load_result('collect_freshness').table) }}\n{% endmacro %}", @@ -1107,17 +1468,19 @@ "path": "macros/adapters/freshness.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__collect_freshness" }, "macro.dbt.default__concat": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.407736, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__concat(fields) -%}\n {{ fields|join(' || ') }}\n{%- endmacro %}", @@ -1129,17 +1492,19 @@ "path": "macros/utils/concat.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__concat" }, "macro.dbt.default__copy_grants": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.49998, "depends_on": { "macros": [] }, 
"description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__copy_grants() %}\n {{ return(True) }}\n{% endmacro %}", @@ -1151,12 +1516,13 @@ "path": "macros/adapters/apply_grants.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__copy_grants" }, "macro.dbt.default__create_columns": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.268671, "depends_on": { "macros": [ "macro.dbt.statement" @@ -1164,6 +1530,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__create_columns(relation, columns) %}\n {% for column in columns %}\n {% call statement() %}\n alter table {{ relation }} add column \"{{ column.name }}\" {{ column.data_type }};\n {% endcall %}\n {% endfor %}\n{% endmacro %}", @@ -1175,12 +1542,13 @@ "path": "macros/materializations/snapshots/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__create_columns" }, "macro.dbt.default__create_csv_table": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.376986, "depends_on": { "macros": [ "macro.dbt.statement" @@ -1188,6 +1556,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__create_csv_table(model, agate_table) %}\n {%- set column_override = model['config'].get('column_types', {}) -%}\n {%- set quote_seed_column = model['config'].get('quote_columns', None) -%}\n\n {% set sql %}\n create table {{ this.render() }} (\n {%- for col_name in agate_table.column_names -%}\n {%- set inferred_type = adapter.convert_type(agate_table, loop.index0) -%}\n {%- set type = column_override.get(col_name, inferred_type) -%}\n {%- set column_name = (col_name | string) -%}\n {{ adapter.quote_seed_column(column_name, quote_seed_column) }} {{ type }} {%- if not loop.last -%}, {%- endif -%}\n {%- endfor -%}\n )\n {% endset %}\n\n {% call statement('_') -%}\n {{ sql }}\n {%- endcall %}\n\n {{ return(sql) }}\n{% endmacro %}", @@ -1199,12 +1568,13 @@ "path": "macros/materializations/seeds/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__create_csv_table" }, "macro.dbt.default__create_indexes": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4794502, "depends_on": { "macros": [ "macro.dbt.get_create_index_sql", @@ -1213,6 +1583,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__create_indexes(relation) -%}\n {%- set _indexes = config.get('indexes', default=[]) -%}\n\n {% for _index_dict in _indexes %}\n {% set create_index_sql = get_create_index_sql(relation, _index_dict) %}\n {% if create_index_sql %}\n {% do run_query(create_index_sql) %}\n {% endif %}\n {% endfor %}\n{% endmacro %}", @@ -1224,12 +1595,13 @@ "path": "macros/adapters/indexes.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__create_indexes" }, "macro.dbt.default__create_schema": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.472215, "depends_on": { "macros": [ "macro.dbt.statement" @@ -1237,6 +1609,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, 
"macro_sql": "{% macro default__create_schema(relation) -%}\n {%- call statement('create_schema') -%}\n create schema if not exists {{ relation.without_identifier() }}\n {% endcall %}\n{% endmacro %}", @@ -1248,17 +1621,19 @@ "path": "macros/adapters/schema.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__create_schema" }, "macro.dbt.default__create_table_as": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.351029, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__create_table_as(temporary, relation, sql) -%}\n {%- set sql_header = config.get('sql_header', none) -%}\n\n {{ sql_header if sql_header is not none }}\n\n create {% if temporary: -%}temporary{%- endif %} table\n {{ relation.include(database=(not temporary), schema=(not temporary)) }}\n as (\n {{ sql }}\n );\n{%- endmacro %}", @@ -1270,17 +1645,19 @@ "path": "macros/materializations/models/table/create_table_as.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__create_table_as" }, "macro.dbt.default__create_view_as": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.361897, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__create_view_as(relation, sql) -%}\n {%- set sql_header = config.get('sql_header', none) -%}\n\n {{ sql_header if sql_header is not none }}\n create view {{ relation }} as (\n {{ sql }}\n );\n{%- endmacro %}", @@ -1292,39 +1669,94 @@ "path": "macros/materializations/models/view/create_view_as.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__create_view_as" }, "macro.dbt.default__current_timestamp": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.474595, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, - "macro_sql": "{% macro default__current_timestamp() -%}\n {{ exceptions.raise_not_implemented(\n 'current_timestamp macro not implemented for adapter '+adapter.type()) }}\n{%- endmacro %}", + "macro_sql": "{% macro default__current_timestamp() -%}\n {{ exceptions.raise_not_implemented(\n 'current_timestamp macro not implemented for adapter ' + adapter.type()) }}\n{%- endmacro %}", "meta": {}, "name": "default__current_timestamp", - "original_file_path": "macros/adapters/freshness.sql", + "original_file_path": "macros/adapters/timestamps.sql", "package_name": "dbt", "patch_path": null, - "path": "macros/adapters/freshness.sql", + "path": "macros/adapters/timestamps.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__current_timestamp" }, + "macro.dbt.default__current_timestamp_backcompat": { + "arguments": [], + "created_at": 1670298551.4758148, + "depends_on": { + "macros": [] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro default__current_timestamp_backcompat() %}\n current_timestamp::timestamp\n{% endmacro %}", + "meta": {}, + "name": "default__current_timestamp_backcompat", + "original_file_path": "macros/adapters/timestamps.sql", + "package_name": "dbt", + "patch_path": 
null, + "path": "macros/adapters/timestamps.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.default__current_timestamp_backcompat" + }, + "macro.dbt.default__current_timestamp_in_utc_backcompat": { + "arguments": [], + "created_at": 1670298551.4767418, + "depends_on": { + "macros": [ + "macro.dbt.current_timestamp_backcompat", + "macro.dbt_postgres.postgres__current_timestamp_backcompat" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro default__current_timestamp_in_utc_backcompat() %}\n {{ return(adapter.dispatch('current_timestamp_backcompat', 'dbt')()) }}\n{% endmacro %}", + "meta": {}, + "name": "default__current_timestamp_in_utc_backcompat", + "original_file_path": "macros/adapters/timestamps.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/adapters/timestamps.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.default__current_timestamp_in_utc_backcompat" + }, "macro.dbt.default__date_trunc": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.467838, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__date_trunc(datepart, date) -%}\n date_trunc('{{datepart}}', {{date}})\n{%- endmacro %}", @@ -1336,17 +1768,19 @@ "path": "macros/utils/date_trunc.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__date_trunc" }, "macro.dbt.default__dateadd": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4097338, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__dateadd(datepart, interval, from_date_or_timestamp) %}\n\n dateadd(\n {{ datepart }},\n {{ interval }},\n {{ from_date_or_timestamp }}\n )\n\n{% endmacro %}", @@ -1358,17 +1792,19 @@ "path": "macros/utils/dateadd.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__dateadd" }, "macro.dbt.default__datediff": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4434402, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__datediff(first_date, second_date, datepart) -%}\n\n datediff(\n {{ datepart }},\n {{ first_date }},\n {{ second_date }}\n )\n\n{%- endmacro %}", @@ -1380,12 +1816,13 @@ "path": "macros/utils/datediff.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__datediff" }, "macro.dbt.default__drop_relation": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4889011, "depends_on": { "macros": [ "macro.dbt.statement" @@ -1393,6 +1830,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__drop_relation(relation) -%}\n {% call statement('drop_relation', auto_begin=False) -%}\n drop {{ relation.type }} if exists {{ relation }} cascade\n {%- endcall %}\n{% endmacro %}", @@ -1404,12 +1842,13 @@ "path": "macros/adapters/relation.sql", "resource_type": "macro", "root_path": 
"/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__drop_relation" }, "macro.dbt.default__drop_schema": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.473046, "depends_on": { "macros": [ "macro.dbt.statement" @@ -1417,6 +1856,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__drop_schema(relation) -%}\n {%- call statement('drop_schema') -%}\n drop schema if exists {{ relation.without_identifier() }} cascade\n {% endcall %}\n{% endmacro %}", @@ -1428,17 +1868,19 @@ "path": "macros/adapters/schema.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__drop_schema" }, "macro.dbt.default__escape_single_quotes": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.41143, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__escape_single_quotes(expression) -%}\n{{ expression | replace(\"'\",\"''\") }}\n{%- endmacro %}", @@ -1450,17 +1892,19 @@ "path": "macros/utils/escape_single_quotes.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__escape_single_quotes" }, "macro.dbt.default__except": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.405764, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__except() %}\n\n except\n\n{% endmacro %}", @@ -1472,17 +1916,19 @@ "path": "macros/utils/except.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__except" }, "macro.dbt.default__generate_alias_name": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.3859031, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__generate_alias_name(custom_alias_name=none, node=none) -%}\n\n {%- if custom_alias_name is none -%}\n\n {{ node.name }}\n\n {%- else -%}\n\n {{ custom_alias_name | trim }}\n\n {%- endif -%}\n\n{%- endmacro %}", @@ -1494,17 +1940,19 @@ "path": "macros/get_custom_name/get_custom_alias.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__generate_alias_name" }, "macro.dbt.default__generate_database_name": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.38943, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__generate_database_name(custom_database_name=none, node=none) -%}\n {%- set default_database = target.database -%}\n {%- if custom_database_name is none -%}\n\n {{ default_database }}\n\n {%- else -%}\n\n {{ custom_database_name }}\n\n {%- endif -%}\n\n{%- endmacro %}", @@ -1516,17 +1964,19 @@ "path": "macros/get_custom_name/get_custom_database.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__generate_database_name" }, "macro.dbt.default__generate_schema_name": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.387462, "depends_on": 
{ "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__generate_schema_name(custom_schema_name, node) -%}\n\n {%- set default_schema = target.schema -%}\n {%- if custom_schema_name is none -%}\n\n {{ default_schema }}\n\n {%- else -%}\n\n {{ default_schema }}_{{ custom_schema_name | trim }}\n\n {%- endif -%}\n\n{%- endmacro %}", @@ -1538,17 +1988,19 @@ "path": "macros/get_custom_name/get_custom_schema.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__generate_schema_name" }, "macro.dbt.default__get_batch_size": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.3807201, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__get_batch_size() %}\n {{ return(10000) }}\n{% endmacro %}", @@ -1560,17 +2012,19 @@ "path": "macros/materializations/seeds/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__get_batch_size" }, "macro.dbt.default__get_binding_char": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.379793, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__get_binding_char() %}\n {{ return('%s') }}\n{% endmacro %}", @@ -1582,17 +2036,19 @@ "path": "macros/materializations/seeds/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__get_binding_char" }, "macro.dbt.default__get_catalog": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.51899, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__get_catalog(information_schema, schemas) -%}\n\n {% set typename = adapter.type() %}\n {% set msg -%}\n get_catalog not implemented for {{ typename }}\n {%- endset %}\n\n {{ exceptions.raise_compiler_error(msg) }}\n{% endmacro %}", @@ -1604,12 +2060,13 @@ "path": "macros/adapters/metadata.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__get_catalog" }, "macro.dbt.default__get_columns_in_query": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.527812, "depends_on": { "macros": [ "macro.dbt.statement" @@ -1617,6 +2074,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__get_columns_in_query(select_sql) %}\n {% call statement('get_columns_in_query', fetch_result=True, auto_begin=False) -%}\n select * from (\n {{ select_sql }}\n ) as __dbt_sbq\n where false\n limit 0\n {% endcall %}\n\n {{ return(load_result('get_columns_in_query').table.columns | map(attribute='name') | list) }}\n{% endmacro %}", @@ -1628,17 +2086,19 @@ "path": "macros/adapters/columns.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__get_columns_in_query" }, "macro.dbt.default__get_columns_in_relation": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.526114, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": 
true }, "macro_sql": "{% macro default__get_columns_in_relation(relation) -%}\n {{ exceptions.raise_not_implemented(\n 'get_columns_in_relation macro not implemented for adapter '+adapter.type()) }}\n{% endmacro %}", @@ -1650,17 +2110,19 @@ "path": "macros/adapters/columns.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__get_columns_in_relation" }, "macro.dbt.default__get_create_index_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.478167, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__get_create_index_sql(relation, index_dict) -%}\n {% do return(None) %}\n{% endmacro %}", @@ -1672,12 +2134,13 @@ "path": "macros/adapters/indexes.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__get_create_index_sql" }, "macro.dbt.default__get_create_table_as_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.34937, "depends_on": { "macros": [ "macro.dbt.create_table_as" @@ -1685,6 +2148,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__get_create_table_as_sql(temporary, relation, sql) -%}\n {{ return(create_table_as(temporary, relation, sql)) }}\n{% endmacro %}", @@ -1696,12 +2160,13 @@ "path": "macros/materializations/models/table/create_table_as.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__get_create_table_as_sql" }, "macro.dbt.default__get_create_view_as_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.3610141, "depends_on": { "macros": [ "macro.dbt.create_view_as" @@ -1709,6 +2174,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__get_create_view_as_sql(relation, sql) -%}\n {{ return(create_view_as(relation, sql)) }}\n{% endmacro %}", @@ -1720,17 +2186,19 @@ "path": "macros/materializations/models/view/create_view_as.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__get_create_view_as_sql" }, "macro.dbt.default__get_csv_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.379252, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__get_csv_sql(create_or_truncate_sql, insert_sql) %}\n {{ create_or_truncate_sql }};\n -- dbt seed --\n {{ insert_sql }}\n{% endmacro %}", @@ -1742,12 +2210,13 @@ "path": "macros/materializations/seeds/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__get_csv_sql" }, "macro.dbt.default__get_dcl_statement_list": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.5059571, "depends_on": { "macros": [ "macro.dbt.support_multiple_grantees_per_dcl_statement" @@ -1755,6 +2224,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "\n\n{%- macro default__get_dcl_statement_list(relation, grant_config, get_dcl_macro) -%}\n {#\n -- Unpack grant_config into specific privileges and the set of users who need them 
granted/revoked.\n -- Depending on whether this database supports multiple grantees per statement, pass in the list of\n -- all grantees per privilege, or (if not) template one statement per privilege-grantee pair.\n -- `get_dcl_macro` will be either `get_grant_sql` or `get_revoke_sql`\n #}\n {%- set dcl_statements = [] -%}\n {%- for privilege, grantees in grant_config.items() %}\n {%- if support_multiple_grantees_per_dcl_statement() and grantees -%}\n {%- set dcl = get_dcl_macro(relation, privilege, grantees) -%}\n {%- do dcl_statements.append(dcl) -%}\n {%- else -%}\n {%- for grantee in grantees -%}\n {% set dcl = get_dcl_macro(relation, privilege, [grantee]) %}\n {%- do dcl_statements.append(dcl) -%}\n {% endfor -%}\n {%- endif -%}\n {%- endfor -%}\n {{ return(dcl_statements) }}\n{%- endmacro %}", @@ -1766,12 +2236,13 @@ "path": "macros/adapters/apply_grants.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__get_dcl_statement_list" }, "macro.dbt.default__get_delete_insert_merge_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.3104372, "depends_on": { "macros": [ "macro.dbt.get_quoted_csv" @@ -1779,9 +2250,10 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, - "macro_sql": "{% macro default__get_delete_insert_merge_sql(target, source, unique_key, dest_columns) -%}\n\n {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute=\"name\")) -%}\n\n {% if unique_key %}\n {% if unique_key is sequence and unique_key is not string %}\n delete from {{target }}\n using {{ source }}\n where (\n {% for key in unique_key %}\n {{ source }}.{{ key }} = {{ target }}.{{ key }}\n {{ \"and \" if not loop.last }}\n {% endfor %}\n );\n {% else %}\n delete from {{ target }}\n where (\n {{ unique_key }}) in (\n select ({{ unique_key }})\n from {{ source }}\n );\n\n {% endif %}\n {% endif %}\n\n insert into {{ target }} ({{ dest_cols_csv }})\n (\n select {{ dest_cols_csv }}\n from {{ source }}\n )\n\n{%- endmacro %}", + "macro_sql": "{% macro default__get_delete_insert_merge_sql(target, source, unique_key, dest_columns) -%}\n\n {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute=\"name\")) -%}\n\n {% if unique_key %}\n {% if unique_key is sequence and unique_key is not string %}\n delete from {{target }}\n using {{ source }}\n where (\n {% for key in unique_key %}\n {{ source }}.{{ key }} = {{ target }}.{{ key }}\n {{ \"and \" if not loop.last }}\n {% endfor %}\n );\n {% else %}\n delete from {{ target }}\n where (\n {{ unique_key }}) in (\n select ({{ unique_key }})\n from {{ source }}\n );\n\n {% endif %}\n {% endif %}\n\n insert into {{ target }} ({{ dest_cols_csv }})\n (\n select {{ dest_cols_csv }}\n from {{ source }}\n )\n\n{%- endmacro %}", "meta": {}, "name": "default__get_delete_insert_merge_sql", "original_file_path": "macros/materializations/models/incremental/merge.sql", @@ -1790,17 +2262,19 @@ "path": "macros/materializations/models/incremental/merge.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__get_delete_insert_merge_sql" }, "macro.dbt.default__get_grant_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.503052, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "\n\n{%- macro default__get_grant_sql(relation, privilege, 
grantees) -%}\n grant {{ privilege }} on {{ relation }} to {{ grantees | join(', ') }}\n{%- endmacro -%}\n\n\n", @@ -1812,12 +2286,143 @@ "path": "macros/adapters/apply_grants.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__get_grant_sql" }, + "macro.dbt.default__get_incremental_append_sql": { + "arguments": [], + "created_at": 1670298551.315692, + "depends_on": { + "macros": [ + "macro.dbt.get_insert_into_sql" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro default__get_incremental_append_sql(arg_dict) %}\n\n {% do return(get_insert_into_sql(arg_dict[\"target_relation\"], arg_dict[\"temp_relation\"], arg_dict[\"dest_columns\"])) %}\n\n{% endmacro %}", + "meta": {}, + "name": "default__get_incremental_append_sql", + "original_file_path": "macros/materializations/models/incremental/strategies.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/materializations/models/incremental/strategies.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.default__get_incremental_append_sql" + }, + "macro.dbt.default__get_incremental_default_sql": { + "arguments": [], + "created_at": 1670298551.319065, + "depends_on": { + "macros": [ + "macro.dbt.get_incremental_append_sql" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro default__get_incremental_default_sql(arg_dict) %}\n\n {% do return(get_incremental_append_sql(arg_dict)) %}\n\n{% endmacro %}", + "meta": {}, + "name": "default__get_incremental_default_sql", + "original_file_path": "macros/materializations/models/incremental/strategies.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/materializations/models/incremental/strategies.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.default__get_incremental_default_sql" + }, + "macro.dbt.default__get_incremental_delete_insert_sql": { + "arguments": [], + "created_at": 1670298551.316586, + "depends_on": { + "macros": [ + "macro.dbt.get_delete_insert_merge_sql" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro default__get_incremental_delete_insert_sql(arg_dict) %}\n\n {% do return(get_delete_insert_merge_sql(arg_dict[\"target_relation\"], arg_dict[\"temp_relation\"], arg_dict[\"unique_key\"], arg_dict[\"dest_columns\"])) %}\n\n{% endmacro %}", + "meta": {}, + "name": "default__get_incremental_delete_insert_sql", + "original_file_path": "macros/materializations/models/incremental/strategies.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/materializations/models/incremental/strategies.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.default__get_incremental_delete_insert_sql" + }, + "macro.dbt.default__get_incremental_insert_overwrite_sql": { + "arguments": [], + "created_at": 1670298551.3183832, + "depends_on": { + "macros": [ + "macro.dbt.get_insert_overwrite_merge_sql" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro default__get_incremental_insert_overwrite_sql(arg_dict) %}\n\n {% do 
return(get_insert_overwrite_merge_sql(arg_dict[\"target_relation\"], arg_dict[\"temp_relation\"], arg_dict[\"dest_columns\"], arg_dict[\"predicates\"])) %}\n\n{% endmacro %}", + "meta": {}, + "name": "default__get_incremental_insert_overwrite_sql", + "original_file_path": "macros/materializations/models/incremental/strategies.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/materializations/models/incremental/strategies.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.default__get_incremental_insert_overwrite_sql" + }, + "macro.dbt.default__get_incremental_merge_sql": { + "arguments": [], + "created_at": 1670298551.317482, + "depends_on": { + "macros": [ + "macro.dbt.get_merge_sql" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro default__get_incremental_merge_sql(arg_dict) %}\n\n {% do return(get_merge_sql(arg_dict[\"target_relation\"], arg_dict[\"temp_relation\"], arg_dict[\"unique_key\"], arg_dict[\"dest_columns\"])) %}\n\n{% endmacro %}", + "meta": {}, + "name": "default__get_incremental_merge_sql", + "original_file_path": "macros/materializations/models/incremental/strategies.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/materializations/models/incremental/strategies.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.default__get_incremental_merge_sql" + }, "macro.dbt.default__get_insert_overwrite_merge_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.312233, "depends_on": { "macros": [ "macro.dbt.get_quoted_csv" @@ -1825,9 +2430,10 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, - "macro_sql": "{% macro default__get_insert_overwrite_merge_sql(target, source, dest_columns, predicates, include_sql_header) -%}\n {%- set predicates = [] if predicates is none else [] + predicates -%}\n {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute=\"name\")) -%}\n {%- set sql_header = config.get('sql_header', none) -%}\n\n {{ sql_header if sql_header is not none and include_sql_header }}\n\n merge into {{ target }} as DBT_INTERNAL_DEST\n using {{ source }} as DBT_INTERNAL_SOURCE\n on FALSE\n\n when not matched by source\n {% if predicates %} and {{ predicates | join(' and ') }} {% endif %}\n then delete\n\n when not matched then insert\n ({{ dest_cols_csv }})\n values\n ({{ dest_cols_csv }})\n\n{% endmacro %}", + "macro_sql": "{% macro default__get_insert_overwrite_merge_sql(target, source, dest_columns, predicates, include_sql_header) -%}\n {#-- The only time include_sql_header is True: --#}\n {#-- BigQuery + insert_overwrite strategy + \"static\" partitions config --#}\n {#-- We should consider including the sql header at the materialization level instead --#}\n\n {%- set predicates = [] if predicates is none else [] + predicates -%}\n {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute=\"name\")) -%}\n {%- set sql_header = config.get('sql_header', none) -%}\n\n {{ sql_header if sql_header is not none and include_sql_header }}\n\n merge into {{ target }} as DBT_INTERNAL_DEST\n using {{ source }} as DBT_INTERNAL_SOURCE\n on FALSE\n\n when not matched by source\n {% if predicates %} and {{ predicates | join(' and ') }} {% endif %}\n then delete\n\n when not matched then insert\n ({{ dest_cols_csv }})\n values\n 
({{ dest_cols_csv }})\n\n{% endmacro %}", "meta": {}, "name": "default__get_insert_overwrite_merge_sql", "original_file_path": "macros/materializations/models/incremental/merge.sql", @@ -1836,22 +2442,25 @@ "path": "macros/materializations/models/incremental/merge.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__get_insert_overwrite_merge_sql" }, "macro.dbt.default__get_merge_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.308408, "depends_on": { "macros": [ - "macro.dbt.get_quoted_csv" + "macro.dbt.get_quoted_csv", + "macro.dbt.get_merge_update_columns" ] }, "description": "", "docs": { + "node_color": null, "show": true }, - "macro_sql": "{% macro default__get_merge_sql(target, source, unique_key, dest_columns, predicates) -%}\n {%- set predicates = [] if predicates is none else [] + predicates -%}\n {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute=\"name\")) -%}\n {%- set update_columns = config.get('merge_update_columns', default = dest_columns | map(attribute=\"quoted\") | list) -%}\n {%- set sql_header = config.get('sql_header', none) -%}\n\n {% if unique_key %}\n {% if unique_key is sequence and unique_key is not mapping and unique_key is not string %}\n {% for key in unique_key %}\n {% set this_key_match %}\n DBT_INTERNAL_SOURCE.{{ key }} = DBT_INTERNAL_DEST.{{ key }}\n {% endset %}\n {% do predicates.append(this_key_match) %}\n {% endfor %}\n {% else %}\n {% set unique_key_match %}\n DBT_INTERNAL_SOURCE.{{ unique_key }} = DBT_INTERNAL_DEST.{{ unique_key }}\n {% endset %}\n {% do predicates.append(unique_key_match) %}\n {% endif %}\n {% else %}\n {% do predicates.append('FALSE') %}\n {% endif %}\n\n {{ sql_header if sql_header is not none }}\n\n merge into {{ target }} as DBT_INTERNAL_DEST\n using {{ source }} as DBT_INTERNAL_SOURCE\n on {{ predicates | join(' and ') }}\n\n {% if unique_key %}\n when matched then update set\n {% for column_name in update_columns -%}\n {{ column_name }} = DBT_INTERNAL_SOURCE.{{ column_name }}\n {%- if not loop.last %}, {%- endif %}\n {%- endfor %}\n {% endif %}\n\n when not matched then insert\n ({{ dest_cols_csv }})\n values\n ({{ dest_cols_csv }})\n\n{% endmacro %}", + "macro_sql": "{% macro default__get_merge_sql(target, source, unique_key, dest_columns, predicates) -%}\n {%- set predicates = [] if predicates is none else [] + predicates -%}\n {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute=\"name\")) -%}\n {%- set merge_update_columns = config.get('merge_update_columns') -%}\n {%- set merge_exclude_columns = config.get('merge_exclude_columns') -%}\n {%- set update_columns = get_merge_update_columns(merge_update_columns, merge_exclude_columns, dest_columns) -%}\n {%- set sql_header = config.get('sql_header', none) -%}\n\n {% if unique_key %}\n {% if unique_key is sequence and unique_key is not mapping and unique_key is not string %}\n {% for key in unique_key %}\n {% set this_key_match %}\n DBT_INTERNAL_SOURCE.{{ key }} = DBT_INTERNAL_DEST.{{ key }}\n {% endset %}\n {% do predicates.append(this_key_match) %}\n {% endfor %}\n {% else %}\n {% set unique_key_match %}\n DBT_INTERNAL_SOURCE.{{ unique_key }} = DBT_INTERNAL_DEST.{{ unique_key }}\n {% endset %}\n {% do predicates.append(unique_key_match) %}\n {% endif %}\n {% else %}\n {% do predicates.append('FALSE') %}\n {% endif %}\n\n {{ sql_header if sql_header is not none }}\n\n merge into {{ target }} as DBT_INTERNAL_DEST\n 
using {{ source }} as DBT_INTERNAL_SOURCE\n on {{ predicates | join(' and ') }}\n\n {% if unique_key %}\n when matched then update set\n {% for column_name in update_columns -%}\n {{ column_name }} = DBT_INTERNAL_SOURCE.{{ column_name }}\n {%- if not loop.last %}, {%- endif %}\n {%- endfor %}\n {% endif %}\n\n when not matched then insert\n ({{ dest_cols_csv }})\n values\n ({{ dest_cols_csv }})\n\n{% endmacro %}", "meta": {}, "name": "default__get_merge_sql", "original_file_path": "macros/materializations/models/incremental/merge.sql", @@ -1860,17 +2469,43 @@ "path": "macros/materializations/models/incremental/merge.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__get_merge_sql" }, + "macro.dbt.default__get_merge_update_columns": { + "arguments": [], + "created_at": 1670298551.298201, + "depends_on": { + "macros": [] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro default__get_merge_update_columns(merge_update_columns, merge_exclude_columns, dest_columns) %}\n {%- set default_cols = dest_columns | map(attribute=\"quoted\") | list -%}\n\n {%- if merge_update_columns and merge_exclude_columns -%}\n {{ exceptions.raise_compiler_error(\n 'Model cannot specify merge_update_columns and merge_exclude_columns. Please update model to use only one config'\n )}}\n {%- elif merge_update_columns -%}\n {%- set update_columns = merge_update_columns -%}\n {%- elif merge_exclude_columns -%}\n {%- set update_columns = [] -%}\n {%- for column in dest_columns -%}\n {% if column.column | lower not in merge_exclude_columns | map(\"lower\") | list %}\n {%- do update_columns.append(column.quoted) -%}\n {% endif %}\n {%- endfor -%}\n {%- else -%}\n {%- set update_columns = default_cols -%}\n {%- endif -%}\n\n {{ return(update_columns) }}\n\n{% endmacro %}", + "meta": {}, + "name": "default__get_merge_update_columns", + "original_file_path": "macros/materializations/models/incremental/column_helpers.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/materializations/models/incremental/column_helpers.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.default__get_merge_update_columns" + }, "macro.dbt.default__get_or_create_relation": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.49324, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__get_or_create_relation(database, schema, identifier, type) %}\n {%- set target_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) %}\n\n {% if target_relation %}\n {% do return([true, target_relation]) %}\n {% endif %}\n\n {%- set new_relation = api.Relation.create(\n database=database,\n schema=schema,\n identifier=identifier,\n type=type\n ) -%}\n {% do return([false, new_relation]) %}\n{% endmacro %}", @@ -1882,17 +2517,19 @@ "path": "macros/adapters/relation.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__get_or_create_relation" }, "macro.dbt.default__get_revoke_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.5039842, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": 
"\n\n{%- macro default__get_revoke_sql(relation, privilege, grantees) -%}\n revoke {{ privilege }} on {{ relation }} from {{ grantees | join(', ') }}\n{%- endmacro -%}\n\n\n", @@ -1904,17 +2541,19 @@ "path": "macros/adapters/apply_grants.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__get_revoke_sql" }, "macro.dbt.default__get_show_grant_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.502111, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__get_show_grant_sql(relation) %}\n show grants on {{ relation }}\n{% endmacro %}", @@ -1926,17 +2565,19 @@ "path": "macros/adapters/apply_grants.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__get_show_grant_sql" }, "macro.dbt.default__get_test_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.289614, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__get_test_sql(main_sql, fail_calc, warn_if, error_if, limit) -%}\n select\n {{ fail_calc }} as failures,\n {{ fail_calc }} {{ warn_if }} as should_warn,\n {{ fail_calc }} {{ error_if }} as should_error\n from (\n {{ main_sql }}\n {{ \"limit \" ~ limit if limit != none }}\n ) dbt_internal_test\n{%- endmacro %}", @@ -1948,17 +2589,19 @@ "path": "macros/materializations/tests/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__get_test_sql" }, "macro.dbt.default__get_true_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.269789, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__get_true_sql() %}\n {{ return('TRUE') }}\n{% endmacro %}", @@ -1970,17 +2613,19 @@ "path": "macros/materializations/snapshots/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__get_true_sql" }, "macro.dbt.default__get_where_subquery": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.291193, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__get_where_subquery(relation) -%}\n {% set where = config.get('where', '') %}\n {% if where %}\n {%- set filtered -%}\n (select * from {{ relation }} where {{ where }}) dbt_subquery\n {%- endset -%}\n {% do return(filtered) %}\n {%- else -%}\n {% do return(relation) %}\n {%- endif -%}\n{%- endmacro %}", @@ -1992,17 +2637,19 @@ "path": "macros/materializations/tests/where_subquery.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__get_where_subquery" }, "macro.dbt.default__handle_existing_table": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.357036, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__handle_existing_table(full_refresh, old_relation) %}\n {{ log(\"Dropping relation \" ~ old_relation ~ \" because it is of type \" ~ 
old_relation.type) }}\n {{ adapter.drop_relation(old_relation) }}\n{% endmacro %}", @@ -2014,17 +2661,19 @@ "path": "macros/materializations/models/view/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__handle_existing_table" }, "macro.dbt.default__hash": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4462452, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__hash(field) -%}\n md5(cast({{ field }} as {{ api.Column.translate_type('string') }}))\n{%- endmacro %}", @@ -2036,17 +2685,19 @@ "path": "macros/utils/hash.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__hash" }, "macro.dbt.default__information_schema_name": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.519673, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__information_schema_name(database) -%}\n {%- if database -%}\n {{ database }}.INFORMATION_SCHEMA\n {%- else -%}\n INFORMATION_SCHEMA\n {%- endif -%}\n{%- endmacro %}", @@ -2058,17 +2709,19 @@ "path": "macros/adapters/metadata.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__information_schema_name" }, "macro.dbt.default__intersect": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.410484, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__intersect() %}\n\n intersect\n\n{% endmacro %}", @@ -2080,12 +2733,13 @@ "path": "macros/utils/intersect.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__intersect" }, "macro.dbt.default__last_day": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.464326, "depends_on": { "macros": [ "macro.dbt.default_last_day" @@ -2093,6 +2747,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__last_day(date, datepart) -%}\n {{dbt.default_last_day(date, datepart)}}\n{%- endmacro %}", @@ -2104,17 +2759,19 @@ "path": "macros/utils/last_day.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__last_day" }, "macro.dbt.default__length": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.408619, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__length(expression) %}\n\n length(\n {{ expression }}\n )\n\n{%- endmacro -%}", @@ -2126,17 +2783,19 @@ "path": "macros/utils/length.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__length" }, "macro.dbt.default__list_relations_without_caching": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.522859, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__list_relations_without_caching(schema_relation) %}\n {{ 
exceptions.raise_not_implemented(\n 'list_relations_without_caching macro not implemented for adapter '+adapter.type()) }}\n{% endmacro %}", @@ -2148,12 +2807,13 @@ "path": "macros/adapters/metadata.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__list_relations_without_caching" }, "macro.dbt.default__list_schemas": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.5207932, "depends_on": { "macros": [ "macro.dbt.information_schema_name", @@ -2162,6 +2822,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__list_schemas(database) -%}\n {% set sql %}\n select distinct schema_name\n from {{ information_schema_name(database) }}.SCHEMATA\n where catalog_name ilike '{{ database }}'\n {% endset %}\n {{ return(run_query(sql)) }}\n{% endmacro %}", @@ -2173,17 +2834,19 @@ "path": "macros/adapters/metadata.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__list_schemas" }, "macro.dbt.default__listagg": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4417012, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__listagg(measure, delimiter_text, order_by_clause, limit_num) -%}\n\n {% if limit_num -%}\n array_to_string(\n array_slice(\n array_agg(\n {{ measure }}\n ){% if order_by_clause -%}\n within group ({{ order_by_clause }})\n {%- endif %}\n ,0\n ,{{ limit_num }}\n ),\n {{ delimiter_text }}\n )\n {%- else %}\n listagg(\n {{ measure }},\n {{ delimiter_text }}\n )\n {% if order_by_clause -%}\n within group ({{ order_by_clause }})\n {%- endif %}\n {%- endif %}\n\n{%- endmacro %}", @@ -2195,12 +2858,13 @@ "path": "macros/utils/listagg.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__listagg" }, "macro.dbt.default__load_csv_rows": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.384554, "depends_on": { "macros": [ "macro.dbt.get_batch_size", @@ -2210,6 +2874,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__load_csv_rows(model, agate_table) %}\n\n {% set batch_size = get_batch_size() %}\n\n {% set cols_sql = get_seed_column_quoted_csv(model, agate_table.column_names) %}\n {% set bindings = [] %}\n\n {% set statements = [] %}\n\n {% for chunk in agate_table.rows | batch(batch_size) %}\n {% set bindings = [] %}\n\n {% for row in chunk %}\n {% do bindings.extend(row) %}\n {% endfor %}\n\n {% set sql %}\n insert into {{ this.render() }} ({{ cols_sql }}) values\n {% for row in chunk -%}\n ({%- for column in agate_table.column_names -%}\n {{ get_binding_char() }}\n {%- if not loop.last%},{%- endif %}\n {%- endfor -%})\n {%- if not loop.last%},{%- endif %}\n {%- endfor %}\n {% endset %}\n\n {% do adapter.add_query(sql, bindings=bindings, abridge_sql_log=True) %}\n\n {% if loop.index0 == 0 %}\n {% do statements.append(sql) %}\n {% endif %}\n {% endfor %}\n\n {# Return SQL so we can render it out into the compiled files #}\n {{ return(statements[0]) }}\n{% endmacro %}", @@ -2221,17 +2886,19 @@ "path": "macros/materializations/seeds/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": 
[], "unique_id": "macro.dbt.default__load_csv_rows" }, "macro.dbt.default__make_backup_relation": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4880042, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__make_backup_relation(base_relation, backup_relation_type, suffix) %}\n {%- set backup_identifier = base_relation.identifier ~ suffix -%}\n {%- set backup_relation = base_relation.incorporate(\n path={\"identifier\": backup_identifier},\n type=backup_relation_type\n ) -%}\n {{ return(backup_relation) }}\n{% endmacro %}", @@ -2243,12 +2910,13 @@ "path": "macros/adapters/relation.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__make_backup_relation" }, "macro.dbt.default__make_intermediate_relation": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.485622, "depends_on": { "macros": [ "macro.dbt.default__make_temp_relation" @@ -2256,6 +2924,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__make_intermediate_relation(base_relation, suffix) %}\n {{ return(default__make_temp_relation(base_relation, suffix)) }}\n{% endmacro %}", @@ -2267,17 +2936,19 @@ "path": "macros/adapters/relation.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__make_intermediate_relation" }, "macro.dbt.default__make_temp_relation": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.486759, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__make_temp_relation(base_relation, suffix) %}\n {%- set temp_identifier = base_relation.identifier ~ suffix -%}\n {%- set temp_relation = base_relation.incorporate(\n path={\"identifier\": temp_identifier}) -%}\n\n {{ return(temp_relation) }}\n{% endmacro %}", @@ -2289,12 +2960,13 @@ "path": "macros/adapters/relation.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__make_temp_relation" }, "macro.dbt.default__persist_docs": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.515136, "depends_on": { "macros": [ "macro.dbt.run_query", @@ -2304,6 +2976,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__persist_docs(relation, model, for_relation, for_columns) -%}\n {% if for_relation and config.persist_relation_docs() and model.description %}\n {% do run_query(alter_relation_comment(relation, model.description)) %}\n {% endif %}\n\n {% if for_columns and config.persist_column_docs() and model.columns %}\n {% do run_query(alter_column_comment(relation, model.columns)) %}\n {% endif %}\n{% endmacro %}", @@ -2315,17 +2988,19 @@ "path": "macros/adapters/persist_docs.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__persist_docs" }, "macro.dbt.default__position": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.450131, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__position(substring_text, string_text) %}\n\n 
position(\n {{ substring_text }} in {{ string_text }}\n )\n\n{%- endmacro -%}", @@ -2337,17 +3012,19 @@ "path": "macros/utils/position.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__position" }, "macro.dbt.default__post_snapshot": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.269226, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__post_snapshot(staging_relation) %}\n {# no-op #}\n{% endmacro %}", @@ -2359,12 +3036,13 @@ "path": "macros/materializations/snapshots/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__post_snapshot" }, "macro.dbt.default__rename_relation": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.491068, "depends_on": { "macros": [ "macro.dbt.statement" @@ -2372,6 +3050,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__rename_relation(from_relation, to_relation) -%}\n {% set target_name = adapter.quote_as_configured(to_relation.identifier, 'identifier') %}\n {% call statement('rename_relation') -%}\n alter table {{ from_relation }} rename to {{ target_name }}\n {%- endcall %}\n{% endmacro %}", @@ -2383,17 +3062,19 @@ "path": "macros/adapters/relation.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__rename_relation" }, "macro.dbt.default__replace": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4068532, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__replace(field, old_chars, new_chars) %}\n\n replace(\n {{ field }},\n {{ old_chars }},\n {{ new_chars }}\n )\n\n\n{% endmacro %}", @@ -2405,12 +3086,13 @@ "path": "macros/utils/replace.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__replace" }, "macro.dbt.default__reset_csv_table": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.3784, "depends_on": { "macros": [ "macro.dbt.create_csv_table" @@ -2418,6 +3100,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__reset_csv_table(model, full_refresh, old_relation, agate_table) %}\n {% set sql = \"\" %}\n {% if full_refresh %}\n {{ adapter.drop_relation(old_relation) }}\n {% set sql = create_csv_table(model, agate_table) %}\n {% else %}\n {{ adapter.truncate_relation(old_relation) }}\n {% set sql = \"truncate table \" ~ old_relation %}\n {% endif %}\n\n {{ return(sql) }}\n{% endmacro %}", @@ -2429,17 +3112,19 @@ "path": "macros/materializations/seeds/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__reset_csv_table" }, "macro.dbt.default__right": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.438331, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__right(string_text, length_expression) %}\n\n right(\n {{ string_text }},\n {{ length_expression }}\n )\n\n{%- endmacro 
-%}", @@ -2451,17 +3136,19 @@ "path": "macros/utils/right.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__right" }, "macro.dbt.default__safe_cast": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4448671, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__safe_cast(field, type) %}\n {# most databases don't support this function yet\n so we just need to use cast #}\n cast({{field}} as {{type}})\n{% endmacro %}", @@ -2473,12 +3160,13 @@ "path": "macros/utils/safe_cast.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__safe_cast" }, "macro.dbt.default__snapshot_get_time": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.475214, "depends_on": { "macros": [ "macro.dbt.current_timestamp" @@ -2486,28 +3174,31 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, - "macro_sql": "{% macro default__snapshot_get_time() -%}\n {{ current_timestamp() }}\n{%- endmacro %}", + "macro_sql": "{% macro default__snapshot_get_time() %}\n {{ current_timestamp() }}\n{% endmacro %}", "meta": {}, "name": "default__snapshot_get_time", - "original_file_path": "macros/materializations/snapshots/strategies.sql", + "original_file_path": "macros/adapters/timestamps.sql", "package_name": "dbt", "patch_path": null, - "path": "macros/materializations/snapshots/strategies.sql", + "path": "macros/adapters/timestamps.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__snapshot_get_time" }, "macro.dbt.default__snapshot_hash_arguments": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.253029, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__snapshot_hash_arguments(args) -%}\n md5({%- for arg in args -%}\n coalesce(cast({{ arg }} as varchar ), '')\n {% if not loop.last %} || '|' || {% endif %}\n {%- endfor -%})\n{%- endmacro %}", @@ -2519,17 +3210,19 @@ "path": "macros/materializations/snapshots/strategies.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__snapshot_hash_arguments" }, "macro.dbt.default__snapshot_merge_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.2463481, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__snapshot_merge_sql(target, source, insert_cols) -%}\n {%- set insert_cols_csv = insert_cols | join(', ') -%}\n\n merge into {{ target }} as DBT_INTERNAL_DEST\n using {{ source }} as DBT_INTERNAL_SOURCE\n on DBT_INTERNAL_SOURCE.dbt_scd_id = DBT_INTERNAL_DEST.dbt_scd_id\n\n when matched\n and DBT_INTERNAL_DEST.dbt_valid_to is null\n and DBT_INTERNAL_SOURCE.dbt_change_type in ('update', 'delete')\n then update\n set dbt_valid_to = DBT_INTERNAL_SOURCE.dbt_valid_to\n\n when not matched\n and DBT_INTERNAL_SOURCE.dbt_change_type = 'insert'\n then insert ({{ insert_cols_csv }})\n values ({{ insert_cols_csv }})\n\n{% endmacro %}", @@ -2541,12 +3234,13 @@ "path": "macros/materializations/snapshots/snapshot_merge.sql", "resource_type": "macro", 
"root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__snapshot_merge_sql" }, "macro.dbt.default__snapshot_staging_table": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.2720032, "depends_on": { "macros": [ "macro.dbt.snapshot_get_time" @@ -2554,6 +3248,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__snapshot_staging_table(strategy, source_sql, target_relation) -%}\n\n with snapshot_query as (\n\n {{ source_sql }}\n\n ),\n\n snapshotted_data as (\n\n select *,\n {{ strategy.unique_key }} as dbt_unique_key\n\n from {{ target_relation }}\n where dbt_valid_to is null\n\n ),\n\n insertions_source_data as (\n\n select\n *,\n {{ strategy.unique_key }} as dbt_unique_key,\n {{ strategy.updated_at }} as dbt_updated_at,\n {{ strategy.updated_at }} as dbt_valid_from,\n nullif({{ strategy.updated_at }}, {{ strategy.updated_at }}) as dbt_valid_to,\n {{ strategy.scd_id }} as dbt_scd_id\n\n from snapshot_query\n ),\n\n updates_source_data as (\n\n select\n *,\n {{ strategy.unique_key }} as dbt_unique_key,\n {{ strategy.updated_at }} as dbt_updated_at,\n {{ strategy.updated_at }} as dbt_valid_from,\n {{ strategy.updated_at }} as dbt_valid_to\n\n from snapshot_query\n ),\n\n {%- if strategy.invalidate_hard_deletes %}\n\n deletes_source_data as (\n\n select\n *,\n {{ strategy.unique_key }} as dbt_unique_key\n from snapshot_query\n ),\n {% endif %}\n\n insertions as (\n\n select\n 'insert' as dbt_change_type,\n source_data.*\n\n from insertions_source_data as source_data\n left outer join snapshotted_data on snapshotted_data.dbt_unique_key = source_data.dbt_unique_key\n where snapshotted_data.dbt_unique_key is null\n or (\n snapshotted_data.dbt_unique_key is not null\n and (\n {{ strategy.row_changed }}\n )\n )\n\n ),\n\n updates as (\n\n select\n 'update' as dbt_change_type,\n source_data.*,\n snapshotted_data.dbt_scd_id\n\n from updates_source_data as source_data\n join snapshotted_data on snapshotted_data.dbt_unique_key = source_data.dbt_unique_key\n where (\n {{ strategy.row_changed }}\n )\n )\n\n {%- if strategy.invalidate_hard_deletes -%}\n ,\n\n deletes as (\n\n select\n 'delete' as dbt_change_type,\n source_data.*,\n {{ snapshot_get_time() }} as dbt_valid_from,\n {{ snapshot_get_time() }} as dbt_updated_at,\n {{ snapshot_get_time() }} as dbt_valid_to,\n snapshotted_data.dbt_scd_id\n\n from snapshotted_data\n left join deletes_source_data as source_data on snapshotted_data.dbt_unique_key = source_data.dbt_unique_key\n where source_data.dbt_unique_key is null\n )\n {%- endif %}\n\n select * from insertions\n union all\n select * from updates\n {%- if strategy.invalidate_hard_deletes %}\n union all\n select * from deletes\n {%- endif %}\n\n{%- endmacro %}", @@ -2565,17 +3260,19 @@ "path": "macros/materializations/snapshots/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__snapshot_staging_table" }, "macro.dbt.default__snapshot_string_as_time": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.255126, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__snapshot_string_as_time(timestamp) %}\n {% do exceptions.raise_not_implemented(\n 'snapshot_string_as_time macro not implemented for adapter '+adapter.type()\n ) %}\n{% endmacro %}", @@ -2587,17 
+3284,19 @@ "path": "macros/materializations/snapshots/strategies.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__snapshot_string_as_time" }, "macro.dbt.default__split_part": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.466079, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__split_part(string_text, delimiter_text, part_number) %}\n\n split_part(\n {{ string_text }},\n {{ delimiter_text }},\n {{ part_number }}\n )\n\n{% endmacro %}", @@ -2609,17 +3308,19 @@ "path": "macros/utils/split_part.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__split_part" }, "macro.dbt.default__string_literal": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.451336, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__string_literal(value) -%}\n '{{ value }}'\n{%- endmacro %}", @@ -2631,17 +3332,19 @@ "path": "macros/utils/literal.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__string_literal" }, "macro.dbt.default__support_multiple_grantees_per_dcl_statement": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.500675, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "\n\n{%- macro default__support_multiple_grantees_per_dcl_statement() -%}\n {{ return(True) }}\n{%- endmacro -%}\n\n\n", @@ -2653,17 +3356,19 @@ "path": "macros/adapters/apply_grants.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__support_multiple_grantees_per_dcl_statement" }, "macro.dbt.default__test_accepted_values": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.3928099, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__test_accepted_values(model, column_name, values, quote=True) %}\n\nwith all_values as (\n\n select\n {{ column_name }} as value_field,\n count(*) as n_records\n\n from {{ model }}\n group by {{ column_name }}\n\n)\n\nselect *\nfrom all_values\nwhere value_field not in (\n {% for value in values -%}\n {% if quote -%}\n '{{ value }}'\n {%- else -%}\n {{ value }}\n {%- endif -%}\n {%- if not loop.last -%},{%- endif %}\n {%- endfor %}\n)\n\n{% endmacro %}", @@ -2675,12 +3380,13 @@ "path": "macros/generic_test_sql/accepted_values.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__test_accepted_values" }, "macro.dbt.default__test_not_null": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.390924, "depends_on": { "macros": [ "macro.dbt.should_store_failures" @@ -2688,6 +3394,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__test_not_null(model, column_name) %}\n\n{% set column_list = '*' if should_store_failures() else column_name %}\n\nselect {{ column_list }}\nfrom {{ model }}\nwhere {{ column_name }} is null\n\n{% endmacro %}", @@ 
-2699,17 +3406,19 @@ "path": "macros/generic_test_sql/not_null.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__test_not_null" }, "macro.dbt.default__test_relationships": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.390204, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__test_relationships(model, column_name, to, field) %}\n\nwith child as (\n select {{ column_name }} as from_field\n from {{ model }}\n where {{ column_name }} is not null\n),\n\nparent as (\n select {{ field }} as to_field\n from {{ to }}\n)\n\nselect\n from_field\n\nfrom child\nleft join parent\n on child.from_field = parent.to_field\n\nwhere parent.to_field is null\n\n{% endmacro %}", @@ -2721,17 +3430,19 @@ "path": "macros/generic_test_sql/relationships.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__test_relationships" }, "macro.dbt.default__test_unique": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.391626, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__test_unique(model, column_name) %}\n\nselect\n {{ column_name }} as unique_field,\n count(*) as n_records\n\nfrom {{ model }}\nwhere {{ column_name }} is not null\ngroup by {{ column_name }}\nhaving count(*) > 1\n\n{% endmacro %}", @@ -2743,12 +3454,13 @@ "path": "macros/generic_test_sql/unique.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__test_unique" }, "macro.dbt.default__truncate_relation": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.489698, "depends_on": { "macros": [ "macro.dbt.statement" @@ -2756,6 +3468,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__truncate_relation(relation) -%}\n {% call statement('truncate_relation') -%}\n truncate table {{ relation }}\n {%- endcall %}\n{% endmacro %}", @@ -2767,39 +3480,67 @@ "path": "macros/adapters/relation.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__truncate_relation" }, - "macro.dbt.default__type_bigint": { + "macro.dbt.default__type_bigint": { + "arguments": [], + "created_at": 1670298551.4577549, + "depends_on": { + "macros": [] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro default__type_bigint() %}\n {{ return(api.Column.translate_type(\"bigint\")) }}\n{% endmacro %}", + "meta": {}, + "name": "default__type_bigint", + "original_file_path": "macros/utils/data_types.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/utils/data_types.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.default__type_bigint" + }, + "macro.dbt.default__type_boolean": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.459562, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, - "macro_sql": "{% macro default__type_bigint() %}\n {{ 
return(api.Column.translate_type(\"bigint\")) }}\n{% endmacro %}", + "macro_sql": "{%- macro default__type_boolean() -%}\n {{ return(api.Column.translate_type(\"boolean\")) }}\n{%- endmacro -%}\n\n", "meta": {}, - "name": "default__type_bigint", + "name": "default__type_boolean", "original_file_path": "macros/utils/data_types.sql", "package_name": "dbt", "patch_path": null, "path": "macros/utils/data_types.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], - "unique_id": "macro.dbt.default__type_bigint" + "unique_id": "macro.dbt.default__type_boolean" }, "macro.dbt.default__type_float": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.455922, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__type_float() %}\n {{ return(api.Column.translate_type(\"float\")) }}\n{% endmacro %}", @@ -2811,17 +3552,19 @@ "path": "macros/utils/data_types.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__type_float" }, "macro.dbt.default__type_int": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.458689, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{%- macro default__type_int() -%}\n {{ return(api.Column.translate_type(\"integer\")) }}\n{%- endmacro -%}\n\n", @@ -2833,17 +3576,19 @@ "path": "macros/utils/data_types.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__type_int" }, "macro.dbt.default__type_numeric": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4568682, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__type_numeric() %}\n {{ return(api.Column.numeric_type(\"numeric\", 28, 6)) }}\n{% endmacro %}", @@ -2855,17 +3600,19 @@ "path": "macros/utils/data_types.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__type_numeric" }, "macro.dbt.default__type_string": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.454168, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__type_string() %}\n {{ return(api.Column.translate_type(\"string\")) }}\n{% endmacro %}", @@ -2877,17 +3624,19 @@ "path": "macros/utils/data_types.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__type_string" }, "macro.dbt.default__type_timestamp": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4550478, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro default__type_timestamp() %}\n {{ return(api.Column.translate_type(\"timestamp\")) }}\n{% endmacro %}", @@ -2899,12 +3648,13 @@ "path": "macros/utils/data_types.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default__type_timestamp" }, "macro.dbt.default_last_day": { "arguments": [], - "created_at": 1663278957.5715818, + 
"created_at": 1670298551.463785, "depends_on": { "macros": [ "macro.dbt.dateadd", @@ -2913,6 +3663,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "\n\n{%- macro default_last_day(date, datepart) -%}\n cast(\n {{dbt.dateadd('day', '-1',\n dbt.dateadd(datepart, '1', dbt.date_trunc(datepart, date))\n )}}\n as date)\n{%- endmacro -%}\n\n", @@ -2924,20 +3675,22 @@ "path": "macros/utils/last_day.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.default_last_day" }, "macro.dbt.diff_column_data_types": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.2963068, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, - "macro_sql": "{% macro diff_column_data_types(source_columns, target_columns) %}\n\n {% set result = [] %}\n {% for sc in source_columns %}\n {% set tc = target_columns | selectattr(\"name\", \"equalto\", sc.name) | list | first %}\n {% if tc %}\n {% if sc.data_type != tc.data_type %}\n {{ result.append( { 'column_name': tc.name, 'new_type': sc.data_type } ) }}\n {% endif %}\n {% endif %}\n {% endfor %}\n\n {{ return(result) }}\n\n{% endmacro %}", + "macro_sql": "{% macro diff_column_data_types(source_columns, target_columns) %}\n\n {% set result = [] %}\n {% for sc in source_columns %}\n {% set tc = target_columns | selectattr(\"name\", \"equalto\", sc.name) | list | first %}\n {% if tc %}\n {% if sc.data_type != tc.data_type and not sc.can_expand_to(other_column=tc) %}\n {{ result.append( { 'column_name': tc.name, 'new_type': sc.data_type } ) }}\n {% endif %}\n {% endif %}\n {% endfor %}\n\n {{ return(result) }}\n\n{% endmacro %}", "meta": {}, "name": "diff_column_data_types", "original_file_path": "macros/materializations/models/incremental/column_helpers.sql", @@ -2946,17 +3699,19 @@ "path": "macros/materializations/models/incremental/column_helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.diff_column_data_types" }, "macro.dbt.diff_columns": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.294998, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro diff_columns(source_columns, target_columns) %}\n\n {% set result = [] %}\n {% set source_names = source_columns | map(attribute = 'column') | list %}\n {% set target_names = target_columns | map(attribute = 'column') | list %}\n\n {# --check whether the name attribute exists in the target - this does not perform a data type check #}\n {% for sc in source_columns %}\n {% if sc.name not in target_names %}\n {{ result.append(sc) }}\n {% endif %}\n {% endfor %}\n\n {{ return(result) }}\n\n{% endmacro %}", @@ -2968,12 +3723,13 @@ "path": "macros/materializations/models/incremental/column_helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.diff_columns" }, "macro.dbt.drop_relation": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.488418, "depends_on": { "macros": [ "macro.dbt.default__drop_relation" @@ -2981,6 +3737,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro drop_relation(relation) -%}\n {{ return(adapter.dispatch('drop_relation', 'dbt')(relation)) }}\n{% endmacro %}", 
@@ -2992,17 +3749,19 @@ "path": "macros/adapters/relation.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.drop_relation" }, "macro.dbt.drop_relation_if_exists": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.494573, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro drop_relation_if_exists(relation) %}\n {% if relation is not none %}\n {{ adapter.drop_relation(relation) }}\n {% endif %}\n{% endmacro %}", @@ -3014,12 +3773,13 @@ "path": "macros/adapters/relation.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.drop_relation_if_exists" }, "macro.dbt.drop_schema": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4726038, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__drop_schema" @@ -3027,6 +3787,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro drop_schema(relation) -%}\n {{ adapter.dispatch('drop_schema', 'dbt')(relation) }}\n{% endmacro %}", @@ -3038,12 +3799,13 @@ "path": "macros/adapters/schema.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.drop_schema" }, "macro.dbt.escape_single_quotes": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4111412, "depends_on": { "macros": [ "macro.dbt.default__escape_single_quotes" @@ -3051,6 +3813,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro escape_single_quotes(expression) %}\n {{ return(adapter.dispatch('escape_single_quotes', 'dbt') (expression)) }}\n{% endmacro %}", @@ -3062,12 +3825,13 @@ "path": "macros/utils/escape_single_quotes.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.escape_single_quotes" }, "macro.dbt.except": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.405597, "depends_on": { "macros": [ "macro.dbt.default__except" @@ -3075,6 +3839,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro except() %}\n {{ return(adapter.dispatch('except', 'dbt')()) }}\n{% endmacro %}", @@ -3086,12 +3851,13 @@ "path": "macros/utils/except.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.except" }, "macro.dbt.generate_alias_name": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.3854442, "depends_on": { "macros": [ "macro.dbt.default__generate_alias_name" @@ -3099,6 +3865,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro generate_alias_name(custom_alias_name=none, node=none) -%}\n {% do return(adapter.dispatch('generate_alias_name', 'dbt')(custom_alias_name, node)) %}\n{%- endmacro %}", @@ -3110,12 +3877,13 @@ "path": "macros/get_custom_name/get_custom_alias.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.generate_alias_name" }, "macro.dbt.generate_database_name": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.388889, "depends_on": { "macros": [ 
"macro.dbt.default__generate_database_name" @@ -3123,6 +3891,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro generate_database_name(custom_database_name=none, node=none) -%}\n {% do return(adapter.dispatch('generate_database_name', 'dbt')(custom_database_name, node)) %}\n{%- endmacro %}", @@ -3134,12 +3903,13 @@ "path": "macros/get_custom_name/get_custom_database.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.generate_database_name" }, "macro.dbt.generate_schema_name": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.386942, "depends_on": { "macros": [ "macro.dbt.default__generate_schema_name" @@ -3147,6 +3917,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro generate_schema_name(custom_schema_name=none, node=none) -%}\n {{ return(adapter.dispatch('generate_schema_name', 'dbt')(custom_schema_name, node)) }}\n{% endmacro %}", @@ -3158,17 +3929,19 @@ "path": "macros/get_custom_name/get_custom_schema.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.generate_schema_name" }, "macro.dbt.generate_schema_name_for_env": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.388025, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro generate_schema_name_for_env(custom_schema_name, node) -%}\n\n {%- set default_schema = target.schema -%}\n {%- if target.name == 'prod' and custom_schema_name is not none -%}\n\n {{ custom_schema_name | trim }}\n\n {%- else -%}\n\n {{ default_schema }}\n\n {%- endif -%}\n\n{%- endmacro %}", @@ -3180,12 +3953,13 @@ "path": "macros/get_custom_name/get_custom_schema.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.generate_schema_name_for_env" }, "macro.dbt.get_batch_size": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.3802922, "depends_on": { "macros": [ "macro.dbt.default__get_batch_size" @@ -3193,6 +3967,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro get_batch_size() -%}\n {{ return(adapter.dispatch('get_batch_size', 'dbt')()) }}\n{%- endmacro %}", @@ -3204,12 +3979,13 @@ "path": "macros/materializations/seeds/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.get_batch_size" }, "macro.dbt.get_binding_char": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.379551, "depends_on": { "macros": [ "macro.dbt.default__get_binding_char" @@ -3217,6 +3993,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro get_binding_char() -%}\n {{ adapter.dispatch('get_binding_char', 'dbt')() }}\n{%- endmacro %}", @@ -3228,12 +4005,13 @@ "path": "macros/materializations/seeds/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.get_binding_char" }, "macro.dbt.get_catalog": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.51827, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__get_catalog" @@ -3241,6 +4019,7 @@ }, 
"description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro get_catalog(information_schema, schemas) -%}\n {{ return(adapter.dispatch('get_catalog', 'dbt')(information_schema, schemas)) }}\n{%- endmacro %}", @@ -3252,12 +4031,13 @@ "path": "macros/adapters/metadata.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.get_catalog" }, "macro.dbt.get_columns_in_query": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.527127, "depends_on": { "macros": [ "macro.dbt.default__get_columns_in_query" @@ -3265,6 +4045,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro get_columns_in_query(select_sql) -%}\n {{ return(adapter.dispatch('get_columns_in_query', 'dbt')(select_sql)) }}\n{% endmacro %}", @@ -3276,12 +4057,13 @@ "path": "macros/adapters/columns.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.get_columns_in_query" }, "macro.dbt.get_columns_in_relation": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.525661, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__get_columns_in_relation" @@ -3289,6 +4071,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro get_columns_in_relation(relation) -%}\n {{ return(adapter.dispatch('get_columns_in_relation', 'dbt')(relation)) }}\n{% endmacro %}", @@ -3300,12 +4083,13 @@ "path": "macros/adapters/columns.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.get_columns_in_relation" }, "macro.dbt.get_create_index_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4778419, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__get_create_index_sql" @@ -3313,6 +4097,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro get_create_index_sql(relation, index_dict) -%}\n {{ return(adapter.dispatch('get_create_index_sql', 'dbt')(relation, index_dict)) }}\n{% endmacro %}", @@ -3324,12 +4109,13 @@ "path": "macros/adapters/indexes.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.get_create_index_sql" }, "macro.dbt.get_create_table_as_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.349, "depends_on": { "macros": [ "macro.dbt.default__get_create_table_as_sql" @@ -3337,6 +4123,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro get_create_table_as_sql(temporary, relation, sql) -%}\n {{ adapter.dispatch('get_create_table_as_sql', 'dbt')(temporary, relation, sql) }}\n{%- endmacro %}", @@ -3348,12 +4135,13 @@ "path": "macros/materializations/models/table/create_table_as.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.get_create_table_as_sql" }, "macro.dbt.get_create_view_as_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.360684, "depends_on": { "macros": [ "macro.dbt.default__get_create_view_as_sql" @@ -3361,6 +4149,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro get_create_view_as_sql(relation, sql) 
-%}\n {{ adapter.dispatch('get_create_view_as_sql', 'dbt')(relation, sql) }}\n{%- endmacro %}", @@ -3372,12 +4161,13 @@ "path": "macros/materializations/models/view/create_view_as.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.get_create_view_as_sql" }, "macro.dbt.get_csv_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.378786, "depends_on": { "macros": [ "macro.dbt.default__get_csv_sql" @@ -3385,6 +4175,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro get_csv_sql(create_or_truncate_sql, insert_sql) %}\n {{ adapter.dispatch('get_csv_sql', 'dbt')(create_or_truncate_sql, insert_sql) }}\n{% endmacro %}", @@ -3396,12 +4187,13 @@ "path": "macros/materializations/seeds/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.get_csv_sql" }, "macro.dbt.get_dcl_statement_list": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.50449, "depends_on": { "macros": [ "macro.dbt.default__get_dcl_statement_list" @@ -3409,6 +4201,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro get_dcl_statement_list(relation, grant_config, get_dcl_macro) %}\n {{ return(adapter.dispatch('get_dcl_statement_list', 'dbt')(relation, grant_config, get_dcl_macro)) }}\n{% endmacro %}", @@ -3420,12 +4213,13 @@ "path": "macros/adapters/apply_grants.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.get_dcl_statement_list" }, "macro.dbt.get_delete_insert_merge_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.308919, "depends_on": { "macros": [ "macro.dbt.default__get_delete_insert_merge_sql" @@ -3433,6 +4227,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro get_delete_insert_merge_sql(target, source, unique_key, dest_columns) -%}\n {{ adapter.dispatch('get_delete_insert_merge_sql', 'dbt')(target, source, unique_key, dest_columns) }}\n{%- endmacro %}", @@ -3444,12 +4239,13 @@ "path": "macros/materializations/models/incremental/merge.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.get_delete_insert_merge_sql" }, "macro.dbt.get_grant_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.502623, "depends_on": { "macros": [ "macro.dbt.default__get_grant_sql" @@ -3457,6 +4253,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro get_grant_sql(relation, privilege, grantees) %}\n {{ return(adapter.dispatch('get_grant_sql', 'dbt')(relation, privilege, grantees)) }}\n{% endmacro %}", @@ -3468,12 +4265,169 @@ "path": "macros/adapters/apply_grants.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.get_grant_sql" }, + "macro.dbt.get_incremental_append_sql": { + "arguments": [], + "created_at": 1670298551.3152268, + "depends_on": { + "macros": [ + "macro.dbt.default__get_incremental_append_sql" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro get_incremental_append_sql(arg_dict) %}\n\n {{ 
return(adapter.dispatch('get_incremental_append_sql', 'dbt')(arg_dict)) }}\n\n{% endmacro %}", + "meta": {}, + "name": "get_incremental_append_sql", + "original_file_path": "macros/materializations/models/incremental/strategies.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/materializations/models/incremental/strategies.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.get_incremental_append_sql" + }, + "macro.dbt.get_incremental_default_sql": { + "arguments": [], + "created_at": 1670298551.318759, + "depends_on": { + "macros": [ + "macro.dbt_postgres.postgres__get_incremental_default_sql" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro get_incremental_default_sql(arg_dict) %}\n\n {{ return(adapter.dispatch('get_incremental_default_sql', 'dbt')(arg_dict)) }}\n\n{% endmacro %}", + "meta": {}, + "name": "get_incremental_default_sql", + "original_file_path": "macros/materializations/models/incremental/strategies.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/materializations/models/incremental/strategies.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.get_incremental_default_sql" + }, + "macro.dbt.get_incremental_delete_insert_sql": { + "arguments": [], + "created_at": 1670298551.3160691, + "depends_on": { + "macros": [ + "macro.dbt.default__get_incremental_delete_insert_sql" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro get_incremental_delete_insert_sql(arg_dict) %}\n\n {{ return(adapter.dispatch('get_incremental_delete_insert_sql', 'dbt')(arg_dict)) }}\n\n{% endmacro %}", + "meta": {}, + "name": "get_incremental_delete_insert_sql", + "original_file_path": "macros/materializations/models/incremental/strategies.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/materializations/models/incremental/strategies.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.get_incremental_delete_insert_sql" + }, + "macro.dbt.get_incremental_insert_overwrite_sql": { + "arguments": [], + "created_at": 1670298551.317858, + "depends_on": { + "macros": [ + "macro.dbt.default__get_incremental_insert_overwrite_sql" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro get_incremental_insert_overwrite_sql(arg_dict) %}\n\n {{ return(adapter.dispatch('get_incremental_insert_overwrite_sql', 'dbt')(arg_dict)) }}\n\n{% endmacro %}", + "meta": {}, + "name": "get_incremental_insert_overwrite_sql", + "original_file_path": "macros/materializations/models/incremental/strategies.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/materializations/models/incremental/strategies.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.get_incremental_insert_overwrite_sql" + }, + "macro.dbt.get_incremental_merge_sql": { + "arguments": [], + "created_at": 1670298551.31696, + "depends_on": { + "macros": [ + "macro.dbt.default__get_incremental_merge_sql" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro get_incremental_merge_sql(arg_dict) %}\n\n {{ 
return(adapter.dispatch('get_incremental_merge_sql', 'dbt')(arg_dict)) }}\n\n{% endmacro %}", + "meta": {}, + "name": "get_incremental_merge_sql", + "original_file_path": "macros/materializations/models/incremental/strategies.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/materializations/models/incremental/strategies.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.get_incremental_merge_sql" + }, + "macro.dbt.get_insert_into_sql": { + "arguments": [], + "created_at": 1670298551.319639, + "depends_on": { + "macros": [ + "macro.dbt.get_quoted_csv" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro get_insert_into_sql(target_relation, temp_relation, dest_columns) %}\n\n {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute=\"name\")) -%}\n\n insert into {{ target_relation }} ({{ dest_cols_csv }})\n (\n select {{ dest_cols_csv }}\n from {{ temp_relation }}\n )\n\n{% endmacro %}", + "meta": {}, + "name": "get_insert_into_sql", + "original_file_path": "macros/materializations/models/incremental/strategies.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/materializations/models/incremental/strategies.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.get_insert_into_sql" + }, "macro.dbt.get_insert_overwrite_merge_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.3109581, "depends_on": { "macros": [ "macro.dbt.default__get_insert_overwrite_merge_sql" @@ -3481,6 +4435,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro get_insert_overwrite_merge_sql(target, source, dest_columns, predicates, include_sql_header=false) -%}\n {{ adapter.dispatch('get_insert_overwrite_merge_sql', 'dbt')(target, source, dest_columns, predicates, include_sql_header) }}\n{%- endmacro %}", @@ -3492,12 +4447,13 @@ "path": "macros/materializations/models/incremental/merge.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.get_insert_overwrite_merge_sql" }, "macro.dbt.get_merge_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.3051069, "depends_on": { "macros": [ "macro.dbt.default__get_merge_sql" @@ -3505,6 +4461,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro get_merge_sql(target, source, unique_key, dest_columns, predicates=none) -%}\n {{ adapter.dispatch('get_merge_sql', 'dbt')(target, source, unique_key, dest_columns, predicates) }}\n{%- endmacro %}", @@ -3516,12 +4473,39 @@ "path": "macros/materializations/models/incremental/merge.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.get_merge_sql" }, + "macro.dbt.get_merge_update_columns": { + "arguments": [], + "created_at": 1670298551.296777, + "depends_on": { + "macros": [ + "macro.dbt.default__get_merge_update_columns" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro get_merge_update_columns(merge_update_columns, merge_exclude_columns, dest_columns) %}\n {{ return(adapter.dispatch('get_merge_update_columns', 'dbt')(merge_update_columns, merge_exclude_columns, 
dest_columns)) }}\n{% endmacro %}", + "meta": {}, + "name": "get_merge_update_columns", + "original_file_path": "macros/materializations/models/incremental/column_helpers.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/materializations/models/incremental/column_helpers.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.get_merge_update_columns" + }, "macro.dbt.get_or_create_relation": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.491816, "depends_on": { "macros": [ "macro.dbt.default__get_or_create_relation" @@ -3529,6 +4513,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro get_or_create_relation(database, schema, identifier, type) -%}\n {{ return(adapter.dispatch('get_or_create_relation', 'dbt')(database, schema, identifier, type)) }}\n{% endmacro %}", @@ -3540,17 +4525,19 @@ "path": "macros/adapters/relation.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.get_or_create_relation" }, "macro.dbt.get_quoted_csv": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.293896, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro get_quoted_csv(column_names) %}\n\n {% set quoted = [] %}\n {% for col in column_names -%}\n {%- do quoted.append(adapter.quote(col)) -%}\n {%- endfor %}\n\n {%- set dest_cols_csv = quoted | join(', ') -%}\n {{ return(dest_cols_csv) }}\n\n{% endmacro %}", @@ -3562,12 +4549,13 @@ "path": "macros/materializations/models/incremental/column_helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.get_quoted_csv" }, "macro.dbt.get_revoke_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.5035589, "depends_on": { "macros": [ "macro.dbt.default__get_revoke_sql" @@ -3575,6 +4563,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro get_revoke_sql(relation, privilege, grantees) %}\n {{ return(adapter.dispatch('get_revoke_sql', 'dbt')(relation, privilege, grantees)) }}\n{% endmacro %}", @@ -3586,17 +4575,19 @@ "path": "macros/adapters/apply_grants.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.get_revoke_sql" }, "macro.dbt.get_seed_column_quoted_csv": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.381716, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro get_seed_column_quoted_csv(model, column_names) %}\n {%- set quote_seed_column = model['config'].get('quote_columns', None) -%}\n {% set quoted = [] %}\n {% for col in column_names -%}\n {%- do quoted.append(adapter.quote_seed_column(col, quote_seed_column)) -%}\n {%- endfor %}\n\n {%- set dest_cols_csv = quoted | join(', ') -%}\n {{ return(dest_cols_csv) }}\n{% endmacro %}", @@ -3608,12 +4599,13 @@ "path": "macros/materializations/seeds/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.get_seed_column_quoted_csv" }, "macro.dbt.get_show_grant_sql": { "arguments": [], - "created_at": 
1663278957.5715818, + "created_at": 1670298551.501869, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__get_show_grant_sql" @@ -3621,6 +4613,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro get_show_grant_sql(relation) %}\n {{ return(adapter.dispatch(\"get_show_grant_sql\", \"dbt\")(relation)) }}\n{% endmacro %}", @@ -3632,12 +4625,13 @@ "path": "macros/adapters/apply_grants.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.get_show_grant_sql" }, "macro.dbt.get_test_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.289005, "depends_on": { "macros": [ "macro.dbt.default__get_test_sql" @@ -3645,6 +4639,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro get_test_sql(main_sql, fail_calc, warn_if, error_if, limit) -%}\n {{ adapter.dispatch('get_test_sql', 'dbt')(main_sql, fail_calc, warn_if, error_if, limit) }}\n{%- endmacro %}", @@ -3656,12 +4651,13 @@ "path": "macros/materializations/tests/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.get_test_sql" }, "macro.dbt.get_true_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.269536, "depends_on": { "macros": [ "macro.dbt.default__get_true_sql" @@ -3669,6 +4665,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro get_true_sql() %}\n {{ adapter.dispatch('get_true_sql', 'dbt')() }}\n{% endmacro %}", @@ -3680,12 +4677,13 @@ "path": "macros/materializations/snapshots/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.get_true_sql" }, "macro.dbt.get_where_subquery": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.29043, "depends_on": { "macros": [ "macro.dbt.default__get_where_subquery" @@ -3693,6 +4691,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro get_where_subquery(relation) -%}\n {% do return(adapter.dispatch('get_where_subquery', 'dbt')(relation)) %}\n{%- endmacro %}", @@ -3704,12 +4703,13 @@ "path": "macros/materializations/tests/where_subquery.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.get_where_subquery" }, "macro.dbt.handle_existing_table": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.356553, "depends_on": { "macros": [ "macro.dbt.default__handle_existing_table" @@ -3717,6 +4717,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro handle_existing_table(full_refresh, old_relation) %}\n {{ adapter.dispatch('handle_existing_table', 'dbt')(full_refresh, old_relation) }}\n{% endmacro %}", @@ -3728,12 +4729,13 @@ "path": "macros/materializations/models/view/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.handle_existing_table" }, "macro.dbt.hash": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.445781, "depends_on": { "macros": [ "macro.dbt.default__hash" @@ -3741,6 +4743,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro 
hash(field) -%}\n {{ return(adapter.dispatch('hash', 'dbt') (field)) }}\n{%- endmacro %}", @@ -3752,12 +4755,13 @@ "path": "macros/utils/hash.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.hash" }, "macro.dbt.in_transaction": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.242513, "depends_on": { "macros": [ "macro.dbt.make_hook_config" @@ -3765,6 +4769,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro in_transaction(sql) %}\n {{ make_hook_config(sql, inside_transaction=True) }}\n{% endmacro %}", @@ -3776,17 +4781,19 @@ "path": "macros/materializations/hooks.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.in_transaction" }, "macro.dbt.incremental_validate_on_schema_change": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.336801, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro incremental_validate_on_schema_change(on_schema_change, default='ignore') %}\n\n {% if on_schema_change not in ['sync_all_columns', 'append_new_columns', 'fail', 'ignore'] %}\n\n {% set log_message = 'Invalid value for on_schema_change (%s) specified. Setting default value of %s.' % (on_schema_change, default) %}\n {% do log(log_message) %}\n\n {{ return(default) }}\n\n {% else %}\n\n {{ return(on_schema_change) }}\n\n {% endif %}\n\n{% endmacro %}", @@ -3798,12 +4805,13 @@ "path": "macros/materializations/models/incremental/on_schema_change.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.incremental_validate_on_schema_change" }, "macro.dbt.information_schema_name": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.5193589, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__information_schema_name" @@ -3811,6 +4819,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro information_schema_name(database) %}\n {{ return(adapter.dispatch('information_schema_name', 'dbt')(database)) }}\n{% endmacro %}", @@ -3822,12 +4831,13 @@ "path": "macros/adapters/metadata.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.information_schema_name" }, "macro.dbt.intersect": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4103181, "depends_on": { "macros": [ "macro.dbt.default__intersect" @@ -3835,6 +4845,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro intersect() %}\n {{ return(adapter.dispatch('intersect', 'dbt')()) }}\n{% endmacro %}", @@ -3846,12 +4857,13 @@ "path": "macros/utils/intersect.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.intersect" }, "macro.dbt.is_incremental": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.3135989, "depends_on": { "macros": [ "macro.dbt.should_full_refresh" @@ -3859,6 +4871,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro is_incremental() %}\n {#-- do not run introspective queries in parsing #}\n {% if not execute %}\n {{ 
return(False) }}\n {% else %}\n {% set relation = adapter.get_relation(this.database, this.schema, this.table) %}\n {{ return(relation is not none\n and relation.type == 'table'\n and model.config.materialized == 'incremental'\n and not should_full_refresh()) }}\n {% endif %}\n{% endmacro %}", @@ -3870,12 +4883,13 @@ "path": "macros/materializations/models/incremental/is_incremental.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.is_incremental" }, "macro.dbt.last_day": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.463029, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__last_day" @@ -3883,6 +4897,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro last_day(date, datepart) %}\n {{ return(adapter.dispatch('last_day', 'dbt') (date, datepart)) }}\n{% endmacro %}", @@ -3894,12 +4909,13 @@ "path": "macros/utils/last_day.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.last_day" }, "macro.dbt.length": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.408395, "depends_on": { "macros": [ "macro.dbt.default__length" @@ -3907,6 +4923,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro length(expression) -%}\n {{ return(adapter.dispatch('length', 'dbt') (expression)) }}\n{% endmacro %}", @@ -3918,12 +4935,13 @@ "path": "macros/utils/length.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.length" }, "macro.dbt.list_relations_without_caching": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.522364, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__list_relations_without_caching" @@ -3931,6 +4949,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro list_relations_without_caching(schema_relation) %}\n {{ return(adapter.dispatch('list_relations_without_caching', 'dbt')(schema_relation)) }}\n{% endmacro %}", @@ -3942,12 +4961,13 @@ "path": "macros/adapters/metadata.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.list_relations_without_caching" }, "macro.dbt.list_schemas": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.520302, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__list_schemas" @@ -3955,6 +4975,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro list_schemas(database) -%}\n {{ return(adapter.dispatch('list_schemas', 'dbt')(database)) }}\n{% endmacro %}", @@ -3966,12 +4987,13 @@ "path": "macros/adapters/metadata.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.list_schemas" }, "macro.dbt.listagg": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.440186, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__listagg" @@ -3979,6 +5001,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro listagg(measure, delimiter_text=\"','\", order_by_clause=none, limit_num=none) -%}\n {{ return(adapter.dispatch('listagg', 'dbt') (measure, delimiter_text, 
order_by_clause, limit_num)) }}\n{%- endmacro %}", @@ -3990,17 +5013,19 @@ "path": "macros/utils/listagg.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.listagg" }, "macro.dbt.load_cached_relation": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4937768, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro load_cached_relation(relation) %}\n {% do return(adapter.get_relation(\n database=relation.database,\n schema=relation.schema,\n identifier=relation.identifier\n )) -%}\n{% endmacro %}", @@ -4012,12 +5037,13 @@ "path": "macros/adapters/relation.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.load_cached_relation" }, "macro.dbt.load_csv_rows": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.382082, "depends_on": { "macros": [ "macro.dbt.default__load_csv_rows" @@ -4025,6 +5051,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro load_csv_rows(model, agate_table) -%}\n {{ adapter.dispatch('load_csv_rows', 'dbt')(model, agate_table) }}\n{%- endmacro %}", @@ -4036,12 +5063,13 @@ "path": "macros/materializations/seeds/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.load_csv_rows" }, "macro.dbt.load_relation": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4941142, "depends_on": { "macros": [ "macro.dbt.load_cached_relation" @@ -4049,6 +5077,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro load_relation(relation) %}\n {{ return(load_cached_relation(relation)) }}\n{% endmacro %}", @@ -4060,12 +5089,13 @@ "path": "macros/adapters/relation.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.load_relation" }, "macro.dbt.make_backup_relation": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4873002, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__make_backup_relation" @@ -4073,6 +5103,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro make_backup_relation(base_relation, backup_relation_type, suffix='__dbt_backup') %}\n {{ return(adapter.dispatch('make_backup_relation', 'dbt')(base_relation, backup_relation_type, suffix)) }}\n{% endmacro %}", @@ -4084,17 +5115,19 @@ "path": "macros/adapters/relation.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.make_backup_relation" }, "macro.dbt.make_hook_config": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.241919, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro make_hook_config(sql, inside_transaction) %}\n {{ tojson({\"sql\": sql, \"transaction\": inside_transaction}) }}\n{% endmacro %}", @@ -4106,12 +5139,13 @@ "path": "macros/materializations/hooks.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.make_hook_config" }, "macro.dbt.make_intermediate_relation": { "arguments": 
[], - "created_at": 1663278957.5715818, + "created_at": 1670298551.485225, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__make_intermediate_relation" @@ -4119,6 +5153,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro make_intermediate_relation(base_relation, suffix='__dbt_tmp') %}\n {{ return(adapter.dispatch('make_intermediate_relation', 'dbt')(base_relation, suffix)) }}\n{% endmacro %}", @@ -4130,12 +5165,13 @@ "path": "macros/adapters/relation.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.make_intermediate_relation" }, "macro.dbt.make_temp_relation": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4861162, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__make_temp_relation" @@ -4143,6 +5179,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro make_temp_relation(base_relation, suffix='__dbt_tmp') %}\n {{ return(adapter.dispatch('make_temp_relation', 'dbt')(base_relation, suffix)) }}\n{% endmacro %}", @@ -4154,12 +5191,13 @@ "path": "macros/adapters/relation.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.make_temp_relation" }, "macro.dbt.materialization_incremental_default": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.3286989, "depends_on": { "macros": [ "macro.dbt.load_cached_relation", @@ -4173,7 +5211,6 @@ "macro.dbt.get_create_table_as_sql", "macro.dbt.run_query", "macro.dbt.process_schema_changes", - "macro.dbt.get_delete_insert_merge_sql", "macro.dbt.statement", "macro.dbt.should_revoke", "macro.dbt.apply_grants", @@ -4183,9 +5220,10 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, - "macro_sql": "{% materialization incremental, default -%}\n\n -- relations\n {%- set existing_relation = load_cached_relation(this) -%}\n {%- set target_relation = this.incorporate(type='table') -%}\n {%- set temp_relation = make_temp_relation(target_relation)-%}\n {%- set intermediate_relation = make_intermediate_relation(target_relation)-%}\n {%- set backup_relation_type = 'table' if existing_relation is none else existing_relation.type -%}\n {%- set backup_relation = make_backup_relation(target_relation, backup_relation_type) -%}\n\n -- configs\n {%- set unique_key = config.get('unique_key') -%}\n {%- set full_refresh_mode = (should_full_refresh() or existing_relation.is_view) -%}\n {%- set on_schema_change = incremental_validate_on_schema_change(config.get('on_schema_change'), default='ignore') -%}\n\n -- the temp_ and backup_ relations should not already exist in the database; get_relation\n -- will return None in that case. Otherwise, we get a relation that we can drop\n -- later, before we try to use this name for the current operation. 
This has to happen before\n -- BEGIN, in a separate transaction\n {%- set preexisting_intermediate_relation = load_cached_relation(intermediate_relation)-%}\n {%- set preexisting_backup_relation = load_cached_relation(backup_relation) -%}\n -- grab current tables grants config for comparision later on\n {% set grant_config = config.get('grants') %}\n {{ drop_relation_if_exists(preexisting_intermediate_relation) }}\n {{ drop_relation_if_exists(preexisting_backup_relation) }}\n\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n -- `BEGIN` happens here:\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n {% set to_drop = [] %}\n\n {% if existing_relation is none %}\n {% set build_sql = get_create_table_as_sql(False, target_relation, sql) %}\n {% elif full_refresh_mode %}\n {% set build_sql = get_create_table_as_sql(False, intermediate_relation, sql) %}\n {% set need_swap = true %}\n {% else %}\n {% do run_query(get_create_table_as_sql(True, temp_relation, sql)) %}\n {% do adapter.expand_target_column_types(\n from_relation=temp_relation,\n to_relation=target_relation) %}\n {#-- Process schema changes. Returns dict of changes if successful. Use source columns for upserting/merging --#}\n {% set dest_columns = process_schema_changes(on_schema_change, temp_relation, existing_relation) %}\n {% if not dest_columns %}\n {% set dest_columns = adapter.get_columns_in_relation(existing_relation) %}\n {% endif %}\n {% set build_sql = get_delete_insert_merge_sql(target_relation, temp_relation, unique_key, dest_columns) %}\n\n {% endif %}\n\n {% call statement(\"main\") %}\n {{ build_sql }}\n {% endcall %}\n\n {% if need_swap %}\n {% do adapter.rename_relation(target_relation, backup_relation) %}\n {% do adapter.rename_relation(intermediate_relation, target_relation) %}\n {% do to_drop.append(backup_relation) %}\n {% endif %}\n\n {% set should_revoke = should_revoke(existing_relation, full_refresh_mode) %}\n {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}\n\n {% do persist_docs(target_relation, model) %}\n\n {% if existing_relation is none or existing_relation.is_view or should_full_refresh() %}\n {% do create_indexes(target_relation) %}\n {% endif %}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n -- `COMMIT` happens here\n {% do adapter.commit() %}\n\n {% for rel in to_drop %}\n {% do adapter.drop_relation(rel) %}\n {% endfor %}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n {{ return({'relations': [target_relation]}) }}\n\n{%- endmaterialization %}", + "macro_sql": "{% materialization incremental, default -%}\n\n -- relations\n {%- set existing_relation = load_cached_relation(this) -%}\n {%- set target_relation = this.incorporate(type='table') -%}\n {%- set temp_relation = make_temp_relation(target_relation)-%}\n {%- set intermediate_relation = make_intermediate_relation(target_relation)-%}\n {%- set backup_relation_type = 'table' if existing_relation is none else existing_relation.type -%}\n {%- set backup_relation = make_backup_relation(target_relation, backup_relation_type) -%}\n\n -- configs\n {%- set unique_key = config.get('unique_key') -%}\n {%- set full_refresh_mode = (should_full_refresh() or existing_relation.is_view) -%}\n {%- set on_schema_change = incremental_validate_on_schema_change(config.get('on_schema_change'), default='ignore') -%}\n\n -- the temp_ and backup_ relations should not already exist in the database; get_relation\n -- will return None in that case. 
Otherwise, we get a relation that we can drop\n -- later, before we try to use this name for the current operation. This has to happen before\n -- BEGIN, in a separate transaction\n {%- set preexisting_intermediate_relation = load_cached_relation(intermediate_relation)-%}\n {%- set preexisting_backup_relation = load_cached_relation(backup_relation) -%}\n -- grab current tables grants config for comparision later on\n {% set grant_config = config.get('grants') %}\n {{ drop_relation_if_exists(preexisting_intermediate_relation) }}\n {{ drop_relation_if_exists(preexisting_backup_relation) }}\n\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n -- `BEGIN` happens here:\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n {% set to_drop = [] %}\n\n {% if existing_relation is none %}\n {% set build_sql = get_create_table_as_sql(False, target_relation, sql) %}\n {% elif full_refresh_mode %}\n {% set build_sql = get_create_table_as_sql(False, intermediate_relation, sql) %}\n {% set need_swap = true %}\n {% else %}\n {% do run_query(get_create_table_as_sql(True, temp_relation, sql)) %}\n {% do adapter.expand_target_column_types(\n from_relation=temp_relation,\n to_relation=target_relation) %}\n {#-- Process schema changes. Returns dict of changes if successful. Use source columns for upserting/merging --#}\n {% set dest_columns = process_schema_changes(on_schema_change, temp_relation, existing_relation) %}\n {% if not dest_columns %}\n {% set dest_columns = adapter.get_columns_in_relation(existing_relation) %}\n {% endif %}\n\n {#-- Get the incremental_strategy, the macro to use for the strategy, and build the sql --#}\n {% set incremental_strategy = config.get('incremental_strategy') or 'default' %}\n {% set incremental_predicates = config.get('incremental_predicates', none) %}\n {% set strategy_sql_macro_func = adapter.get_incremental_strategy_macro(context, incremental_strategy) %}\n {% set strategy_arg_dict = ({'target_relation': target_relation, 'temp_relation': temp_relation, 'unique_key': unique_key, 'dest_columns': dest_columns, 'predicates': incremental_predicates }) %}\n {% set build_sql = strategy_sql_macro_func(strategy_arg_dict) %}\n\n {% endif %}\n\n {% call statement(\"main\") %}\n {{ build_sql }}\n {% endcall %}\n\n {% if need_swap %}\n {% do adapter.rename_relation(target_relation, backup_relation) %}\n {% do adapter.rename_relation(intermediate_relation, target_relation) %}\n {% do to_drop.append(backup_relation) %}\n {% endif %}\n\n {% set should_revoke = should_revoke(existing_relation, full_refresh_mode) %}\n {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}\n\n {% do persist_docs(target_relation, model) %}\n\n {% if existing_relation is none or existing_relation.is_view or should_full_refresh() %}\n {% do create_indexes(target_relation) %}\n {% endif %}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n -- `COMMIT` happens here\n {% do adapter.commit() %}\n\n {% for rel in to_drop %}\n {% do adapter.drop_relation(rel) %}\n {% endfor %}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n {{ return({'relations': [target_relation]}) }}\n\n{%- endmaterialization %}", "meta": {}, "name": "materialization_incremental_default", "original_file_path": "macros/materializations/models/incremental/incremental.sql", @@ -4194,12 +5232,15 @@ "path": "macros/materializations/models/incremental/incremental.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": [ + "sql" + ], "tags": [], 
"unique_id": "macro.dbt.materialization_incremental_default" }, "macro.dbt.materialization_seed_default": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.368004, "depends_on": { "macros": [ "macro.dbt.should_full_refresh", @@ -4217,6 +5258,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% materialization seed, default %}\n\n {%- set identifier = model['alias'] -%}\n {%- set full_refresh_mode = (should_full_refresh()) -%}\n\n {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}\n\n {%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%}\n {%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%}\n\n {%- set grant_config = config.get('grants') -%}\n {%- set agate_table = load_agate_table() -%}\n -- grab current tables grants config for comparision later on\n\n {%- do store_result('agate_table', response='OK', agate_table=agate_table) -%}\n\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n -- `BEGIN` happens here:\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n -- build model\n {% set create_table_sql = \"\" %}\n {% if exists_as_view %}\n {{ exceptions.raise_compiler_error(\"Cannot seed to '{}', it is a view\".format(old_relation)) }}\n {% elif exists_as_table %}\n {% set create_table_sql = reset_csv_table(model, full_refresh_mode, old_relation, agate_table) %}\n {% else %}\n {% set create_table_sql = create_csv_table(model, agate_table) %}\n {% endif %}\n\n {% set code = 'CREATE' if full_refresh_mode else 'INSERT' %}\n {% set rows_affected = (agate_table.rows | length) %}\n {% set sql = load_csv_rows(model, agate_table) %}\n\n {% call noop_statement('main', code ~ ' ' ~ rows_affected, code, rows_affected) %}\n {{ get_csv_sql(create_table_sql, sql) }};\n {% endcall %}\n\n {% set target_relation = this.incorporate(type='table') %}\n\n {% set should_revoke = should_revoke(old_relation, full_refresh_mode) %}\n {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}\n\n {% do persist_docs(target_relation, model) %}\n\n {% if full_refresh_mode or not exists_as_table %}\n {% do create_indexes(target_relation) %}\n {% endif %}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n -- `COMMIT` happens here\n {{ adapter.commit() }}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n {{ return({'relations': [target_relation]}) }}\n\n{% endmaterialization %}", @@ -4228,12 +5270,15 @@ "path": "macros/materializations/seeds/seed.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": [ + "sql" + ], "tags": [], "unique_id": "macro.dbt.materialization_seed_default" }, "macro.dbt.materialization_snapshot_default": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.2841868, "depends_on": { "macros": [ "macro.dbt.get_or_create_relation", @@ -4254,9 +5299,10 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, - "macro_sql": "{% materialization snapshot, default %}\n {%- set config = model['config'] -%}\n\n {%- set target_table = model.get('alias', model.get('name')) -%}\n\n {%- set strategy_name = config.get('strategy') -%}\n {%- set unique_key = config.get('unique_key') %}\n -- grab current tables grants config for comparision later on\n {%- set grant_config = config.get('grants') -%}\n\n {% set target_relation_exists, target_relation = get_or_create_relation(\n 
database=model.database,\n schema=model.schema,\n identifier=target_table,\n type='table') -%}\n\n {%- if not target_relation.is_table -%}\n {% do exceptions.relation_wrong_type(target_relation, 'table') %}\n {%- endif -%}\n\n\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n {% set strategy_macro = strategy_dispatch(strategy_name) %}\n {% set strategy = strategy_macro(model, \"snapshotted_data\", \"source_data\", config, target_relation_exists) %}\n\n {% if not target_relation_exists %}\n\n {% set build_sql = build_snapshot_table(strategy, model['compiled_sql']) %}\n {% set final_sql = create_table_as(False, target_relation, build_sql) %}\n\n {% else %}\n\n {{ adapter.valid_snapshot_target(target_relation) }}\n\n {% set staging_table = build_snapshot_staging_table(strategy, sql, target_relation) %}\n\n -- this may no-op if the database does not require column expansion\n {% do adapter.expand_target_column_types(from_relation=staging_table,\n to_relation=target_relation) %}\n\n {% set missing_columns = adapter.get_missing_columns(staging_table, target_relation)\n | rejectattr('name', 'equalto', 'dbt_change_type')\n | rejectattr('name', 'equalto', 'DBT_CHANGE_TYPE')\n | rejectattr('name', 'equalto', 'dbt_unique_key')\n | rejectattr('name', 'equalto', 'DBT_UNIQUE_KEY')\n | list %}\n\n {% do create_columns(target_relation, missing_columns) %}\n\n {% set source_columns = adapter.get_columns_in_relation(staging_table)\n | rejectattr('name', 'equalto', 'dbt_change_type')\n | rejectattr('name', 'equalto', 'DBT_CHANGE_TYPE')\n | rejectattr('name', 'equalto', 'dbt_unique_key')\n | rejectattr('name', 'equalto', 'DBT_UNIQUE_KEY')\n | list %}\n\n {% set quoted_source_columns = [] %}\n {% for column in source_columns %}\n {% do quoted_source_columns.append(adapter.quote(column.name)) %}\n {% endfor %}\n\n {% set final_sql = snapshot_merge_sql(\n target = target_relation,\n source = staging_table,\n insert_cols = quoted_source_columns\n )\n %}\n\n {% endif %}\n\n {% call statement('main') %}\n {{ final_sql }}\n {% endcall %}\n\n {% set should_revoke = should_revoke(target_relation_exists, full_refresh_mode=False) %}\n {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}\n\n {% do persist_docs(target_relation, model) %}\n\n {% if not target_relation_exists %}\n {% do create_indexes(target_relation) %}\n {% endif %}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n {{ adapter.commit() }}\n\n {% if staging_table is defined %}\n {% do post_snapshot(staging_table) %}\n {% endif %}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n {{ return({'relations': [target_relation]}) }}\n\n{% endmaterialization %}", + "macro_sql": "{% materialization snapshot, default %}\n {%- set config = model['config'] -%}\n\n {%- set target_table = model.get('alias', model.get('name')) -%}\n\n {%- set strategy_name = config.get('strategy') -%}\n {%- set unique_key = config.get('unique_key') %}\n -- grab current tables grants config for comparision later on\n {%- set grant_config = config.get('grants') -%}\n\n {% set target_relation_exists, target_relation = get_or_create_relation(\n database=model.database,\n schema=model.schema,\n identifier=target_table,\n type='table') -%}\n\n {%- if not target_relation.is_table -%}\n {% do exceptions.relation_wrong_type(target_relation, 'table') %}\n {%- endif -%}\n\n\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n {{ run_hooks(pre_hooks, inside_transaction=True) 
}}\n\n {% set strategy_macro = strategy_dispatch(strategy_name) %}\n {% set strategy = strategy_macro(model, \"snapshotted_data\", \"source_data\", config, target_relation_exists) %}\n\n {% if not target_relation_exists %}\n\n {% set build_sql = build_snapshot_table(strategy, model['compiled_code']) %}\n {% set final_sql = create_table_as(False, target_relation, build_sql) %}\n\n {% else %}\n\n {{ adapter.valid_snapshot_target(target_relation) }}\n\n {% set staging_table = build_snapshot_staging_table(strategy, sql, target_relation) %}\n\n -- this may no-op if the database does not require column expansion\n {% do adapter.expand_target_column_types(from_relation=staging_table,\n to_relation=target_relation) %}\n\n {% set missing_columns = adapter.get_missing_columns(staging_table, target_relation)\n | rejectattr('name', 'equalto', 'dbt_change_type')\n | rejectattr('name', 'equalto', 'DBT_CHANGE_TYPE')\n | rejectattr('name', 'equalto', 'dbt_unique_key')\n | rejectattr('name', 'equalto', 'DBT_UNIQUE_KEY')\n | list %}\n\n {% do create_columns(target_relation, missing_columns) %}\n\n {% set source_columns = adapter.get_columns_in_relation(staging_table)\n | rejectattr('name', 'equalto', 'dbt_change_type')\n | rejectattr('name', 'equalto', 'DBT_CHANGE_TYPE')\n | rejectattr('name', 'equalto', 'dbt_unique_key')\n | rejectattr('name', 'equalto', 'DBT_UNIQUE_KEY')\n | list %}\n\n {% set quoted_source_columns = [] %}\n {% for column in source_columns %}\n {% do quoted_source_columns.append(adapter.quote(column.name)) %}\n {% endfor %}\n\n {% set final_sql = snapshot_merge_sql(\n target = target_relation,\n source = staging_table,\n insert_cols = quoted_source_columns\n )\n %}\n\n {% endif %}\n\n {% call statement('main') %}\n {{ final_sql }}\n {% endcall %}\n\n {% set should_revoke = should_revoke(target_relation_exists, full_refresh_mode=False) %}\n {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}\n\n {% do persist_docs(target_relation, model) %}\n\n {% if not target_relation_exists %}\n {% do create_indexes(target_relation) %}\n {% endif %}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n {{ adapter.commit() }}\n\n {% if staging_table is defined %}\n {% do post_snapshot(staging_table) %}\n {% endif %}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n {{ return({'relations': [target_relation]}) }}\n\n{% endmaterialization %}", "meta": {}, "name": "materialization_snapshot_default", "original_file_path": "macros/materializations/snapshots/snapshot.sql", @@ -4265,12 +5311,15 @@ "path": "macros/materializations/snapshots/snapshot.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": [ + "sql" + ], "tags": [], "unique_id": "macro.dbt.materialization_snapshot_default" }, "macro.dbt.materialization_table_default": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.347976, "depends_on": { "macros": [ "macro.dbt.load_cached_relation", @@ -4288,6 +5337,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% materialization table, default %}\n\n {%- set existing_relation = load_cached_relation(this) -%}\n {%- set target_relation = this.incorporate(type='table') %}\n {%- set intermediate_relation = make_intermediate_relation(target_relation) -%}\n -- the intermediate_relation should not already exist in the database; get_relation\n -- will return None in that case. 
Otherwise, we get a relation that we can drop\n -- later, before we try to use this name for the current operation\n {%- set preexisting_intermediate_relation = load_cached_relation(intermediate_relation) -%}\n /*\n See ../view/view.sql for more information about this relation.\n */\n {%- set backup_relation_type = 'table' if existing_relation is none else existing_relation.type -%}\n {%- set backup_relation = make_backup_relation(target_relation, backup_relation_type) -%}\n -- as above, the backup_relation should not already exist\n {%- set preexisting_backup_relation = load_cached_relation(backup_relation) -%}\n -- grab current tables grants config for comparision later on\n {% set grant_config = config.get('grants') %}\n\n -- drop the temp relations if they exist already in the database\n {{ drop_relation_if_exists(preexisting_intermediate_relation) }}\n {{ drop_relation_if_exists(preexisting_backup_relation) }}\n\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n -- `BEGIN` happens here:\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n -- build model\n {% call statement('main') -%}\n {{ get_create_table_as_sql(False, intermediate_relation, sql) }}\n {%- endcall %}\n\n -- cleanup\n {% if existing_relation is not none %}\n {{ adapter.rename_relation(existing_relation, backup_relation) }}\n {% endif %}\n\n {{ adapter.rename_relation(intermediate_relation, target_relation) }}\n\n {% do create_indexes(target_relation) %}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n {% set should_revoke = should_revoke(existing_relation, full_refresh_mode=True) %}\n {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}\n\n {% do persist_docs(target_relation, model) %}\n\n -- `COMMIT` happens here\n {{ adapter.commit() }}\n\n -- finally, drop the existing/backup relation after the commit\n {{ drop_relation_if_exists(backup_relation) }}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n {{ return({'relations': [target_relation]}) }}\n{% endmaterialization %}", @@ -4299,12 +5349,15 @@ "path": "macros/materializations/models/table/table.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": [ + "sql" + ], "tags": [], "unique_id": "macro.dbt.materialization_table_default" }, "macro.dbt.materialization_test_default": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.288075, "depends_on": { "macros": [ "macro.dbt.should_store_failures", @@ -4315,6 +5368,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{%- materialization test, default -%}\n\n {% set relations = [] %}\n\n {% if should_store_failures() %}\n\n {% set identifier = model['alias'] %}\n {% set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) %}\n {% set target_relation = api.Relation.create(\n identifier=identifier, schema=schema, database=database, type='table') -%} %}\n\n {% if old_relation %}\n {% do adapter.drop_relation(old_relation) %}\n {% endif %}\n\n {% call statement(auto_begin=True) %}\n {{ create_table_as(False, target_relation, sql) }}\n {% endcall %}\n\n {% do relations.append(target_relation) %}\n\n {% set main_sql %}\n select *\n from {{ target_relation }}\n {% endset %}\n\n {{ adapter.commit() }}\n\n {% else %}\n\n {% set main_sql = sql %}\n\n {% endif %}\n\n {% set limit = config.get('limit') %}\n {% set fail_calc = config.get('fail_calc') %}\n {% set warn_if = config.get('warn_if') %}\n {% set error_if = 
config.get('error_if') %}\n\n {% call statement('main', fetch_result=True) -%}\n\n {{ get_test_sql(main_sql, fail_calc, warn_if, error_if, limit)}}\n\n {%- endcall %}\n\n {{ return({'relations': relations}) }}\n\n{%- endmaterialization -%}", @@ -4326,12 +5380,15 @@ "path": "macros/materializations/tests/test.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": [ + "sql" + ], "tags": [], "unique_id": "macro.dbt.materialization_test_default" }, "macro.dbt.materialization_view_default": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.355838, "depends_on": { "macros": [ "macro.dbt.load_cached_relation", @@ -4348,6 +5405,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{%- materialization view, default -%}\n\n {%- set existing_relation = load_cached_relation(this) -%}\n {%- set target_relation = this.incorporate(type='view') -%}\n {%- set intermediate_relation = make_intermediate_relation(target_relation) -%}\n\n -- the intermediate_relation should not already exist in the database; get_relation\n -- will return None in that case. Otherwise, we get a relation that we can drop\n -- later, before we try to use this name for the current operation\n {%- set preexisting_intermediate_relation = load_cached_relation(intermediate_relation) -%}\n /*\n This relation (probably) doesn't exist yet. If it does exist, it's a leftover from\n a previous run, and we're going to try to drop it immediately. At the end of this\n materialization, we're going to rename the \"existing_relation\" to this identifier,\n and then we're going to drop it. In order to make sure we run the correct one of:\n - drop view ...\n - drop table ...\n\n We need to set the type of this relation to be the type of the existing_relation, if it exists,\n or else \"view\" as a sane default if it does not. Note that if the existing_relation does not\n exist, then there is nothing to move out of the way and subsequentally drop. 
In that case,\n this relation will be effectively unused.\n */\n {%- set backup_relation_type = 'view' if existing_relation is none else existing_relation.type -%}\n {%- set backup_relation = make_backup_relation(target_relation, backup_relation_type) -%}\n -- as above, the backup_relation should not already exist\n {%- set preexisting_backup_relation = load_cached_relation(backup_relation) -%}\n -- grab current tables grants config for comparision later on\n {% set grant_config = config.get('grants') %}\n\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n -- drop the temp relations if they exist already in the database\n {{ drop_relation_if_exists(preexisting_intermediate_relation) }}\n {{ drop_relation_if_exists(preexisting_backup_relation) }}\n\n -- `BEGIN` happens here:\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n -- build model\n {% call statement('main') -%}\n {{ get_create_view_as_sql(intermediate_relation, sql) }}\n {%- endcall %}\n\n -- cleanup\n -- move the existing view out of the way\n {% if existing_relation is not none %}\n {{ adapter.rename_relation(existing_relation, backup_relation) }}\n {% endif %}\n {{ adapter.rename_relation(intermediate_relation, target_relation) }}\n\n {% set should_revoke = should_revoke(existing_relation, full_refresh_mode=True) %}\n {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}\n\n {% do persist_docs(target_relation, model) %}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n {{ adapter.commit() }}\n\n {{ drop_relation_if_exists(backup_relation) }}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n {{ return({'relations': [target_relation]}) }}\n\n{%- endmaterialization -%}", @@ -4359,17 +5417,21 @@ "path": "macros/materializations/models/view/view.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": [ + "sql" + ], "tags": [], "unique_id": "macro.dbt.materialization_view_default" }, "macro.dbt.noop_statement": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.3970811, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro noop_statement(name=None, message=None, code=None, rows_affected=None, res=None) -%}\n {%- set sql = caller() -%}\n\n {%- if name == 'main' -%}\n {{ log('Writing runtime SQL for node \"{}\"'.format(model['unique_id'])) }}\n {{ write(sql) }}\n {%- endif -%}\n\n {%- if name is not none -%}\n {{ store_raw_result(name, message=message, code=code, rows_affected=rows_affected, agate_table=res) }}\n {%- endif -%}\n\n{%- endmacro %}", @@ -4381,12 +5443,13 @@ "path": "macros/etc/statement.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.noop_statement" }, "macro.dbt.partition_range": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.404437, "depends_on": { "macros": [ "macro.dbt.dates_in_range" @@ -4394,6 +5457,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro partition_range(raw_partition_date, date_fmt='%Y%m%d') %}\n {% set partition_range = (raw_partition_date | string).split(\",\") %}\n\n {% if (partition_range | length) == 1 %}\n {% set start_date = partition_range[0] %}\n {% set end_date = none %}\n {% elif (partition_range | length) == 2 %}\n {% set start_date = partition_range[0] %}\n {% set end_date = partition_range[1] %}\n {% else %}\n {{ 
exceptions.raise_compiler_error(\"Invalid partition time. Expected format: {Start Date}[,{End Date}]. Got: \" ~ raw_partition_date) }}\n {% endif %}\n\n {{ return(dates_in_range(start_date, end_date, in_fmt=date_fmt)) }}\n{% endmacro %}", @@ -4405,12 +5469,13 @@ "path": "macros/etc/datetime.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.partition_range" }, "macro.dbt.persist_docs": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.5139258, "depends_on": { "macros": [ "macro.dbt.default__persist_docs" @@ -4418,6 +5483,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro persist_docs(relation, model, for_relation=true, for_columns=true) -%}\n {{ return(adapter.dispatch('persist_docs', 'dbt')(relation, model, for_relation, for_columns)) }}\n{% endmacro %}", @@ -4429,12 +5495,13 @@ "path": "macros/adapters/persist_docs.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.persist_docs" }, "macro.dbt.position": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.449753, "depends_on": { "macros": [ "macro.dbt.default__position" @@ -4442,6 +5509,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro position(substring_text, string_text) -%}\n {{ return(adapter.dispatch('position', 'dbt') (substring_text, string_text)) }}\n{% endmacro %}", @@ -4453,12 +5521,13 @@ "path": "macros/utils/position.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.position" }, "macro.dbt.post_snapshot": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.2690291, "depends_on": { "macros": [ "macro.dbt.default__post_snapshot" @@ -4466,6 +5535,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro post_snapshot(staging_relation) %}\n {{ adapter.dispatch('post_snapshot', 'dbt')(staging_relation) }}\n{% endmacro %}", @@ -4477,12 +5547,13 @@ "path": "macros/materializations/snapshots/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.post_snapshot" }, "macro.dbt.process_schema_changes": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.343294, "depends_on": { "macros": [ "macro.dbt.check_for_schema_changes", @@ -4491,9 +5562,10 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, - "macro_sql": "{% macro process_schema_changes(on_schema_change, source_relation, target_relation) %}\n\n {% if on_schema_change == 'ignore' %}\n\n {{ return({}) }}\n\n {% else %}\n\n {% set schema_changes_dict = check_for_schema_changes(source_relation, target_relation) %}\n\n {% if schema_changes_dict['schema_changed'] %}\n\n {% if on_schema_change == 'fail' %}\n\n {% set fail_msg %}\n The source and target schemas on this incremental model are out of sync!\n They can be reconciled in several ways:\n - set the `on_schema_change` config to either append_new_columns or sync_all_columns, depending on your situation.\n - Re-run the incremental model with `full_refresh: True` to update the target schema.\n - update the schema manually and re-run the process.\n {% endset %}\n\n {% do exceptions.raise_compiler_error(fail_msg) %}\n\n {# 
-- unless we ignore, run the sync operation per the config #}\n {% else %}\n\n {% do sync_column_schemas(on_schema_change, target_relation, schema_changes_dict) %}\n\n {% endif %}\n\n {% endif %}\n\n {{ return(schema_changes_dict['source_columns']) }}\n\n {% endif %}\n\n{% endmacro %}", + "macro_sql": "{% macro process_schema_changes(on_schema_change, source_relation, target_relation) %}\n\n {% if on_schema_change == 'ignore' %}\n\n {{ return({}) }}\n\n {% else %}\n\n {% set schema_changes_dict = check_for_schema_changes(source_relation, target_relation) %}\n\n {% if schema_changes_dict['schema_changed'] %}\n\n {% if on_schema_change == 'fail' %}\n\n {% set fail_msg %}\n The source and target schemas on this incremental model are out of sync!\n They can be reconciled in several ways:\n - set the `on_schema_change` config to either append_new_columns or sync_all_columns, depending on your situation.\n - Re-run the incremental model with `full_refresh: True` to update the target schema.\n - update the schema manually and re-run the process.\n\n Additional troubleshooting context:\n Source columns not in target: {{ schema_changes_dict['source_not_in_target'] }}\n Target columns not in source: {{ schema_changes_dict['target_not_in_source'] }}\n New column types: {{ schema_changes_dict['new_target_types'] }}\n {% endset %}\n\n {% do exceptions.raise_compiler_error(fail_msg) %}\n\n {# -- unless we ignore, run the sync operation per the config #}\n {% else %}\n\n {% do sync_column_schemas(on_schema_change, target_relation, schema_changes_dict) %}\n\n {% endif %}\n\n {% endif %}\n\n {{ return(schema_changes_dict['source_columns']) }}\n\n {% endif %}\n\n{% endmacro %}", "meta": {}, "name": "process_schema_changes", "original_file_path": "macros/materializations/models/incremental/on_schema_change.sql", @@ -4502,17 +5574,19 @@ "path": "macros/materializations/models/incremental/on_schema_change.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.process_schema_changes" }, "macro.dbt.py_current_timestring": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4049861, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro py_current_timestring() %}\n {% set dt = modules.datetime.datetime.now() %}\n {% do return(dt.strftime(\"%Y%m%d%H%M%S%f\")) %}\n{% endmacro %}", @@ -4524,12 +5598,67 @@ "path": "macros/etc/datetime.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.py_current_timestring" }, + "macro.dbt.py_script_comment": { + "arguments": [], + "created_at": 1670298551.53705, + "depends_on": { + "macros": [] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{%macro py_script_comment()%}\n{%endmacro%}", + "meta": {}, + "name": "py_script_comment", + "original_file_path": "macros/python_model/python.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/python_model/python.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.py_script_comment" + }, + "macro.dbt.py_script_postfix": { + "arguments": [], + "created_at": 1670298551.536886, + "depends_on": { + "macros": [ + "macro.dbt.build_ref_function", + "macro.dbt.build_source_function", + "macro.dbt.build_config_dict", + 
"macro.dbt.is_incremental", + "macro.dbt.py_script_comment" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro py_script_postfix(model) %}\n# This part is user provided model code\n# you will need to copy the next section to run the code\n# COMMAND ----------\n# this part is dbt logic for get ref work, do not modify\n\n{{ build_ref_function(model ) }}\n{{ build_source_function(model ) }}\n{{ build_config_dict(model) }}\n\nclass config:\n def __init__(self, *args, **kwargs):\n pass\n\n @staticmethod\n def get(key, default=None):\n return config_dict.get(key, default)\n\nclass this:\n \"\"\"dbt.this() or dbt.this.identifier\"\"\"\n database = '{{ this.database }}'\n schema = '{{ this.schema }}'\n identifier = '{{ this.identifier }}'\n def __repr__(self):\n return '{{ this }}'\n\n\nclass dbtObj:\n def __init__(self, load_df_function) -> None:\n self.source = lambda *args: source(*args, dbt_load_df_function=load_df_function)\n self.ref = lambda *args: ref(*args, dbt_load_df_function=load_df_function)\n self.config = config\n self.this = this()\n self.is_incremental = {{ is_incremental() }}\n\n# COMMAND ----------\n{{py_script_comment()}}\n{% endmacro %}", + "meta": {}, + "name": "py_script_postfix", + "original_file_path": "macros/python_model/python.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/python_model/python.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.py_script_postfix" + }, "macro.dbt.rename_relation": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.490154, "depends_on": { "macros": [ "macro.dbt.default__rename_relation" @@ -4537,6 +5666,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro rename_relation(from_relation, to_relation) -%}\n {{ return(adapter.dispatch('rename_relation', 'dbt')(from_relation, to_relation)) }}\n{% endmacro %}", @@ -4548,12 +5678,13 @@ "path": "macros/adapters/relation.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.rename_relation" }, "macro.dbt.replace": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.406516, "depends_on": { "macros": [ "macro.dbt.default__replace" @@ -4561,6 +5692,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro replace(field, old_chars, new_chars) -%}\n {{ return(adapter.dispatch('replace', 'dbt') (field, old_chars, new_chars)) }}\n{% endmacro %}", @@ -4572,12 +5704,13 @@ "path": "macros/utils/replace.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.replace" }, "macro.dbt.reset_csv_table": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.37745, "depends_on": { "macros": [ "macro.dbt.default__reset_csv_table" @@ -4585,6 +5718,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro reset_csv_table(model, full_refresh, old_relation, agate_table) -%}\n {{ adapter.dispatch('reset_csv_table', 'dbt')(model, full_refresh, old_relation, agate_table) }}\n{%- endmacro %}", @@ -4596,12 +5730,13 @@ "path": "macros/materializations/seeds/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], 
"unique_id": "macro.dbt.reset_csv_table" }, "macro.dbt.right": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4121852, "depends_on": { "macros": [ "macro.dbt.default__right" @@ -4609,6 +5744,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro right(string_text, length_expression) -%}\n {{ return(adapter.dispatch('right', 'dbt') (string_text, length_expression)) }}\n{% endmacro %}", @@ -4620,12 +5756,13 @@ "path": "macros/utils/right.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.right" }, "macro.dbt.run_hooks": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.241405, "depends_on": { "macros": [ "macro.dbt.statement" @@ -4633,6 +5770,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro run_hooks(hooks, inside_transaction=True) %}\n {% for hook in hooks | selectattr('transaction', 'equalto', inside_transaction) %}\n {% if not inside_transaction and loop.first %}\n {% call statement(auto_begin=inside_transaction) %}\n commit;\n {% endcall %}\n {% endif %}\n {% set rendered = render(hook.get('sql')) | trim %}\n {% if (rendered | length) > 0 %}\n {% call statement(auto_begin=inside_transaction) %}\n {{ rendered }}\n {% endcall %}\n {% endif %}\n {% endfor %}\n{% endmacro %}", @@ -4644,12 +5782,13 @@ "path": "macros/materializations/hooks.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.run_hooks" }, "macro.dbt.run_query": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.397651, "depends_on": { "macros": [ "macro.dbt.statement" @@ -4657,6 +5796,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro run_query(sql) %}\n {% call statement(\"run_query_statement\", fetch_result=true, auto_begin=false) %}\n {{ sql }}\n {% endcall %}\n\n {% do return(load_result(\"run_query_statement\").table) %}\n{% endmacro %}", @@ -4668,12 +5808,13 @@ "path": "macros/etc/statement.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.run_query" }, "macro.dbt.safe_cast": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.444459, "depends_on": { "macros": [ "macro.dbt.default__safe_cast" @@ -4681,6 +5822,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro safe_cast(field, type) %}\n {{ return(adapter.dispatch('safe_cast', 'dbt') (field, type)) }}\n{% endmacro %}", @@ -4692,17 +5834,19 @@ "path": "macros/utils/safe_cast.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.safe_cast" }, "macro.dbt.set_sql_header": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.243606, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro set_sql_header(config) -%}\n {{ config.set('sql_header', caller()) }}\n{%- endmacro %}", @@ -4714,17 +5858,19 @@ "path": "macros/materializations/configs.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.set_sql_header" }, "macro.dbt.should_full_refresh": { "arguments": 
[], - "created_at": 1663278957.5715818, + "created_at": 1670298551.244211, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro should_full_refresh() %}\n {% set config_full_refresh = config.get('full_refresh') %}\n {% if config_full_refresh is none %}\n {% set config_full_refresh = flags.FULL_REFRESH %}\n {% endif %}\n {% do return(config_full_refresh) %}\n{% endmacro %}", @@ -4736,12 +5882,13 @@ "path": "macros/materializations/configs.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.should_full_refresh" }, "macro.dbt.should_revoke": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.5014431, "depends_on": { "macros": [ "macro.dbt.copy_grants" @@ -4749,6 +5896,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro should_revoke(existing_relation, full_refresh_mode=True) %}\n\n {% if not existing_relation %}\n {#-- The table doesn't already exist, so no grants to copy over --#}\n {{ return(False) }}\n {% elif full_refresh_mode %}\n {#-- The object is being REPLACED -- whether grants are copied over depends on the value of user config --#}\n {{ return(copy_grants()) }}\n {% else %}\n {#-- The table is being merged/upserted/inserted -- grants will be carried over --#}\n {{ return(True) }}\n {% endif %}\n\n{% endmacro %}", @@ -4760,17 +5908,19 @@ "path": "macros/adapters/apply_grants.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.should_revoke" }, "macro.dbt.should_store_failures": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.2448268, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro should_store_failures() %}\n {% set config_store_failures = config.get('store_failures') %}\n {% if config_store_failures is none %}\n {% set config_store_failures = flags.STORE_FAILURES %}\n {% endif %}\n {% do return(config_store_failures) %}\n{% endmacro %}", @@ -4782,12 +5932,13 @@ "path": "macros/materializations/configs.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.should_store_failures" }, "macro.dbt.snapshot_check_all_get_existing_columns": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.259003, "depends_on": { "macros": [ "macro.dbt.get_columns_in_query" @@ -4795,9 +5946,10 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, - "macro_sql": "{% macro snapshot_check_all_get_existing_columns(node, target_exists, check_cols_config) -%}\n {%- if not target_exists -%}\n {#-- no table yet -> return whatever the query does --#}\n {{ return((false, query_columns)) }}\n {%- endif -%}\n\n {#-- handle any schema changes --#}\n {%- set target_relation = adapter.get_relation(database=node.database, schema=node.schema, identifier=node.alias) -%}\n\n {% if check_cols_config == 'all' %}\n {%- set query_columns = get_columns_in_query(node['compiled_sql']) -%}\n\n {% elif check_cols_config is iterable and (check_cols_config | length) > 0 %}\n {#-- query for proper casing/quoting, to support comparison below --#}\n {%- set select_check_cols_from_target -%}\n select {{ check_cols_config | join(', ') }} from ({{ node['compiled_sql'] }}) subq\n {%- endset -%}\n 
{% set query_columns = get_columns_in_query(select_check_cols_from_target) %}\n\n {% else %}\n {% do exceptions.raise_compiler_error(\"Invalid value for 'check_cols': \" ~ check_cols_config) %}\n {% endif %}\n\n {%- set existing_cols = adapter.get_columns_in_relation(target_relation) | map(attribute = 'name') | list -%}\n {%- set ns = namespace() -%} {#-- handle for-loop scoping with a namespace --#}\n {%- set ns.column_added = false -%}\n\n {%- set intersection = [] -%}\n {%- for col in query_columns -%}\n {%- if col in existing_cols -%}\n {%- do intersection.append(adapter.quote(col)) -%}\n {%- else -%}\n {% set ns.column_added = true %}\n {%- endif -%}\n {%- endfor -%}\n {{ return((ns.column_added, intersection)) }}\n{%- endmacro %}", + "macro_sql": "{% macro snapshot_check_all_get_existing_columns(node, target_exists, check_cols_config) -%}\n {%- if not target_exists -%}\n {#-- no table yet -> return whatever the query does --#}\n {{ return((false, query_columns)) }}\n {%- endif -%}\n\n {#-- handle any schema changes --#}\n {%- set target_relation = adapter.get_relation(database=node.database, schema=node.schema, identifier=node.alias) -%}\n\n {% if check_cols_config == 'all' %}\n {%- set query_columns = get_columns_in_query(node['compiled_code']) -%}\n\n {% elif check_cols_config is iterable and (check_cols_config | length) > 0 %}\n {#-- query for proper casing/quoting, to support comparison below --#}\n {%- set select_check_cols_from_target -%}\n select {{ check_cols_config | join(', ') }} from ({{ node['compiled_code'] }}) subq\n {%- endset -%}\n {% set query_columns = get_columns_in_query(select_check_cols_from_target) %}\n\n {% else %}\n {% do exceptions.raise_compiler_error(\"Invalid value for 'check_cols': \" ~ check_cols_config) %}\n {% endif %}\n\n {%- set existing_cols = adapter.get_columns_in_relation(target_relation) | map(attribute = 'name') | list -%}\n {%- set ns = namespace() -%} {#-- handle for-loop scoping with a namespace --#}\n {%- set ns.column_added = false -%}\n\n {%- set intersection = [] -%}\n {%- for col in query_columns -%}\n {%- if col in existing_cols -%}\n {%- do intersection.append(adapter.quote(col)) -%}\n {%- else -%}\n {% set ns.column_added = true %}\n {%- endif -%}\n {%- endfor -%}\n {{ return((ns.column_added, intersection)) }}\n{%- endmacro %}", "meta": {}, "name": "snapshot_check_all_get_existing_columns", "original_file_path": "macros/materializations/snapshots/strategies.sql", @@ -4806,12 +5958,13 @@ "path": "macros/materializations/snapshots/strategies.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.snapshot_check_all_get_existing_columns" }, "macro.dbt.snapshot_check_strategy": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.261952, "depends_on": { "macros": [ "macro.dbt.snapshot_get_time", @@ -4822,6 +5975,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro snapshot_check_strategy(node, snapshotted_rel, current_rel, config, target_exists) %}\n {% set check_cols_config = config['check_cols'] %}\n {% set primary_key = config['unique_key'] %}\n {% set invalidate_hard_deletes = config.get('invalidate_hard_deletes', false) %}\n {% set updated_at = config.get('updated_at', snapshot_get_time()) %}\n\n {% set column_added = false %}\n\n {% set column_added, check_cols = snapshot_check_all_get_existing_columns(node, target_exists, check_cols_config) %}\n\n {%- set row_changed_expr 
-%}\n (\n {%- if column_added -%}\n {{ get_true_sql() }}\n {%- else -%}\n {%- for col in check_cols -%}\n {{ snapshotted_rel }}.{{ col }} != {{ current_rel }}.{{ col }}\n or\n (\n (({{ snapshotted_rel }}.{{ col }} is null) and not ({{ current_rel }}.{{ col }} is null))\n or\n ((not {{ snapshotted_rel }}.{{ col }} is null) and ({{ current_rel }}.{{ col }} is null))\n )\n {%- if not loop.last %} or {% endif -%}\n {%- endfor -%}\n {%- endif -%}\n )\n {%- endset %}\n\n {% set scd_id_expr = snapshot_hash_arguments([primary_key, updated_at]) %}\n\n {% do return({\n \"unique_key\": primary_key,\n \"updated_at\": updated_at,\n \"row_changed\": row_changed_expr,\n \"scd_id\": scd_id_expr,\n \"invalidate_hard_deletes\": invalidate_hard_deletes\n }) %}\n{% endmacro %}", @@ -4833,12 +5987,13 @@ "path": "macros/materializations/snapshots/strategies.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.snapshot_check_strategy" }, "macro.dbt.snapshot_get_time": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.474946, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__snapshot_get_time" @@ -4846,23 +6001,25 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, - "macro_sql": "{% macro snapshot_get_time() -%}\n {{ adapter.dispatch('snapshot_get_time', 'dbt')() }}\n{%- endmacro %}", + "macro_sql": "\n\n{%- macro snapshot_get_time() -%}\n {{ adapter.dispatch('snapshot_get_time', 'dbt')() }}\n{%- endmacro -%}\n\n", "meta": {}, "name": "snapshot_get_time", - "original_file_path": "macros/materializations/snapshots/strategies.sql", + "original_file_path": "macros/adapters/timestamps.sql", "package_name": "dbt", "patch_path": null, - "path": "macros/materializations/snapshots/strategies.sql", + "path": "macros/adapters/timestamps.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.snapshot_get_time" }, "macro.dbt.snapshot_hash_arguments": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.2525802, "depends_on": { "macros": [ "macro.dbt.default__snapshot_hash_arguments" @@ -4870,6 +6027,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro snapshot_hash_arguments(args) -%}\n {{ adapter.dispatch('snapshot_hash_arguments', 'dbt')(args) }}\n{%- endmacro %}", @@ -4881,12 +6039,13 @@ "path": "macros/materializations/snapshots/strategies.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.snapshot_hash_arguments" }, "macro.dbt.snapshot_merge_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.245815, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__snapshot_merge_sql" @@ -4894,6 +6053,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro snapshot_merge_sql(target, source, insert_cols) -%}\n {{ adapter.dispatch('snapshot_merge_sql', 'dbt')(target, source, insert_cols) }}\n{%- endmacro %}", @@ -4905,12 +6065,13 @@ "path": "macros/materializations/snapshots/snapshot_merge.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.snapshot_merge_sql" }, "macro.dbt.snapshot_staging_table": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.270217, 
"depends_on": { "macros": [ "macro.dbt.default__snapshot_staging_table" @@ -4918,6 +6079,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro snapshot_staging_table(strategy, source_sql, target_relation) -%}\n {{ adapter.dispatch('snapshot_staging_table', 'dbt')(strategy, source_sql, target_relation) }}\n{% endmacro %}", @@ -4929,12 +6091,13 @@ "path": "macros/materializations/snapshots/helpers.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.snapshot_staging_table" }, "macro.dbt.snapshot_string_as_time": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.254751, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__snapshot_string_as_time" @@ -4942,6 +6105,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro snapshot_string_as_time(timestamp) -%}\n {{ adapter.dispatch('snapshot_string_as_time', 'dbt')(timestamp) }}\n{%- endmacro %}", @@ -4953,12 +6117,13 @@ "path": "macros/materializations/snapshots/strategies.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.snapshot_string_as_time" }, "macro.dbt.snapshot_timestamp_strategy": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.254416, "depends_on": { "macros": [ "macro.dbt.snapshot_hash_arguments" @@ -4966,6 +6131,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro snapshot_timestamp_strategy(node, snapshotted_rel, current_rel, config, target_exists) %}\n {% set primary_key = config['unique_key'] %}\n {% set updated_at = config['updated_at'] %}\n {% set invalidate_hard_deletes = config.get('invalidate_hard_deletes', false) %}\n\n {#/*\n The snapshot relation might not have an {{ updated_at }} value if the\n snapshot strategy is changed from `check` to `timestamp`. 
We\n should use a dbt-created column for the comparison in the snapshot\n table instead of assuming that the user-supplied {{ updated_at }}\n will be present in the historical data.\n\n See https://github.com/dbt-labs/dbt-core/issues/2350\n */ #}\n {% set row_changed_expr -%}\n ({{ snapshotted_rel }}.dbt_valid_from < {{ current_rel }}.{{ updated_at }})\n {%- endset %}\n\n {% set scd_id_expr = snapshot_hash_arguments([primary_key, updated_at]) %}\n\n {% do return({\n \"unique_key\": primary_key,\n \"updated_at\": updated_at,\n \"row_changed\": row_changed_expr,\n \"scd_id\": scd_id_expr,\n \"invalidate_hard_deletes\": invalidate_hard_deletes\n }) %}\n{% endmacro %}", @@ -4977,12 +6143,13 @@ "path": "macros/materializations/snapshots/strategies.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.snapshot_timestamp_strategy" }, "macro.dbt.split_part": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4656482, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__split_part" @@ -4990,6 +6157,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro split_part(string_text, delimiter_text, part_number) %}\n {{ return(adapter.dispatch('split_part', 'dbt') (string_text, delimiter_text, part_number)) }}\n{% endmacro %}", @@ -5001,17 +6169,19 @@ "path": "macros/utils/split_part.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.split_part" }, "macro.dbt.sql_convert_columns_in_relation": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.526747, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro sql_convert_columns_in_relation(table) -%}\n {% set columns = [] %}\n {% for row in table %}\n {% do columns.append(api.Column(*row)) %}\n {% endfor %}\n {{ return(columns) }}\n{% endmacro %}", @@ -5023,20 +6193,22 @@ "path": "macros/adapters/columns.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.sql_convert_columns_in_relation" }, "macro.dbt.statement": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.395661, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, - "macro_sql": "{% macro statement(name=None, fetch_result=False, auto_begin=True) -%}\n {%- if execute: -%}\n {%- set sql = caller() -%}\n\n {%- if name == 'main' -%}\n {{ log('Writing runtime SQL for node \"{}\"'.format(model['unique_id'])) }}\n {{ write(sql) }}\n {%- endif -%}\n\n {%- set res, table = adapter.execute(sql, auto_begin=auto_begin, fetch=fetch_result) -%}\n {%- if name is not none -%}\n {{ store_result(name, response=res, agate_table=table) }}\n {%- endif -%}\n\n {%- endif -%}\n{%- endmacro %}", + "macro_sql": "\n{%- macro statement(name=None, fetch_result=False, auto_begin=True, language='sql') -%}\n {%- if execute: -%}\n {%- set compiled_code = caller() -%}\n\n {%- if name == 'main' -%}\n {{ log('Writing runtime {} for node \"{}\"'.format(language, model['unique_id'])) }}\n {{ write(compiled_code) }}\n {%- endif -%}\n {%- if language == 'sql'-%}\n {%- set res, table = adapter.execute(compiled_code, auto_begin=auto_begin, fetch=fetch_result) -%}\n {%- elif language == 'python' -%}\n {%- set res = submit_python_job(model, 
compiled_code) -%}\n {#-- TODO: What should table be for python models? --#}\n {%- set table = None -%}\n {%- else -%}\n {% do exceptions.raise_compiler_error(\"statement macro didn't get supported language\") %}\n {%- endif -%}\n\n {%- if name is not none -%}\n {{ store_result(name, response=res, agate_table=table) }}\n {%- endif -%}\n\n {%- endif -%}\n{%- endmacro %}", "meta": {}, "name": "statement", "original_file_path": "macros/etc/statement.sql", @@ -5045,17 +6217,19 @@ "path": "macros/etc/statement.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.statement" }, "macro.dbt.strategy_dispatch": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.252231, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro strategy_dispatch(name) -%}\n{% set original_name = name %}\n {% if '.' in name %}\n {% set package_name, name = name.split(\".\", 1) %}\n {% else %}\n {% set package_name = none %}\n {% endif %}\n\n {% if package_name is none %}\n {% set package_context = context %}\n {% elif package_name in context %}\n {% set package_context = context[package_name] %}\n {% else %}\n {% set error_msg %}\n Could not find package '{{package_name}}', called with '{{original_name}}'\n {% endset %}\n {{ exceptions.raise_compiler_error(error_msg | trim) }}\n {% endif %}\n\n {%- set search_name = 'snapshot_' ~ name ~ '_strategy' -%}\n\n {% if search_name not in package_context %}\n {% set error_msg %}\n The specified strategy macro '{{name}}' was not found in package '{{ package_name }}'\n {% endset %}\n {{ exceptions.raise_compiler_error(error_msg | trim) }}\n {% endif %}\n {{ return(package_context[search_name]) }}\n{%- endmacro %}", @@ -5067,12 +6241,13 @@ "path": "macros/materializations/snapshots/strategies.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.strategy_dispatch" }, "macro.dbt.string_literal": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.451034, "depends_on": { "macros": [ "macro.dbt.default__string_literal" @@ -5080,6 +6255,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{%- macro string_literal(value) -%}\n {{ return(adapter.dispatch('string_literal', 'dbt') (value)) }}\n{%- endmacro -%}\n\n", @@ -5091,12 +6267,13 @@ "path": "macros/utils/literal.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.string_literal" }, "macro.dbt.support_multiple_grantees_per_dcl_statement": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.5004091, "depends_on": { "macros": [ "macro.dbt.default__support_multiple_grantees_per_dcl_statement" @@ -5104,6 +6281,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro support_multiple_grantees_per_dcl_statement() %}\n {{ return(adapter.dispatch('support_multiple_grantees_per_dcl_statement', 'dbt')()) }}\n{% endmacro %}", @@ -5115,12 +6293,13 @@ "path": "macros/adapters/apply_grants.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.support_multiple_grantees_per_dcl_statement" }, "macro.dbt.sync_column_schemas": { "arguments": [], - "created_at": 1663278957.5715818, + 
"created_at": 1670298551.3416152, "depends_on": { "macros": [ "macro.dbt.alter_relation_add_remove_columns", @@ -5129,6 +6308,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro sync_column_schemas(on_schema_change, target_relation, schema_changes_dict) %}\n\n {%- set add_to_target_arr = schema_changes_dict['source_not_in_target'] -%}\n\n {%- if on_schema_change == 'append_new_columns'-%}\n {%- if add_to_target_arr | length > 0 -%}\n {%- do alter_relation_add_remove_columns(target_relation, add_to_target_arr, none) -%}\n {%- endif -%}\n\n {% elif on_schema_change == 'sync_all_columns' %}\n {%- set remove_from_target_arr = schema_changes_dict['target_not_in_source'] -%}\n {%- set new_target_types = schema_changes_dict['new_target_types'] -%}\n\n {% if add_to_target_arr | length > 0 or remove_from_target_arr | length > 0 %}\n {%- do alter_relation_add_remove_columns(target_relation, add_to_target_arr, remove_from_target_arr) -%}\n {% endif %}\n\n {% if new_target_types != [] %}\n {% for ntt in new_target_types %}\n {% set column_name = ntt['column_name'] %}\n {% set new_type = ntt['new_type'] %}\n {% do alter_column_type(target_relation, column_name, new_type) %}\n {% endfor %}\n {% endif %}\n\n {% endif %}\n\n {% set schema_change_message %}\n In {{ target_relation }}:\n Schema change approach: {{ on_schema_change }}\n Columns added: {{ add_to_target_arr }}\n Columns removed: {{ remove_from_target_arr }}\n Data types changed: {{ new_target_types }}\n {% endset %}\n\n {% do log(schema_change_message) %}\n\n{% endmacro %}", @@ -5140,12 +6320,13 @@ "path": "macros/materializations/models/incremental/on_schema_change.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.sync_column_schemas" }, "macro.dbt.test_accepted_values": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.539191, "depends_on": { "macros": [ "macro.dbt.default__test_accepted_values" @@ -5153,6 +6334,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% test accepted_values(model, column_name, values, quote=True) %}\n {% set macro = adapter.dispatch('test_accepted_values', 'dbt') %}\n {{ macro(model, column_name, values, quote) }}\n{% endtest %}", @@ -5164,12 +6346,13 @@ "path": "tests/generic/builtin.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.test_accepted_values" }, "macro.dbt.test_not_null": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.538603, "depends_on": { "macros": [ "macro.dbt.default__test_not_null" @@ -5177,6 +6360,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% test not_null(model, column_name) %}\n {% set macro = adapter.dispatch('test_not_null', 'dbt') %}\n {{ macro(model, column_name) }}\n{% endtest %}", @@ -5188,12 +6372,13 @@ "path": "tests/generic/builtin.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.test_not_null" }, "macro.dbt.test_relationships": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.539754, "depends_on": { "macros": [ "macro.dbt.default__test_relationships" @@ -5201,6 +6386,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% test relationships(model, column_name, to, 
field) %}\n {% set macro = adapter.dispatch('test_relationships', 'dbt') %}\n {{ macro(model, column_name, to, field) }}\n{% endtest %}", @@ -5212,12 +6398,13 @@ "path": "tests/generic/builtin.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.test_relationships" }, "macro.dbt.test_unique": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.538126, "depends_on": { "macros": [ "macro.dbt.default__test_unique" @@ -5225,6 +6412,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% test unique(model, column_name) %}\n {% set macro = adapter.dispatch('test_unique', 'dbt') %}\n {{ macro(model, column_name) }}\n{% endtest %}", @@ -5236,12 +6424,13 @@ "path": "tests/generic/builtin.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.test_unique" }, "macro.dbt.truncate_relation": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.489314, "depends_on": { "macros": [ "macro.dbt.default__truncate_relation" @@ -5249,6 +6438,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro truncate_relation(relation) -%}\n {{ return(adapter.dispatch('truncate_relation', 'dbt')(relation)) }}\n{% endmacro %}", @@ -5260,12 +6450,13 @@ "path": "macros/adapters/relation.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.truncate_relation" }, "macro.dbt.type_bigint": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.457319, "depends_on": { "macros": [ "macro.dbt.default__type_bigint" @@ -5273,6 +6464,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "\n\n{%- macro type_bigint() -%}\n {{ return(adapter.dispatch('type_bigint', 'dbt')()) }}\n{%- endmacro -%}\n\n", @@ -5284,12 +6476,39 @@ "path": "macros/utils/data_types.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.type_bigint" }, + "macro.dbt.type_boolean": { + "arguments": [], + "created_at": 1670298551.459148, + "depends_on": { + "macros": [ + "macro.dbt.default__type_boolean" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "\n\n{%- macro type_boolean() -%}\n {{ return(adapter.dispatch('type_boolean', 'dbt')()) }}\n{%- endmacro -%}\n\n", + "meta": {}, + "name": "type_boolean", + "original_file_path": "macros/utils/data_types.sql", + "package_name": "dbt", + "patch_path": null, + "path": "macros/utils/data_types.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt.type_boolean" + }, "macro.dbt.type_float": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4554949, "depends_on": { "macros": [ "macro.dbt.default__type_float" @@ -5297,6 +6516,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "\n\n{%- macro type_float() -%}\n {{ return(adapter.dispatch('type_float', 'dbt')()) }}\n{%- endmacro -%}\n\n", @@ -5308,12 +6528,13 @@ "path": "macros/utils/data_types.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.type_float" }, 
"macro.dbt.type_int": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.45821, "depends_on": { "macros": [ "macro.dbt.default__type_int" @@ -5321,6 +6542,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "\n\n{%- macro type_int() -%}\n {{ return(adapter.dispatch('type_int', 'dbt')()) }}\n{%- endmacro -%}\n\n", @@ -5332,12 +6554,13 @@ "path": "macros/utils/data_types.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.type_int" }, "macro.dbt.type_numeric": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4563808, "depends_on": { "macros": [ "macro.dbt.default__type_numeric" @@ -5345,6 +6568,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "\n\n{%- macro type_numeric() -%}\n {{ return(adapter.dispatch('type_numeric', 'dbt')()) }}\n{%- endmacro -%}\n\n", @@ -5356,12 +6580,13 @@ "path": "macros/utils/data_types.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.type_numeric" }, "macro.dbt.type_string": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.4537349, "depends_on": { "macros": [ "macro.dbt.default__type_string" @@ -5369,6 +6594,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "\n\n{%- macro type_string() -%}\n {{ return(adapter.dispatch('type_string', 'dbt')()) }}\n{%- endmacro -%}\n\n", @@ -5380,12 +6606,13 @@ "path": "macros/utils/data_types.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.type_string" }, "macro.dbt.type_timestamp": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.454622, "depends_on": { "macros": [ "macro.dbt.default__type_timestamp" @@ -5393,6 +6620,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "\n\n{%- macro type_timestamp() -%}\n {{ return(adapter.dispatch('type_timestamp', 'dbt')()) }}\n{%- endmacro -%}\n\n", @@ -5404,12 +6632,13 @@ "path": "macros/utils/data_types.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt.type_timestamp" }, "macro.dbt_postgres.postgres__alter_column_comment": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.225419, "depends_on": { "macros": [ "macro.dbt_postgres.postgres_escape_comment" @@ -5417,6 +6646,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__alter_column_comment(relation, column_dict) %}\n {% set existing_columns = adapter.get_columns_in_relation(relation) | map(attribute=\"name\") | list %}\n {% for column_name in column_dict if (column_name in existing_columns) %}\n {% set comment = column_dict[column_name]['description'] %}\n {% set escaped_comment = postgres_escape_comment(comment) %}\n comment on column {{ relation }}.{{ adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name }} is {{ escaped_comment }};\n {% endfor %}\n{% endmacro %}", @@ -5428,12 +6658,13 @@ "path": "macros/adapters.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__alter_column_comment" }, 
"macro.dbt_postgres.postgres__alter_relation_comment": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.2241971, "depends_on": { "macros": [ "macro.dbt_postgres.postgres_escape_comment" @@ -5441,6 +6672,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__alter_relation_comment(relation, comment) %}\n {% set escaped_comment = postgres_escape_comment(comment) %}\n comment on {{ relation.type }} {{ relation }} is {{ escaped_comment }};\n{% endmacro %}", @@ -5452,17 +6684,19 @@ "path": "macros/adapters.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__alter_relation_comment" }, "macro.dbt_postgres.postgres__any_value": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.236993, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__any_value(expression) -%}\n\n min({{ expression }})\n\n{%- endmacro %}", @@ -5474,12 +6708,13 @@ "path": "macros/utils/any_value.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__any_value" }, "macro.dbt_postgres.postgres__check_schema_exists": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.21941, "depends_on": { "macros": [ "macro.dbt.statement" @@ -5487,6 +6722,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__check_schema_exists(information_schema, schema) -%}\n {% if information_schema.database -%}\n {{ adapter.verify_database(information_schema.database) }}\n {%- endif -%}\n {% call statement('check_schema_exists', fetch_result=True, auto_begin=False) %}\n select count(*) from pg_namespace where nspname = '{{ schema }}'\n {% endcall %}\n {{ return(load_result('check_schema_exists').table) }}\n{% endmacro %}", @@ -5498,17 +6734,19 @@ "path": "macros/adapters.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__check_schema_exists" }, "macro.dbt_postgres.postgres__copy_grants": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.2261271, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__copy_grants() %}\n {{ return(False) }}\n{% endmacro %}", @@ -5520,12 +6758,13 @@ "path": "macros/adapters.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__copy_grants" }, "macro.dbt_postgres.postgres__create_schema": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.215181, "depends_on": { "macros": [ "macro.dbt.statement" @@ -5533,6 +6772,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__create_schema(relation) -%}\n {% if relation.database -%}\n {{ adapter.verify_database(relation.database) }}\n {%- endif -%}\n {%- call statement('create_schema') -%}\n create schema if not exists {{ relation.without_identifier().include(database=False) }}\n {%- endcall -%}\n{% endmacro %}", @@ -5544,17 +6784,19 @@ "path": "macros/adapters.sql", "resource_type": "macro", "root_path": 
"/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__create_schema" }, "macro.dbt_postgres.postgres__create_table_as": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.2133482, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__create_table_as(temporary, relation, sql) -%}\n {%- set unlogged = config.get('unlogged', default=false) -%}\n {%- set sql_header = config.get('sql_header', none) -%}\n\n {{ sql_header if sql_header is not none }}\n\n create {% if temporary -%}\n temporary\n {%- elif unlogged -%}\n unlogged\n {%- endif %} table {{ relation }}\n as (\n {{ sql }}\n );\n{%- endmacro %}", @@ -5566,39 +6808,95 @@ "path": "macros/adapters.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__create_table_as" }, "macro.dbt_postgres.postgres__current_timestamp": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.1987891, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, - "macro_sql": "{% macro postgres__current_timestamp() -%}\n now()\n{%- endmacro %}", + "macro_sql": "{% macro postgres__current_timestamp() -%}\n now()\n{%- endmacro %}", "meta": {}, "name": "postgres__current_timestamp", - "original_file_path": "macros/adapters.sql", + "original_file_path": "macros/timestamps.sql", "package_name": "dbt_postgres", "patch_path": null, - "path": "macros/adapters.sql", + "path": "macros/timestamps.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__current_timestamp" }, + "macro.dbt_postgres.postgres__current_timestamp_backcompat": { + "arguments": [], + "created_at": 1670298551.200057, + "depends_on": { + "macros": [ + "macro.dbt.type_timestamp" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro postgres__current_timestamp_backcompat() %}\n current_timestamp::{{ type_timestamp() }}\n{% endmacro %}", + "meta": {}, + "name": "postgres__current_timestamp_backcompat", + "original_file_path": "macros/timestamps.sql", + "package_name": "dbt_postgres", + "patch_path": null, + "path": "macros/timestamps.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt_postgres.postgres__current_timestamp_backcompat" + }, + "macro.dbt_postgres.postgres__current_timestamp_in_utc_backcompat": { + "arguments": [], + "created_at": 1670298551.2002811, + "depends_on": { + "macros": [ + "macro.dbt.type_timestamp" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro postgres__current_timestamp_in_utc_backcompat() %}\n (current_timestamp at time zone 'utc')::{{ type_timestamp() }}\n{% endmacro %}", + "meta": {}, + "name": "postgres__current_timestamp_in_utc_backcompat", + "original_file_path": "macros/timestamps.sql", + "package_name": "dbt_postgres", + "patch_path": null, + "path": "macros/timestamps.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt_postgres.postgres__current_timestamp_in_utc_backcompat" + }, "macro.dbt_postgres.postgres__dateadd": { "arguments": [], - 
"created_at": 1663278957.5715818, + "created_at": 1670298551.229297, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__dateadd(datepart, interval, from_date_or_timestamp) %}\n\n {{ from_date_or_timestamp }} + ((interval '1 {{ datepart }}') * ({{ interval }}))\n\n{% endmacro %}", @@ -5610,12 +6908,13 @@ "path": "macros/utils/dateadd.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__dateadd" }, "macro.dbt_postgres.postgres__datediff": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.236534, "depends_on": { "macros": [ "macro.dbt.datediff" @@ -5623,6 +6922,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__datediff(first_date, second_date, datepart) -%}\n\n {% if datepart == 'year' %}\n (date_part('year', ({{second_date}})::date) - date_part('year', ({{first_date}})::date))\n {% elif datepart == 'quarter' %}\n ({{ datediff(first_date, second_date, 'year') }} * 4 + date_part('quarter', ({{second_date}})::date) - date_part('quarter', ({{first_date}})::date))\n {% elif datepart == 'month' %}\n ({{ datediff(first_date, second_date, 'year') }} * 12 + date_part('month', ({{second_date}})::date) - date_part('month', ({{first_date}})::date))\n {% elif datepart == 'day' %}\n (({{second_date}})::date - ({{first_date}})::date)\n {% elif datepart == 'week' %}\n ({{ datediff(first_date, second_date, 'day') }} / 7 + case\n when date_part('dow', ({{first_date}})::timestamp) <= date_part('dow', ({{second_date}})::timestamp) then\n case when {{first_date}} <= {{second_date}} then 0 else -1 end\n else\n case when {{first_date}} <= {{second_date}} then 1 else 0 end\n end)\n {% elif datepart == 'hour' %}\n ({{ datediff(first_date, second_date, 'day') }} * 24 + date_part('hour', ({{second_date}})::timestamp) - date_part('hour', ({{first_date}})::timestamp))\n {% elif datepart == 'minute' %}\n ({{ datediff(first_date, second_date, 'hour') }} * 60 + date_part('minute', ({{second_date}})::timestamp) - date_part('minute', ({{first_date}})::timestamp))\n {% elif datepart == 'second' %}\n ({{ datediff(first_date, second_date, 'minute') }} * 60 + floor(date_part('second', ({{second_date}})::timestamp)) - floor(date_part('second', ({{first_date}})::timestamp)))\n {% elif datepart == 'millisecond' %}\n ({{ datediff(first_date, second_date, 'minute') }} * 60000 + floor(date_part('millisecond', ({{second_date}})::timestamp)) - floor(date_part('millisecond', ({{first_date}})::timestamp)))\n {% elif datepart == 'microsecond' %}\n ({{ datediff(first_date, second_date, 'minute') }} * 60000000 + floor(date_part('microsecond', ({{second_date}})::timestamp)) - floor(date_part('microsecond', ({{first_date}})::timestamp)))\n {% else %}\n {{ exceptions.raise_compiler_error(\"Unsupported datepart for macro datediff in postgres: {!r}\".format(datepart)) }}\n {% endif %}\n\n{%- endmacro %}", @@ -5634,12 +6934,13 @@ "path": "macros/utils/datediff.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__datediff" }, "macro.dbt_postgres.postgres__drop_schema": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.2158039, "depends_on": { "macros": [ "macro.dbt.statement" @@ -5647,6 +6948,7 @@ }, "description": "", "docs": { + "node_color": 
null, "show": true }, "macro_sql": "{% macro postgres__drop_schema(relation) -%}\n {% if relation.database -%}\n {{ adapter.verify_database(relation.database) }}\n {%- endif -%}\n {%- call statement('drop_schema') -%}\n drop schema if exists {{ relation.without_identifier().include(database=False) }} cascade\n {%- endcall -%}\n{% endmacro %}", @@ -5658,12 +6960,13 @@ "path": "macros/adapters.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__drop_schema" }, "macro.dbt_postgres.postgres__get_catalog": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.202367, "depends_on": { "macros": [ "macro.dbt.statement" @@ -5671,6 +6974,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__get_catalog(information_schema, schemas) -%}\n\n {%- call statement('catalog', fetch_result=True) -%}\n {#\n If the user has multiple databases set and the first one is wrong, this will fail.\n But we won't fail in the case where there are multiple quoting-difference-only dbs, which is better.\n #}\n {% set database = information_schema.database %}\n {{ adapter.verify_database(database) }}\n\n select\n '{{ database }}' as table_database,\n sch.nspname as table_schema,\n tbl.relname as table_name,\n case tbl.relkind\n when 'v' then 'VIEW'\n else 'BASE TABLE'\n end as table_type,\n tbl_desc.description as table_comment,\n col.attname as column_name,\n col.attnum as column_index,\n pg_catalog.format_type(col.atttypid, col.atttypmod) as column_type,\n col_desc.description as column_comment,\n pg_get_userbyid(tbl.relowner) as table_owner\n\n from pg_catalog.pg_namespace sch\n join pg_catalog.pg_class tbl on tbl.relnamespace = sch.oid\n join pg_catalog.pg_attribute col on col.attrelid = tbl.oid\n left outer join pg_catalog.pg_description tbl_desc on (tbl_desc.objoid = tbl.oid and tbl_desc.objsubid = 0)\n left outer join pg_catalog.pg_description col_desc on (col_desc.objoid = tbl.oid and col_desc.objsubid = col.attnum)\n\n where (\n {%- for schema in schemas -%}\n upper(sch.nspname) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%}\n {%- endfor -%}\n )\n and not pg_is_other_temp_schema(sch.oid) -- not a temporary schema belonging to another session\n and tbl.relpersistence in ('p', 'u') -- [p]ermanent table or [u]nlogged table. Exclude [t]emporary tables\n and tbl.relkind in ('r', 'v', 'f', 'p') -- o[r]dinary table, [v]iew, [f]oreign table, [p]artitioned table. 
Other values are [i]ndex, [S]equence, [c]omposite type, [t]OAST table, [m]aterialized view\n and col.attnum > 0 -- negative numbers are used for system columns such as oid\n and not col.attisdropped -- column as not been dropped\n\n order by\n sch.nspname,\n tbl.relname,\n col.attnum\n\n {%- endcall -%}\n\n {{ return(load_result('catalog').table) }}\n\n{%- endmacro %}", @@ -5682,12 +6986,13 @@ "path": "macros/catalog.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__get_catalog" }, "macro.dbt_postgres.postgres__get_columns_in_relation": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.2167602, "depends_on": { "macros": [ "macro.dbt.statement", @@ -5696,6 +7001,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__get_columns_in_relation(relation) -%}\n {% call statement('get_columns_in_relation', fetch_result=True) %}\n select\n column_name,\n data_type,\n character_maximum_length,\n numeric_precision,\n numeric_scale\n\n from {{ relation.information_schema('columns') }}\n where table_name = '{{ relation.identifier }}'\n {% if relation.schema %}\n and table_schema = '{{ relation.schema }}'\n {% endif %}\n order by ordinal_position\n\n {% endcall %}\n {% set table = load_result('get_columns_in_relation').table %}\n {{ return(sql_convert_columns_in_relation(table)) }}\n{% endmacro %}", @@ -5707,17 +7013,19 @@ "path": "macros/adapters.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__get_columns_in_relation" }, "macro.dbt_postgres.postgres__get_create_index_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.214549, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__get_create_index_sql(relation, index_dict) -%}\n {%- set index_config = adapter.parse_index(index_dict) -%}\n {%- set comma_separated_columns = \", \".join(index_config.columns) -%}\n {%- set index_name = index_config.render(relation) -%}\n\n create {% if index_config.unique -%}\n unique\n {%- endif %} index if not exists\n \"{{ index_name }}\"\n on {{ relation }} {% if index_config.type -%}\n using {{ index_config.type }}\n {%- endif %}\n ({{ comma_separated_columns }});\n{%- endmacro %}", @@ -5729,17 +7037,46 @@ "path": "macros/adapters.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__get_create_index_sql" }, + "macro.dbt_postgres.postgres__get_incremental_default_sql": { + "arguments": [], + "created_at": 1670298551.2271192, + "depends_on": { + "macros": [ + "macro.dbt.get_incremental_delete_insert_sql", + "macro.dbt.get_incremental_append_sql" + ] + }, + "description": "", + "docs": { + "node_color": null, + "show": true + }, + "macro_sql": "{% macro postgres__get_incremental_default_sql(arg_dict) %}\n\n {% if arg_dict[\"unique_key\"] %}\n {% do return(get_incremental_delete_insert_sql(arg_dict)) %}\n {% else %}\n {% do return(get_incremental_append_sql(arg_dict)) %}\n {% endif %}\n\n{% endmacro %}", + "meta": {}, + "name": "postgres__get_incremental_default_sql", + "original_file_path": "macros/materializations/incremental_strategies.sql", + "package_name": "dbt_postgres", + "patch_path": null, + "path": 
"macros/materializations/incremental_strategies.sql", + "resource_type": "macro", + "root_path": "/some-path/sample-dbt", + "supported_languages": null, + "tags": [], + "unique_id": "macro.dbt_postgres.postgres__get_incremental_default_sql" + }, "macro.dbt_postgres.postgres__get_show_grant_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.225875, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "\n\n{%- macro postgres__get_show_grant_sql(relation) -%}\n select grantee, privilege_type\n from {{ relation.information_schema('role_table_grants') }}\n where grantor = current_role\n and grantee != current_role\n and table_schema = '{{ relation.schema }}'\n and table_name = '{{ relation.identifier }}'\n{%- endmacro -%}\n\n", @@ -5751,17 +7088,19 @@ "path": "macros/adapters.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__get_show_grant_sql" }, "macro.dbt_postgres.postgres__information_schema_name": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.217869, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__information_schema_name(database) -%}\n {% if database_name -%}\n {{ adapter.verify_database(database_name) }}\n {%- endif -%}\n information_schema\n{%- endmacro %}", @@ -5773,12 +7112,13 @@ "path": "macros/adapters.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__information_schema_name" }, "macro.dbt_postgres.postgres__last_day": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.238176, "depends_on": { "macros": [ "macro.dbt.dateadd", @@ -5788,6 +7128,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__last_day(date, datepart) -%}\n\n {%- if datepart == 'quarter' -%}\n -- postgres dateadd does not support quarter interval.\n cast(\n {{dbt.dateadd('day', '-1',\n dbt.dateadd('month', '3', dbt.date_trunc(datepart, date))\n )}}\n as date)\n {%- else -%}\n {{dbt.default_last_day(date, datepart)}}\n {%- endif -%}\n\n{%- endmacro %}", @@ -5799,12 +7140,13 @@ "path": "macros/utils/last_day.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__last_day" }, "macro.dbt_postgres.postgres__list_relations_without_caching": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.2175071, "depends_on": { "macros": [ "macro.dbt.statement" @@ -5812,6 +7154,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__list_relations_without_caching(schema_relation) %}\n {% call statement('list_relations_without_caching', fetch_result=True) -%}\n select\n '{{ schema_relation.database }}' as database,\n tablename as name,\n schemaname as schema,\n 'table' as type\n from pg_tables\n where schemaname ilike '{{ schema_relation.schema }}'\n union all\n select\n '{{ schema_relation.database }}' as database,\n viewname as name,\n schemaname as schema,\n 'view' as type\n from pg_views\n where schemaname ilike '{{ schema_relation.schema }}'\n {% endcall %}\n {{ return(load_result('list_relations_without_caching').table) }}\n{% endmacro %}", @@ 
-5823,12 +7166,13 @@ "path": "macros/adapters.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__list_relations_without_caching" }, "macro.dbt_postgres.postgres__list_schemas": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.218627, "depends_on": { "macros": [ "macro.dbt.statement" @@ -5836,6 +7180,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__list_schemas(database) %}\n {% if database -%}\n {{ adapter.verify_database(database) }}\n {%- endif -%}\n {% call statement('list_schemas', fetch_result=True, auto_begin=False) %}\n select distinct nspname from pg_namespace\n {% endcall %}\n {{ return(load_result('list_schemas').table) }}\n{% endmacro %}", @@ -5847,17 +7192,19 @@ "path": "macros/adapters.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__list_schemas" }, "macro.dbt_postgres.postgres__listagg": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.2307389, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__listagg(measure, delimiter_text, order_by_clause, limit_num) -%}\n\n {% if limit_num -%}\n array_to_string(\n (array_agg(\n {{ measure }}\n {% if order_by_clause -%}\n {{ order_by_clause }}\n {%- endif %}\n ))[1:{{ limit_num }}],\n {{ delimiter_text }}\n )\n {%- else %}\n string_agg(\n {{ measure }},\n {{ delimiter_text }}\n {% if order_by_clause -%}\n {{ order_by_clause }}\n {%- endif %}\n )\n {%- endif %}\n\n{%- endmacro %}", @@ -5869,12 +7216,13 @@ "path": "macros/utils/listagg.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__listagg" }, "macro.dbt_postgres.postgres__make_backup_relation": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.222691, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__make_relation_with_suffix" @@ -5882,6 +7230,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__make_backup_relation(base_relation, backup_relation_type, suffix) %}\n {% set backup_relation = postgres__make_relation_with_suffix(base_relation, suffix, dstring=False) %}\n {{ return(backup_relation.incorporate(type=backup_relation_type)) }}\n{% endmacro %}", @@ -5893,12 +7242,13 @@ "path": "macros/adapters.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__make_backup_relation" }, "macro.dbt_postgres.postgres__make_intermediate_relation": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.221505, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__make_relation_with_suffix" @@ -5906,6 +7256,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__make_intermediate_relation(base_relation, suffix) %}\n {{ return(postgres__make_relation_with_suffix(base_relation, suffix, dstring=False)) }}\n{% endmacro %}", @@ -5917,17 +7268,19 @@ "path": "macros/adapters.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": 
"macro.dbt_postgres.postgres__make_intermediate_relation" }, "macro.dbt_postgres.postgres__make_relation_with_suffix": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.221094, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__make_relation_with_suffix(base_relation, suffix, dstring) %}\n {% if dstring %}\n {% set dt = modules.datetime.datetime.now() %}\n {% set dtstring = dt.strftime(\"%H%M%S%f\") %}\n {% set suffix = suffix ~ dtstring %}\n {% endif %}\n {% set suffix_length = suffix|length %}\n {% set relation_max_name_length = base_relation.relation_max_name_length() %}\n {% if suffix_length > relation_max_name_length %}\n {% do exceptions.raise_compiler_error('Relation suffix is too long (' ~ suffix_length ~ ' characters). Maximum length is ' ~ relation_max_name_length ~ ' characters.') %}\n {% endif %}\n {% set identifier = base_relation.identifier[:relation_max_name_length - suffix_length] ~ suffix %}\n\n {{ return(base_relation.incorporate(path={\"identifier\": identifier })) }}\n\n {% endmacro %}", @@ -5939,12 +7292,13 @@ "path": "macros/adapters.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__make_relation_with_suffix" }, "macro.dbt_postgres.postgres__make_temp_relation": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.222146, "depends_on": { "macros": [ "macro.dbt_postgres.postgres__make_relation_with_suffix" @@ -5952,6 +7306,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__make_temp_relation(base_relation, suffix) %}\n {% set temp_relation = postgres__make_relation_with_suffix(base_relation, suffix, dstring=True) %}\n {{ return(temp_relation.incorporate(path={\"schema\": none,\n \"database\": none})) }}\n{% endmacro %}", @@ -5963,12 +7318,13 @@ "path": "macros/adapters.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__make_temp_relation" }, "macro.dbt_postgres.postgres__snapshot_get_time": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.199831, "depends_on": { "macros": [ "macro.dbt.current_timestamp" @@ -5976,28 +7332,31 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__snapshot_get_time() -%}\n {{ current_timestamp() }}::timestamp without time zone\n{%- endmacro %}", "meta": {}, "name": "postgres__snapshot_get_time", - "original_file_path": "macros/adapters.sql", + "original_file_path": "macros/timestamps.sql", "package_name": "dbt_postgres", "patch_path": null, - "path": "macros/adapters.sql", + "path": "macros/timestamps.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__snapshot_get_time" }, "macro.dbt_postgres.postgres__snapshot_merge_sql": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.2286642, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__snapshot_merge_sql(target, source, insert_cols) -%}\n {%- set insert_cols_csv = insert_cols | join(', ') -%}\n\n update {{ target }}\n set dbt_valid_to = DBT_INTERNAL_SOURCE.dbt_valid_to\n from {{ source }} as 
DBT_INTERNAL_SOURCE\n where DBT_INTERNAL_SOURCE.dbt_scd_id::text = {{ target }}.dbt_scd_id::text\n and DBT_INTERNAL_SOURCE.dbt_change_type::text in ('update'::text, 'delete'::text)\n and {{ target }}.dbt_valid_to is null;\n\n insert into {{ target }} ({{ insert_cols_csv }})\n select {% for column in insert_cols -%}\n DBT_INTERNAL_SOURCE.{{ column }} {%- if not loop.last %}, {%- endif %}\n {%- endfor %}\n from {{ source }} as DBT_INTERNAL_SOURCE\n where DBT_INTERNAL_SOURCE.dbt_change_type::text = 'insert'::text;\n{% endmacro %}", @@ -6009,34 +7368,37 @@ "path": "macros/materializations/snapshot_merge.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__snapshot_merge_sql" }, "macro.dbt_postgres.postgres__snapshot_string_as_time": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.1996002, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__snapshot_string_as_time(timestamp) -%}\n {%- set result = \"'\" ~ timestamp ~ \"'::timestamp without time zone\" -%}\n {{ return(result) }}\n{%- endmacro %}", "meta": {}, "name": "postgres__snapshot_string_as_time", - "original_file_path": "macros/adapters.sql", + "original_file_path": "macros/timestamps.sql", "package_name": "dbt_postgres", "patch_path": null, - "path": "macros/adapters.sql", + "path": "macros/timestamps.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__snapshot_string_as_time" }, "macro.dbt_postgres.postgres__split_part": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.2391849, "depends_on": { "macros": [ "macro.dbt.default__split_part", @@ -6045,6 +7407,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres__split_part(string_text, delimiter_text, part_number) %}\n\n {% if part_number >= 0 %}\n {{ dbt.default__split_part(string_text, delimiter_text, part_number) }}\n {% else %}\n {{ dbt._split_part_negative(string_text, delimiter_text, part_number) }}\n {% endif %}\n\n{% endmacro %}", @@ -6056,17 +7419,19 @@ "path": "macros/utils/split_part.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres__split_part" }, "macro.dbt_postgres.postgres_escape_comment": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.223697, "depends_on": { "macros": [] }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres_escape_comment(comment) -%}\n {% if comment is not string %}\n {% do exceptions.raise_compiler_error('cannot escape a non-string: ' ~ comment) %}\n {% endif %}\n {%- set magic = '$dbt_comment_literal_block$' -%}\n {%- if magic in comment -%}\n {%- do exceptions.raise_compiler_error('The string ' ~ magic ~ ' is not allowed in comments.') -%}\n {%- endif -%}\n {{ magic }}{{ comment }}{{ magic }}\n{%- endmacro %}", @@ -6078,12 +7443,13 @@ "path": "macros/adapters.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres_escape_comment" }, "macro.dbt_postgres.postgres_get_relations": { "arguments": [], - "created_at": 1663278957.5715818, + "created_at": 1670298551.203551, 
"depends_on": { "macros": [ "macro.dbt.statement" @@ -6091,6 +7457,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "macro_sql": "{% macro postgres_get_relations () -%}\n\n {#\n -- in pg_depend, objid is the dependent, refobjid is the referenced object\n -- > a pg_depend entry indicates that the referenced object cannot be\n -- > dropped without also dropping the dependent object.\n #}\n\n {%- call statement('relations', fetch_result=True) -%}\n with relation as (\n select\n pg_rewrite.ev_class as class,\n pg_rewrite.oid as id\n from pg_rewrite\n ),\n class as (\n select\n oid as id,\n relname as name,\n relnamespace as schema,\n relkind as kind\n from pg_class\n ),\n dependency as (\n select distinct\n pg_depend.objid as id,\n pg_depend.refobjid as ref\n from pg_depend\n ),\n schema as (\n select\n pg_namespace.oid as id,\n pg_namespace.nspname as name\n from pg_namespace\n where nspname != 'information_schema' and nspname not like 'pg\\_%'\n ),\n referenced as (\n select\n relation.id AS id,\n referenced_class.name ,\n referenced_class.schema ,\n referenced_class.kind\n from relation\n join class as referenced_class on relation.class=referenced_class.id\n where referenced_class.kind in ('r', 'v')\n ),\n relationships as (\n select\n referenced.name as referenced_name,\n referenced.schema as referenced_schema_id,\n dependent_class.name as dependent_name,\n dependent_class.schema as dependent_schema_id,\n referenced.kind as kind\n from referenced\n join dependency on referenced.id=dependency.id\n join class as dependent_class on dependency.ref=dependent_class.id\n where\n (referenced.name != dependent_class.name or\n referenced.schema != dependent_class.schema)\n )\n\n select\n referenced_schema.name as referenced_schema,\n relationships.referenced_name as referenced_name,\n dependent_schema.name as dependent_schema,\n relationships.dependent_name as dependent_name\n from relationships\n join schema as dependent_schema on relationships.dependent_schema_id=dependent_schema.id\n join schema as referenced_schema on relationships.referenced_schema_id=referenced_schema.id\n group by referenced_schema, referenced_name, dependent_schema, dependent_name\n order by referenced_schema, referenced_name, dependent_schema, dependent_name;\n\n {%- endcall -%}\n\n {{ return(load_result('relations').table) }}\n{% endmacro %}", @@ -6102,16 +7469,17 @@ "path": "macros/relations.sql", "resource_type": "macro", "root_path": "/some-path/sample-dbt", + "supported_languages": null, "tags": [], "unique_id": "macro.dbt_postgres.postgres_get_relations" } }, "metadata": { "adapter_type": "postgres", - "dbt_schema_version": "https://schemas.getdbt.com/dbt/manifest/v6.json", - "dbt_version": "1.2.1", + "dbt_schema_version": "https://schemas.getdbt.com/dbt/manifest/v7.json", + "dbt_version": "1.3.0", "env": {}, - "generated_at": "2021-06-18T21:38:36.384613Z", + "generated_at": "2022-12-06T03:49:11.154513Z", "invocation_id": "just-some-random-id", "project_id": "d7ed27e3f02ff95ca753dd76aed208ee", "send_anonymous_usage_stats": true, @@ -6128,18 +7496,24 @@ }, "columns": {}, "compiled": true, + "compiled_code": "\n\nSELECT\n c.customer_id,\n c.first_name || ' ' || c.last_name as \"full_name\",\n c.email,\n a.address,\n m.city,\n a.postal_code,\n a.phone\nFROM\n \"pagila\".\"public\".\"customer\" c\n left outer join \"pagila\".\"public\".\"address\" a on c.address_id = a.address_id\n left outer join \"pagila\".\"public\".\"city\" m on a.city_id = m.city_id", "compiled_path": 
"target/compiled/sample_dbt/models/transform/customer_details.sql", - "compiled_sql": "\n\nSELECT\n c.customer_id,\n c.first_name || ' ' || c.last_name as \"full_name\",\n c.email,\n a.address,\n m.city,\n a.postal_code,\n a.phone\nFROM\n \"pagila\".\"public\".\"customer\" c\n left outer join \"pagila\".\"public\".\"address\" a on c.address_id = a.address_id\n left outer join \"pagila\".\"public\".\"city\" m on a.city_id = m.city_id", "config": { "alias": null, "column_types": {}, "database": null, + "docs": { + "node_color": null, + "show": true + }, "enabled": true, "full_refresh": null, "grants": {}, + "incremental_strategy": null, "materialized": "ephemeral", "meta": {}, "on_schema_change": "ignore", + "packages": [], "persist_docs": {}, "post-hook": [], "pre-hook": [], @@ -6148,19 +7522,20 @@ "tags": [], "unique_key": null }, - "created_at": 1663278957.5715818, + "created_at": 1670298551.7804742, "database": "pagila", "deferred": false, "depends_on": { "macros": [], "nodes": [ - "source.sample_dbt.pagila.city", "source.sample_dbt.pagila.customer", - "source.sample_dbt.pagila.address" + "source.sample_dbt.pagila.address", + "source.sample_dbt.pagila.city" ] }, "description": "", "docs": { + "node_color": null, "show": true }, "extra_ctes": [], @@ -6170,6 +7545,7 @@ "transform", "customer_details" ], + "language": "sql", "meta": {}, "metrics": [], "name": "customer_details", @@ -6177,7 +7553,7 @@ "package_name": "sample_dbt", "patch_path": null, "path": "transform/customer_details.sql", - "raw_sql": "{{ config(\n materialized = \"ephemeral\",\n) }}\n\nSELECT\n c.customer_id,\n c.first_name || ' ' || c.last_name as \"full_name\",\n c.email,\n a.address,\n m.city,\n a.postal_code,\n a.phone\nFROM\n {{ source('pagila', 'customer')}} c\n left outer join {{ source('pagila', 'address')}} a on c.address_id = a.address_id\n left outer join {{ source('pagila', 'city') }} m on a.city_id = m.city_id", + "raw_code": "{{ config(\n materialized = \"ephemeral\",\n) }}\n\nSELECT\n c.customer_id,\n c.first_name || ' ' || c.last_name as \"full_name\",\n c.email,\n a.address,\n m.city,\n a.postal_code,\n a.phone\nFROM\n {{ source('pagila', 'customer')}} c\n left outer join {{ source('pagila', 'address')}} a on c.address_id = a.address_id\n left outer join {{ source('pagila', 'city') }} m on a.city_id = m.city_id", "refs": [], "relation_name": null, "resource_type": "model", @@ -6186,15 +7562,15 @@ "sources": [ [ "pagila", - "city" + "customer" ], [ "pagila", - "customer" + "address" ], [ "pagila", - "address" + "city" ] ], "tags": [], @@ -6212,18 +7588,24 @@ }, "columns": {}, "compiled": true, + "compiled_code": "\n\nwith __dbt__cte__customer_details as (\n\n\nSELECT\n c.customer_id,\n c.first_name || ' ' || c.last_name as \"full_name\",\n c.email,\n a.address,\n m.city,\n a.postal_code,\n a.phone\nFROM\n \"pagila\".\"public\".\"customer\" c\n left outer join \"pagila\".\"public\".\"address\" a on c.address_id = a.address_id\n left outer join \"pagila\".\"public\".\"city\" m on a.city_id = m.city_id\n)SELECT \n pbc.billing_month,\n pbc.customer_id,\n pbc.amount,\n cust.email\nFROM\n \"pagila\".\"dbt_postgres\".\"payments_by_customer_by_month\" pbc\n left outer join __dbt__cte__customer_details cust on pbc.customer_id = cust.customer_id\nORDER BY\n pbc.billing_month", "compiled_path": "target/compiled/sample_dbt/models/billing/monthly_billing_with_cust.sql", - "compiled_sql": "\n\nwith __dbt__cte__customer_details as (\n\n\nSELECT\n c.customer_id,\n c.first_name || ' ' || c.last_name as \"full_name\",\n 
c.email,\n a.address,\n m.city,\n a.postal_code,\n a.phone\nFROM\n \"pagila\".\"public\".\"customer\" c\n left outer join \"pagila\".\"public\".\"address\" a on c.address_id = a.address_id\n left outer join \"pagila\".\"public\".\"city\" m on a.city_id = m.city_id\n)SELECT \n pbc.billing_month,\n pbc.customer_id,\n pbc.amount,\n cust.email\nFROM\n \"pagila\".\"dbt_postgres\".\"payments_by_customer_by_month\" pbc\n left outer join __dbt__cte__customer_details cust on pbc.customer_id = cust.customer_id\nORDER BY\n pbc.billing_month", "config": { "alias": "an-aliased-view-for-monthly-billing", "column_types": {}, "database": null, + "docs": { + "node_color": null, + "show": true + }, "enabled": true, "full_refresh": null, "grants": {}, + "incremental_strategy": null, "materialized": "table", "meta": {}, "on_schema_change": "ignore", + "packages": [], "persist_docs": {}, "post-hook": [], "pre-hook": [], @@ -6232,7 +7614,7 @@ "tags": [], "unique_key": null }, - "created_at": 1663278957.5715818, + "created_at": 1670298551.786046, "database": "pagila", "deferred": false, "depends_on": { @@ -6244,6 +7626,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "extra_ctes": [ @@ -6258,6 +7641,7 @@ "billing", "monthly_billing_with_cust" ], + "language": "sql", "meta": {}, "metrics": [], "name": "monthly_billing_with_cust", @@ -6265,7 +7649,7 @@ "package_name": "sample_dbt", "patch_path": null, "path": "billing/monthly_billing_with_cust.sql", - "raw_sql": "{{ config(\n materialized = \"table\",\n alias='an-aliased-view-for-monthly-billing'\n) }}\n\nSELECT \n pbc.billing_month,\n pbc.customer_id,\n pbc.amount,\n cust.email\nFROM\n {{ ref('payments_by_customer_by_month')}} pbc\n left outer join {{ ref('customer_details')}} cust on pbc.customer_id = cust.customer_id\nORDER BY\n pbc.billing_month", + "raw_code": "{{ config(\n materialized = \"table\",\n alias='an-aliased-view-for-monthly-billing'\n) }}\n\nSELECT \n pbc.billing_month,\n pbc.customer_id,\n pbc.amount,\n cust.email\nFROM\n {{ ref('payments_by_customer_by_month')}} pbc\n left outer join {{ ref('customer_details')}} cust on pbc.customer_id = cust.customer_id\nORDER BY\n pbc.billing_month", "refs": [ [ "payments_by_customer_by_month" @@ -6295,18 +7679,24 @@ }, "columns": {}, "compiled": true, + "compiled_code": "\n\nwith payments as (\n\n select \n *\n from \n \"pagila\".\"public\".\"payment_p2020_01\"\n UNION ALL\n select \n *\n from \n \"pagila\".\"public\".\"payment_p2020_02\"\n UNION ALL\n select \n *\n from \n \"pagila\".\"public\".\"payment_p2020_02\"\n UNION ALL\n select \n *\n from \n \"pagila\".\"public\".\"payment_p2020_03\"\n UNION ALL\n select \n *\n from \n \"pagila\".\"public\".\"payment_p2020_04\"\n UNION ALL\n select \n *\n from \n \"pagila\".\"public\".\"payment_p2020_05\"\n UNION ALL\n select \n *\n from \n \"pagila\".\"public\".\"payment_p2020_06\"\n)\n\nselect *\nfrom payments", "compiled_path": "target/compiled/sample_dbt/models/base/payments_base.sql", - "compiled_sql": "\n\nwith payments as (\n\n select \n *\n from \n \"pagila\".\"public\".\"payment_p2020_01\"\n UNION ALL\n select \n *\n from \n \"pagila\".\"public\".\"payment_p2020_02\"\n UNION ALL\n select \n *\n from \n \"pagila\".\"public\".\"payment_p2020_02\"\n UNION ALL\n select \n *\n from \n \"pagila\".\"public\".\"payment_p2020_03\"\n UNION ALL\n select \n *\n from \n \"pagila\".\"public\".\"payment_p2020_04\"\n UNION ALL\n select \n *\n from \n \"pagila\".\"public\".\"payment_p2020_05\"\n UNION ALL\n select \n *\n from \n 
\"pagila\".\"public\".\"payment_p2020_06\"\n)\n\nselect *\nfrom payments", "config": { "alias": "an-aliased-view-for-payments", "column_types": {}, "database": null, + "docs": { + "node_color": null, + "show": true + }, "enabled": true, "full_refresh": null, "grants": {}, + "incremental_strategy": null, "materialized": "view", "meta": {}, "on_schema_change": "ignore", + "packages": [], "persist_docs": {}, "post-hook": [], "pre-hook": [], @@ -6315,22 +7705,23 @@ "tags": [], "unique_key": null }, - "created_at": 1663278957.5715818, + "created_at": 1670298551.78323, "database": "pagila", "deferred": false, "depends_on": { "macros": [], "nodes": [ + "source.sample_dbt.pagila.payment_p2020_05", "source.sample_dbt.pagila.payment_p2020_04", "source.sample_dbt.pagila.payment_p2020_01", - "source.sample_dbt.pagila.payment_p2020_02", - "source.sample_dbt.pagila.payment_p2020_03", "source.sample_dbt.pagila.payment_p2020_06", - "source.sample_dbt.pagila.payment_p2020_05" + "source.sample_dbt.pagila.payment_p2020_02", + "source.sample_dbt.pagila.payment_p2020_03" ] }, "description": "", "docs": { + "node_color": null, "show": true }, "extra_ctes": [], @@ -6340,6 +7731,7 @@ "base", "payments_base" ], + "language": "sql", "meta": {}, "metrics": [], "name": "payments_base", @@ -6347,7 +7739,7 @@ "package_name": "sample_dbt", "patch_path": null, "path": "base/payments_base.sql", - "raw_sql": "{{ config(\n materialized=\"view\",\n alias='an-aliased-view-for-payments'\n) }}\n\nwith payments as (\n\n select \n *\n from \n {{ source('pagila', 'payment_p2020_01')}}\n UNION ALL\n select \n *\n from \n {{ source('pagila', 'payment_p2020_02')}}\n UNION ALL\n select \n *\n from \n {{ source('pagila', 'payment_p2020_02')}}\n UNION ALL\n select \n *\n from \n {{ source('pagila', 'payment_p2020_03')}}\n UNION ALL\n select \n *\n from \n {{ source('pagila', 'payment_p2020_04')}}\n UNION ALL\n select \n *\n from \n {{ source('pagila', 'payment_p2020_05')}}\n UNION ALL\n select \n *\n from \n {{ source('pagila', 'payment_p2020_06')}}\n)\n\nselect *\nfrom payments", + "raw_code": "{{ config(\n materialized=\"view\",\n alias='an-aliased-view-for-payments'\n) }}\n\nwith payments as (\n\n select \n *\n from \n {{ source('pagila', 'payment_p2020_01')}}\n UNION ALL\n select \n *\n from \n {{ source('pagila', 'payment_p2020_02')}}\n UNION ALL\n select \n *\n from \n {{ source('pagila', 'payment_p2020_02')}}\n UNION ALL\n select \n *\n from \n {{ source('pagila', 'payment_p2020_03')}}\n UNION ALL\n select \n *\n from \n {{ source('pagila', 'payment_p2020_04')}}\n UNION ALL\n select \n *\n from \n {{ source('pagila', 'payment_p2020_05')}}\n UNION ALL\n select \n *\n from \n {{ source('pagila', 'payment_p2020_06')}}\n)\n\nselect *\nfrom payments", "refs": [], "relation_name": "\"pagila\".\"dbt_postgres\".\"an-aliased-view-for-payments\"", "resource_type": "model", @@ -6356,27 +7748,27 @@ "sources": [ [ "pagila", - "payment_p2020_04" + "payment_p2020_05" ], [ "pagila", - "payment_p2020_01" + "payment_p2020_04" ], [ "pagila", - "payment_p2020_02" + "payment_p2020_01" ], [ "pagila", - "payment_p2020_03" + "payment_p2020_06" ], [ "pagila", - "payment_p2020_06" + "payment_p2020_02" ], [ "pagila", - "payment_p2020_05" + "payment_p2020_03" ] ], "tags": [], @@ -6425,18 +7817,24 @@ } }, "compiled": true, + "compiled_code": "\n\nSELECT\n date_trunc('month', payment_date) as \"billing_month\",\n customer_id,\n sum(amount) as \"amount\"\nFROM\n \"pagila\".\"dbt_postgres\".\"an-aliased-view-for-payments\"\nGROUP BY\n billing_month,\n 
customer_id", "compiled_path": "target/compiled/sample_dbt/models/transform/payments_by_customer_by_month.sql", - "compiled_sql": "\n\nSELECT\n date_trunc('month', payment_date) as \"billing_month\",\n customer_id,\n sum(amount) as \"amount\"\nFROM\n \"pagila\".\"dbt_postgres\".\"an-aliased-view-for-payments\"\nGROUP BY\n billing_month,\n customer_id", "config": { "alias": null, "column_types": {}, "database": null, + "docs": { + "node_color": null, + "show": true + }, "enabled": true, "full_refresh": null, "grants": {}, + "incremental_strategy": null, "materialized": "table", "meta": {}, "on_schema_change": "ignore", + "packages": [], "persist_docs": {}, "post-hook": [], "pre-hook": [], @@ -6445,7 +7843,7 @@ "tags": [], "unique_key": null }, - "created_at": 1663278957.5715818, + "created_at": 1670298551.767744, "database": "pagila", "deferred": false, "depends_on": { @@ -6456,6 +7854,7 @@ }, "description": "", "docs": { + "node_color": null, "show": true }, "extra_ctes": [], @@ -6465,14 +7864,15 @@ "transform", "payments_by_customer_by_month" ], + "language": "sql", "meta": {}, "metrics": [], "name": "payments_by_customer_by_month", "original_file_path": "models/transform/payments_by_customer_by_month.sql", "package_name": "sample_dbt", - "patch_path": "sample_dbt://models/base.yml", + "patch_path": null, "path": "transform/payments_by_customer_by_month.sql", - "raw_sql": "{{ config(\n materialized = \"table\",\n) }}\n\nSELECT\n date_trunc('month', payment_date) as \"billing_month\",\n customer_id,\n sum(amount) as \"amount\"\nFROM\n {{ ref('payments_base')}}\nGROUP BY\n billing_month,\n customer_id", + "raw_code": "{{ config(\n materialized = \"table\",\n) }}\n\nSELECT\n date_trunc('month', payment_date) as \"billing_month\",\n customer_id,\n sum(amount) as \"amount\"\nFROM\n {{ ref('payments_base')}}\nGROUP BY\n billing_month,\n customer_id", "refs": [ [ "payments_base" @@ -6556,7 +7956,7 @@ "config": { "enabled": true }, - "created_at": 1663278957.5715818, + "created_at": 1670298551.820541, "database": "pagila", "description": "description for actor table from dbt", "external": null, @@ -6611,7 +8011,7 @@ "config": { "enabled": true }, - "created_at": 1663278957.5715818, + "created_at": 1670298551.820682, "database": "pagila", "description": "a user's address", "external": null, @@ -6662,7 +8062,7 @@ "config": { "enabled": true }, - "created_at": 1663278957.5715818, + "created_at": 1670298551.820788, "database": "pagila", "description": "a user's category", "external": null, @@ -6713,7 +8113,7 @@ "config": { "enabled": true }, - "created_at": 1663278957.5715818, + "created_at": 1670298551.820918, "database": "pagila", "description": "", "external": null, @@ -6764,7 +8164,7 @@ "config": { "enabled": true }, - "created_at": 1663278957.5715818, + "created_at": 1670298551.8210292, "database": "pagila", "description": "", "external": null, @@ -6819,7 +8219,7 @@ "config": { "enabled": true }, - "created_at": 1663278957.5715818, + "created_at": 1670298551.821124, "database": "pagila", "description": "description for customer table from dbt", "external": null, @@ -6870,7 +8270,7 @@ "config": { "enabled": true }, - "created_at": 1663278957.5715818, + "created_at": 1670298551.821228, "database": "pagila", "description": "", "external": null, @@ -6921,7 +8321,7 @@ "config": { "enabled": true }, - "created_at": 1663278957.5715818, + "created_at": 1670298551.821324, "database": "pagila", "description": "", "external": null, @@ -6981,7 +8381,7 @@ "config": { "enabled": true }, - "created_at": 
1663278957.5715818, + "created_at": 1670298551.821418, "database": "pagila", "description": "", "external": null, @@ -7032,7 +8432,7 @@ "config": { "enabled": true }, - "created_at": 1663278957.5715818, + "created_at": 1670298551.821511, "database": "pagila", "description": "", "external": null, @@ -7083,7 +8483,7 @@ "config": { "enabled": true }, - "created_at": 1663278957.5715818, + "created_at": 1670298551.8216152, "database": "pagila", "description": "a payment", "external": null, @@ -7134,7 +8534,7 @@ "config": { "enabled": true }, - "created_at": 1663278957.5715818, + "created_at": 1670298551.821711, "database": "pagila", "description": "", "external": null, diff --git a/metadata-ingestion/tests/integration/dbt/sample_dbt_sources.json b/metadata-ingestion/tests/integration/dbt/sample_dbt_sources.json index 4d9ac83ee4cbc..38790bdc615b6 100644 --- a/metadata-ingestion/tests/integration/dbt/sample_dbt_sources.json +++ b/metadata-ingestion/tests/integration/dbt/sample_dbt_sources.json @@ -2,7 +2,7 @@ "elapsed_time": 3.1415, "metadata": { "dbt_schema_version": "https://schemas.getdbt.com/dbt/sources/v3.json", - "dbt_version": "1.2.1", + "dbt_version": "1.3.0", "env": {}, "generated_at": "2021-06-18T21:38:36.384613Z", "invocation_id": "just-some-random-id" diff --git a/metadata-ingestion/tests/integration/feast-legacy/core/core.yml b/metadata-ingestion/tests/integration/feast-legacy/core/core.yml deleted file mode 100644 index 517b2649ca250..0000000000000 --- a/metadata-ingestion/tests/integration/feast-legacy/core/core.yml +++ /dev/null @@ -1,5 +0,0 @@ -spring: - datasource: - url: jdbc:postgresql://${DB_HOST:127.0.0.1}:${DB_PORT:5432}/${DB_DATABASE:postgres} - username: ${DB_USERNAME:postgres} - password: ${DB_PASSWORD:password} diff --git a/metadata-ingestion/tests/integration/feast-legacy/docker-compose.yml b/metadata-ingestion/tests/integration/feast-legacy/docker-compose.yml deleted file mode 100644 index 687ab72a43ad9..0000000000000 --- a/metadata-ingestion/tests/integration/feast-legacy/docker-compose.yml +++ /dev/null @@ -1,42 +0,0 @@ -version: "3.7" - -services: - core: - image: gcr.io/kf-feast/feast-core:develop - container_name: "testfeast" - volumes: - - ./core/core.yml:/etc/feast/application.yml - environment: - DB_HOST: db - # restart: on-failure - depends_on: - - db - ports: - - 6565:6565 - command: - - java - - -jar - - /opt/feast/feast-core.jar - - --spring.config.location=classpath:/application.yml,file:/etc/feast/application.yml - - setup: - container_name: "testfeast_setup" - # build from the same Feast image used for ingestion - build: ../../../src/datahub/ingestion/source/feast_image/ - volumes: - - ./make_tests.py:/app/make_tests.py - - ./wait-for-it.sh:/app/wait-for-it.sh - depends_on: - - core - # listen to this port once test cases have been imported, so test script can see when done - ports: - - 6789:6789 - # wait for Feast to start, then insert test data - command: ./wait-for-it.sh testfeast:6565 -t 120 -- python3 make_tests.py - - db: - image: postgres:12-alpine - environment: - POSTGRES_PASSWORD: password - ports: - - "5432:5432" diff --git a/metadata-ingestion/tests/integration/feast-legacy/feast_mces_golden.json b/metadata-ingestion/tests/integration/feast-legacy/feast_mces_golden.json deleted file mode 100644 index 1b1af3ef3fc91..0000000000000 --- a/metadata-ingestion/tests/integration/feast-legacy/feast_mces_golden.json +++ /dev/null @@ -1,641 +0,0 @@ -[ -{ - "auditHeader": null, - "proposedSnapshot": { - 
"com.linkedin.pegasus2avro.metadata.snapshot.MLPrimaryKeySnapshot": { - "urn": "urn:li:mlPrimaryKey:(test_feature_table_all_feature_dtypes,dummy_entity_1)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLPrimaryKeyProperties": { - "description": "Dummy entity 1", - "dataType": "TEXT", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:file,feast.*,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLPrimaryKeySnapshot": { - "urn": "urn:li:mlPrimaryKey:(test_feature_table_all_feature_dtypes,dummy_entity_2)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLPrimaryKeyProperties": { - "description": "Dummy entity 2", - "dataType": "ORDINAL", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:file,feast.*,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_BOOL_LIST_feature)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - "dataType": "SEQUENCE", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:file,feast.*,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_BOOL_feature)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - "dataType": "BINARY", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:file,feast.*,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_BYTES_LIST_feature)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - "dataType": "SEQUENCE", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:file,feast.*,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_BYTES_feature)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - "dataType": "BYTE", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:file,feast.*,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -}, -{ - "auditHeader": 
null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_DOUBLE_LIST_feature)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - "dataType": "SEQUENCE", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:file,feast.*,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_DOUBLE_feature)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - "dataType": "CONTINUOUS", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:file,feast.*,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_FLOAT_LIST_feature)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - "dataType": "SEQUENCE", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:file,feast.*,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_FLOAT_feature)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - "dataType": "CONTINUOUS", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:file,feast.*,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_INT32_LIST_feature)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - "dataType": "SEQUENCE", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:file,feast.*,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_INT32_feature)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - "dataType": "ORDINAL", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:file,feast.*,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -}, -{ 
- "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_INT64_LIST_feature)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - "dataType": "SEQUENCE", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:file,feast.*,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_INT64_feature)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - "dataType": "ORDINAL", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:file,feast.*,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_STRING_LIST_feature)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - "dataType": "SEQUENCE", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:file,feast.*,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_STRING_feature)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - "dataType": "TEXT", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:file,feast.*,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureTableSnapshot": { - "urn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:feast,test_feature_table_all_feature_dtypes)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "feast/test_feature_table_all_feature_dtypes" - ] - } - }, - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureTableProperties": { - "customProperties": {}, - "description": null, - "mlFeatures": [ - "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_BOOL_LIST_feature)", - "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_BOOL_feature)", - "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_BYTES_LIST_feature)", - "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_BYTES_feature)", - "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_DOUBLE_LIST_feature)", - "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_DOUBLE_feature)", - "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_FLOAT_LIST_feature)", - "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_FLOAT_feature)", - 
"urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_INT32_LIST_feature)", - "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_INT32_feature)", - "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_INT64_LIST_feature)", - "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_INT64_feature)", - "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_STRING_LIST_feature)", - "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_STRING_feature)" - ], - "mlPrimaryKeys": [ - "urn:li:mlPrimaryKey:(test_feature_table_all_feature_dtypes,dummy_entity_1)", - "urn:li:mlPrimaryKey:(test_feature_table_all_feature_dtypes,dummy_entity_2)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLPrimaryKeySnapshot": { - "urn": "urn:li:mlPrimaryKey:(test_feature_table_no_labels,dummy_entity_2)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLPrimaryKeyProperties": { - "description": "Dummy entity 2", - "dataType": "ORDINAL", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:file,feast.*,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test_feature_table_no_labels,test_BYTES_feature)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - "dataType": "BYTE", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:file,feast.*,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureTableSnapshot": { - "urn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:feast,test_feature_table_no_labels)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "feast/test_feature_table_no_labels" - ] - } - }, - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureTableProperties": { - "customProperties": {}, - "description": null, - "mlFeatures": [ - "urn:li:mlFeature:(test_feature_table_no_labels,test_BYTES_feature)" - ], - "mlPrimaryKeys": [ - "urn:li:mlPrimaryKey:(test_feature_table_no_labels,dummy_entity_2)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLPrimaryKeySnapshot": { - "urn": "urn:li:mlPrimaryKey:(test_feature_table_single_feature,dummy_entity_1)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLPrimaryKeyProperties": { - "description": "Dummy entity 1", - "dataType": "TEXT", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:file,feast.*,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - 
"com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot": { - "urn": "urn:li:mlFeature:(test_feature_table_single_feature,test_BYTES_feature)", - "aspects": [ - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties": { - "description": null, - "dataType": "BYTE", - "version": null, - "sources": [ - "urn:li:dataset:(urn:li:dataPlatform:file,feast.*,PROD)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -}, -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureTableSnapshot": { - "urn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:feast,test_feature_table_single_feature)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "feast/test_feature_table_single_feature" - ] - } - }, - { - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureTableProperties": { - "customProperties": {}, - "description": null, - "mlFeatures": [ - "urn:li:mlFeature:(test_feature_table_single_feature,test_BYTES_feature)" - ], - "mlPrimaryKeys": [ - "urn:li:mlPrimaryKey:(test_feature_table_single_feature,dummy_entity_1)" - ] - } - } - ] - } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "feast-test", - "properties": null - } -} -] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/feast-legacy/make_tests.py b/metadata-ingestion/tests/integration/feast-legacy/make_tests.py deleted file mode 100644 index 59ec7c89d0a98..0000000000000 --- a/metadata-ingestion/tests/integration/feast-legacy/make_tests.py +++ /dev/null @@ -1,115 +0,0 @@ -import socket - -import feast - -FEAST_MIN_VERSION = "0.18.0" -if feast.__version__ <= FEAST_MIN_VERSION: - from feast import Client # type: ignore - from feast.data_format import ParquetFormat - from feast.data_source import FileSource # type: ignore - from feast.entity import Entity - from feast.feature import Feature - from feast.feature_table import FeatureTable # type: ignore - from feast.value_type import ValueType - - -if __name__ == "__main__": - if feast.__version__ > FEAST_MIN_VERSION: - raise Exception( - f"this code does not work with feast > {FEAST_MIN_VERSION}. 
Found {feast.__version__}" - ) - - test_client = Client(core_url="testfeast:6565") - - # create dummy entity since Feast demands it - entity_1 = Entity( - name="dummy_entity_1", - description="Dummy entity 1", - value_type=ValueType.STRING, - labels={"key": "val"}, - ) - - # create dummy entity since Feast demands it - entity_2 = Entity( - name="dummy_entity_2", - description="Dummy entity 2", - value_type=ValueType.INT32, - labels={"key": "val"}, - ) - - # commit entities - test_client.apply([entity_1, entity_2]) - - # dummy file source - batch_source = FileSource( - file_format=ParquetFormat(), - file_url="file://feast/*", - event_timestamp_column="ts_col", - created_timestamp_column="timestamp", - date_partition_column="date_partition_col", - ) - - # first feature table for testing, with all of Feast's datatypes - table_1 = FeatureTable( - name="test_feature_table_all_feature_dtypes", - features=[ - Feature(name="test_BYTES_feature", dtype=ValueType.BYTES), - Feature(name="test_STRING_feature", dtype=ValueType.STRING), - Feature(name="test_INT32_feature", dtype=ValueType.INT32), - Feature(name="test_INT64_feature", dtype=ValueType.INT64), - Feature(name="test_DOUBLE_feature", dtype=ValueType.DOUBLE), - Feature(name="test_FLOAT_feature", dtype=ValueType.FLOAT), - Feature(name="test_BOOL_feature", dtype=ValueType.BOOL), - Feature(name="test_BYTES_LIST_feature", dtype=ValueType.BYTES_LIST), - Feature(name="test_STRING_LIST_feature", dtype=ValueType.STRING_LIST), - Feature(name="test_INT32_LIST_feature", dtype=ValueType.INT32_LIST), - Feature(name="test_INT64_LIST_feature", dtype=ValueType.INT64_LIST), - Feature(name="test_DOUBLE_LIST_feature", dtype=ValueType.DOUBLE_LIST), - Feature(name="test_FLOAT_LIST_feature", dtype=ValueType.FLOAT_LIST), - Feature(name="test_BOOL_LIST_feature", dtype=ValueType.BOOL_LIST), - ], - entities=["dummy_entity_1", "dummy_entity_2"], - labels={"team": "matchmaking"}, - batch_source=batch_source, - ) - - # second feature table for testing, with just a single feature - table_2 = FeatureTable( - name="test_feature_table_single_feature", - features=[ - Feature(name="test_BYTES_feature", dtype=ValueType.BYTES), - ], - entities=["dummy_entity_1"], - labels={"team": "matchmaking"}, - batch_source=batch_source, - ) - - # third feature table for testing, no labels - table_3 = FeatureTable( - name="test_feature_table_no_labels", - features=[ - Feature(name="test_BYTES_feature", dtype=ValueType.BYTES), - ], - entities=["dummy_entity_2"], - labels={}, - batch_source=batch_source, - ) - - # commit the tables to the feature store - test_client.apply([table_1, table_2, table_3]) - - print("make_tests.py setup finished") - - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - - # listen to port 6789 once done so test script knows when to start ingestion - server_address = ("localhost", 6789) - sock.bind(server_address) - - sock.listen(1) - - print("make_tests.py listening on 6789") - - while True: - # Wait for a connection - connection, client_address = sock.accept() diff --git a/metadata-ingestion/tests/integration/feast-legacy/serving/online-serving.yml b/metadata-ingestion/tests/integration/feast-legacy/serving/online-serving.yml deleted file mode 100644 index f0cb272a2a924..0000000000000 --- a/metadata-ingestion/tests/integration/feast-legacy/serving/online-serving.yml +++ /dev/null @@ -1,6 +0,0 @@ -COMPOSE_PROJECT_NAME=feast -FEAST_VERSION=develop -FEAST_CORE_CONFIG=./core/core.yml -FEAST_ONLINE_SERVING_CONFIG=./serving/online-serving.yml 
-GCP_SERVICE_ACCOUNT=./gcp-service-accounts/placeholder.json -INGESTION_JAR_PATH=https://storage.googleapis.com/feast-jobs/spark/ingestion/feast-ingestion-spark-develop.jar diff --git a/metadata-ingestion/tests/integration/feast-legacy/test_feast.py b/metadata-ingestion/tests/integration/feast-legacy/test_feast.py deleted file mode 100644 index ce55ca336f149..0000000000000 --- a/metadata-ingestion/tests/integration/feast-legacy/test_feast.py +++ /dev/null @@ -1,57 +0,0 @@ -import pytest -from freezegun import freeze_time - -from datahub.ingestion.run.pipeline import Pipeline -from tests.test_helpers import mce_helpers - -# from datahub.ingestion.run.pipeline import Pipeline -# from tests.test_helpers import mce_helpers -from tests.test_helpers.docker_helpers import wait_for_port - -FROZEN_TIME = "2020-04-14 07:00:00" - - -# make sure that mock_time is excluded here because it messes with feast -@freeze_time(FROZEN_TIME) -@pytest.mark.integration_batch_1 -def test_feast_ingest(docker_compose_runner, pytestconfig, tmp_path): - test_resources_dir = pytestconfig.rootpath / "tests/integration/feast-legacy" - - with docker_compose_runner( - test_resources_dir / "docker-compose.yml", "feast" - ) as docker_services: - wait_for_port(docker_services, "testfeast", 6565, timeout=120) - - # container listens to this port once test cases have been setup - wait_for_port( - docker_services, "testfeast_setup", 6789, timeout=120, hostname="localhost" - ) - - # Run the metadata ingestion pipeline. - pipeline = Pipeline.create( - { - "run_id": "feast-test", - "source": { - "type": "feast-legacy", - "config": { - "core_url": "localhost:6565", - "use_local_build": True, - }, - }, - "sink": { - "type": "file", - "config": { - "filename": f"{tmp_path}/feast_mces.json", - }, - }, - } - ) - pipeline.run() - pipeline.raise_from_status() - - # Verify the output. - mce_helpers.check_golden_file( - pytestconfig, - output_path=tmp_path / "feast_mces.json", - golden_path=test_resources_dir / "feast_mces_golden.json", - ) diff --git a/metadata-ingestion/tests/integration/feast-legacy/wait-for-it.sh b/metadata-ingestion/tests/integration/feast-legacy/wait-for-it.sh deleted file mode 100755 index 5b551220d9ee5..0000000000000 --- a/metadata-ingestion/tests/integration/feast-legacy/wait-for-it.sh +++ /dev/null @@ -1,183 +0,0 @@ -#!/usr/bin/env bash -# Use this script to test if a given TCP host/port are available -# from https://github.com/vishnubob/wait-for-it - -WAITFORIT_cmdname=${0##*/} - -echoerr() { if [[ $WAITFORIT_QUIET -ne 1 ]]; then echo "$@" 1>&2; fi } - -usage() -{ - cat << USAGE >&2 -Usage: - $WAITFORIT_cmdname host:port [-s] [-t timeout] [-- command args] - -h HOST | --host=HOST Host or IP under test - -p PORT | --port=PORT TCP port under test - Alternatively, you specify the host and port as host:port - -s | --strict Only execute subcommand if the test succeeds - -q | --quiet Don't output any status messages - -t TIMEOUT | --timeout=TIMEOUT - Timeout in seconds, zero for no timeout - -- COMMAND ARGS Execute command with args after the test finishes -USAGE - exit 1 -} - -wait_for() -{ - if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then - echoerr "$WAITFORIT_cmdname: waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT" - else - echoerr "$WAITFORIT_cmdname: waiting for $WAITFORIT_HOST:$WAITFORIT_PORT without a timeout" - fi - WAITFORIT_start_ts=$(date +%s) - while : - do - if [[ $WAITFORIT_ISBUSY -eq 1 ]]; then - nc -z $WAITFORIT_HOST $WAITFORIT_PORT - WAITFORIT_result=$? 
- else - (echo -n > /dev/tcp/$WAITFORIT_HOST/$WAITFORIT_PORT) >/dev/null 2>&1 - WAITFORIT_result=$? - fi - if [[ $WAITFORIT_result -eq 0 ]]; then - WAITFORIT_end_ts=$(date +%s) - echoerr "$WAITFORIT_cmdname: $WAITFORIT_HOST:$WAITFORIT_PORT is available after $((WAITFORIT_end_ts - WAITFORIT_start_ts)) seconds" - break - fi - sleep 1 - done - return $WAITFORIT_result -} - -wait_for_wrapper() -{ - # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692 - if [[ $WAITFORIT_QUIET -eq 1 ]]; then - timeout $WAITFORIT_BUSYTIMEFLAG $WAITFORIT_TIMEOUT $0 --quiet --child --host=$WAITFORIT_HOST --port=$WAITFORIT_PORT --timeout=$WAITFORIT_TIMEOUT & - else - timeout $WAITFORIT_BUSYTIMEFLAG $WAITFORIT_TIMEOUT $0 --child --host=$WAITFORIT_HOST --port=$WAITFORIT_PORT --timeout=$WAITFORIT_TIMEOUT & - fi - WAITFORIT_PID=$! - trap "kill -INT -$WAITFORIT_PID" INT - wait $WAITFORIT_PID - WAITFORIT_RESULT=$? - if [[ $WAITFORIT_RESULT -ne 0 ]]; then - echoerr "$WAITFORIT_cmdname: timeout occurred after waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT" - fi - return $WAITFORIT_RESULT -} - -# process arguments -while [[ $# -gt 0 ]] -do - case "$1" in - *:* ) - WAITFORIT_hostport=(${1//:/ }) - WAITFORIT_HOST=${WAITFORIT_hostport[0]} - WAITFORIT_PORT=${WAITFORIT_hostport[1]} - shift 1 - ;; - --child) - WAITFORIT_CHILD=1 - shift 1 - ;; - -q | --quiet) - WAITFORIT_QUIET=1 - shift 1 - ;; - -s | --strict) - WAITFORIT_STRICT=1 - shift 1 - ;; - -h) - WAITFORIT_HOST="$2" - if [[ $WAITFORIT_HOST == "" ]]; then break; fi - shift 2 - ;; - --host=*) - WAITFORIT_HOST="${1#*=}" - shift 1 - ;; - -p) - WAITFORIT_PORT="$2" - if [[ $WAITFORIT_PORT == "" ]]; then break; fi - shift 2 - ;; - --port=*) - WAITFORIT_PORT="${1#*=}" - shift 1 - ;; - -t) - WAITFORIT_TIMEOUT="$2" - if [[ $WAITFORIT_TIMEOUT == "" ]]; then break; fi - shift 2 - ;; - --timeout=*) - WAITFORIT_TIMEOUT="${1#*=}" - shift 1 - ;; - --) - shift - WAITFORIT_CLI=("$@") - break - ;; - --help) - usage - ;; - *) - echoerr "Unknown argument: $1" - usage - ;; - esac -done - -if [[ "$WAITFORIT_HOST" == "" || "$WAITFORIT_PORT" == "" ]]; then - echoerr "Error: you need to provide a host and port to test." - usage -fi - -WAITFORIT_TIMEOUT=${WAITFORIT_TIMEOUT:-15} -WAITFORIT_STRICT=${WAITFORIT_STRICT:-0} -WAITFORIT_CHILD=${WAITFORIT_CHILD:-0} -WAITFORIT_QUIET=${WAITFORIT_QUIET:-0} - -# Check to see if timeout is from busybox? -WAITFORIT_TIMEOUT_PATH=$(type -p timeout) -WAITFORIT_TIMEOUT_PATH=$(realpath $WAITFORIT_TIMEOUT_PATH 2>/dev/null || readlink -f $WAITFORIT_TIMEOUT_PATH) - -WAITFORIT_BUSYTIMEFLAG="" -if [[ $WAITFORIT_TIMEOUT_PATH =~ "busybox" ]]; then - WAITFORIT_ISBUSY=1 - # Check if busybox timeout uses -t flag - # (recent Alpine versions don't support -t anymore) - if timeout &>/dev/stdout | grep -q -e '-t '; then - WAITFORIT_BUSYTIMEFLAG="-t" - fi -else - WAITFORIT_ISBUSY=0 -fi - -if [[ $WAITFORIT_CHILD -gt 0 ]]; then - wait_for - WAITFORIT_RESULT=$? - exit $WAITFORIT_RESULT -else - if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then - wait_for_wrapper - WAITFORIT_RESULT=$? - else - wait_for - WAITFORIT_RESULT=$? 
- fi -fi - -if [[ $WAITFORIT_CLI != "" ]]; then - if [[ $WAITFORIT_RESULT -ne 0 && $WAITFORIT_STRICT -eq 1 ]]; then - echoerr "$WAITFORIT_cmdname: strict mode, refusing to execute subprocess" - exit $WAITFORIT_RESULT - fi - exec "${WAITFORIT_CLI[@]}" -else - exit $WAITFORIT_RESULT -fi \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/kafka-connect/docker-compose.override.yml b/metadata-ingestion/tests/integration/kafka-connect/docker-compose.override.yml index 77128adccaaae..e33e776be0d13 100644 --- a/metadata-ingestion/tests/integration/kafka-connect/docker-compose.override.yml +++ b/metadata-ingestion/tests/integration/kafka-connect/docker-compose.override.yml @@ -11,6 +11,7 @@ services: - zookeeper - broker - mysqldb + - mongo ports: - "58083:58083" # volumes: @@ -34,6 +35,8 @@ services: # #confluent-hub install --no-prompt wepay/kafka-connect-bigquery:1.6.8 # + confluent-hub install --no-prompt mongodb/kafka-connect-mongodb:1.8.0 + # curl -k -SL "https://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-8.0.27.tar.gz" \ | tar -xzf - -C /usr/share/confluent-hub-components/confluentinc-kafka-connect-jdbc/lib \ --strip-components=1 mysql-connector-java-8.0.27/mysql-connector-java-8.0.27.jar @@ -71,5 +74,21 @@ services: ports: - "5432:5432" + mongo: + hostname: mongo + image: mongo:4.2.9 + container_name: "test_mongo" + ports: + - "27017:27017" + command: --replSet rs0 + environment: + - MONGO_INITDB_ROOT_USERNAME=admin + - MONGO_INITDB_ROOT_PASSWORD=admin + - MONGO_INITDB_DATABASE=test_db + - MONGO_INITDB_USERNAME=kafka-connector + - MONGO_INITDB_PASSWORD=password + volumes: + - ./../kafka-connect/setup/conf/mongo-init.sh:/docker-entrypoint-initdb.d/mongo-init.sh:ro + volumes: test_zkdata: \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/kafka-connect/kafka_connect_mongo_mces_golden.json b/metadata-ingestion/tests/integration/kafka-connect/kafka_connect_mongo_mces_golden.json new file mode 100644 index 0000000000000..d95529b32b9f6 --- /dev/null +++ b/metadata-ingestion/tests/integration/kafka-connect/kafka_connect_mongo_mces_golden.json @@ -0,0 +1,72 @@ +[ + { + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(kafka-connect,source_mongodb_connector,PROD)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"source_mongodb_connector\", \"description\": \"Source connector using `com.mongodb.kafka.connect.MongoSourceConnector` plugin.\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1635166800000, + "runId": "kafka-connect-run" + } + }, + { + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(kafka-connect,source_mongodb_connector,PROD),test_db.purchases)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "value": "{\"customProperties\": {}, \"name\": \"source_mongodb_connector:test_db.purchases\", \"type\": {\"string\": \"COMMAND\"}}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1635166800000, + "runId": "kafka-connect-run" + } + }, + { + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(kafka-connect,source_mongodb_connector,PROD),test_db.purchases)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "value": "{\"inputDatasets\": [\"urn:li:dataset:(urn:li:dataPlatform:mongodb,test_db.purchases,PROD)\"], \"outputDatasets\": 
[\"urn:li:dataset:(urn:li:dataPlatform:kafka,mongodb.test_db.purchases,PROD)\"]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1635166800000, + "runId": "kafka-connect-run" + } + }, + { + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:kafka,mongodb.test_db.purchases,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "value": "{\"platform\": \"urn:li:dataPlatform:kafka\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1635166800000, + "runId": "kafka-connect-run" + } + }, + { + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,test_db.purchases,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "value": "{\"platform\": \"urn:li:dataPlatform:mongodb\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1635166800000, + "runId": "kafka-connect-run" + } + } +] diff --git a/metadata-ingestion/tests/integration/kafka-connect/setup/conf/mongo-init.sh b/metadata-ingestion/tests/integration/kafka-connect/setup/conf/mongo-init.sh new file mode 100644 index 0000000000000..acd8424e5e7c2 --- /dev/null +++ b/metadata-ingestion/tests/integration/kafka-connect/setup/conf/mongo-init.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +mongo -- "$MONGO_INITDB_DATABASE" <<-EOJS + conn = new Mongo(); + db = conn.getDB("test_db"); + db.purchases.insertOne({ _id: 3, item: "lamp post", price: 12 }); + db.purchases.insertOne({ _id: 4, item: "lamp post", price: 13 }); +EOJS + + +{ +sleep 3 && +mongo -- "$MONGO_INITDB_DATABASE" <<-EOJS + var rootUser = '$MONGO_INITDB_ROOT_USERNAME'; + var rootPassword = '$MONGO_INITDB_ROOT_PASSWORD'; + var admin = db.getSiblingDB('admin'); + admin.auth(rootUser, rootPassword); +EOJS +} & + + + diff --git a/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py b/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py index 0547d354e1670..75a4ca89e5466 100644 --- a/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py +++ b/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py @@ -53,6 +53,7 @@ def test_kafka_connect_ingest(docker_compose_runner, pytestconfig, tmp_path, moc ).status_code == 200, ) + # Creating MySQL source with no transformations , only topic prefix r = requests.post( "http://localhost:58083/connectors", @@ -252,3 +253,88 @@ def test_kafka_connect_ingest(docker_compose_runner, pytestconfig, tmp_path, moc golden_path=test_resources_dir / "kafka_connect_mces_golden.json", ignore_paths=[], ) + + +@freeze_time(FROZEN_TIME) +@pytest.mark.integration_batch_1 +def test_kafka_connect_mongosourceconnect_ingest( + docker_compose_runner, pytestconfig, tmp_path, mock_time +): + test_resources_dir = pytestconfig.rootpath / "tests/integration/kafka-connect" + test_resources_dir_kafka = pytestconfig.rootpath / "tests/integration/kafka" + + # Share Compose configurations between files and projects + # https://docs.docker.com/compose/extends/ + docker_compose_file = [ + str(test_resources_dir_kafka / "docker-compose.yml"), + str(test_resources_dir / "docker-compose.override.yml"), + ] + with docker_compose_runner(docker_compose_file, "kafka-connect") as docker_services: + time.sleep(10) + # Run the setup.sql file to populate the database. 
+ command = 'docker exec test_mongo mongo admin -u admin -p admin --eval "rs.initiate();"' + ret = subprocess.run( + command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + assert ret.returncode == 0 + time.sleep(10) + + wait_for_port(docker_services, "test_broker", 59092, timeout=120) + wait_for_port(docker_services, "test_connect", 58083, timeout=120) + docker_services.wait_until_responsive( + timeout=30, + pause=1, + check=lambda: requests.get( + "http://localhost:58083/connectors", + ).status_code + == 200, + ) + + # Creating MongoDB source + r = requests.post( + "http://localhost:58083/connectors", + headers={"Content-Type": "application/json"}, + data=r"""{ + "name": "source_mongodb_connector", + "config": { + "tasks.max": "1", + "connector.class": "com.mongodb.kafka.connect.MongoSourceConnector", + "connection.uri": "mongodb://admin:admin@test_mongo:27017", + "topic.prefix": "mongodb", + "database": "test_db", + "collection": "purchases", + "copy.existing": true, + "copy.existing.namespace.regex": "test_db.purchases", + "change.stream.full.document": "updateLookup", + "topic.creation.enable": "true", + "topic.creation.default.replication.factor": "-1", + "topic.creation.default.partitions": "-1", + "output.json.formatter": "com.mongodb.kafka.connect.source.json.formatter.SimplifiedJson", + "key.converter": "org.apache.kafka.connect.storage.StringConverter", + "value.converter": "org.apache.kafka.connect.storage.StringConverter", + "key.converter.schemas.enable": false, + "value.converter.schemas.enable": false, + "output.format.key": "schema", + "output.format.value": "json", + "output.schema.infer.value": false, + "publish.full.document.only":true + } + }""", + ) + r.raise_for_status() + assert r.status_code == 201 # Created + + # Give time for connectors to process the table data + time.sleep(60) + + # Run the metadata ingestion pipeline. + config_file = (test_resources_dir / "kafka_connect_to_file.yml").resolve() + run_datahub_cmd(["ingest", "-c", f"{config_file}"], tmp_path=tmp_path) + + # Verify the output. 
+ mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / "kafka_connect_mces.json", + golden_path=test_resources_dir / "kafka_connect_mongo_mces_golden.json", + ignore_paths=[], + ) diff --git a/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json b/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json index 307ce8385e458..ee49fbbf2cceb 100644 --- a/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json +++ b/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json @@ -333,7 +333,7 @@ "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { - "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 10, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"emp_no\", \"uniqueCount\": 10, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"10001\", \"10002\", \"10003\", \"10004\", \"10005\", \"10006\", \"10007\", \"10008\", \"10009\", \"10010\"]}, {\"fieldPath\": \"birth_date\", \"uniqueCount\": 10, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1952-04-19\", \"max\": \"1964-06-02\", \"sampleValues\": [\"1953-09-02\", \"1964-06-02\", \"1959-12-03\", \"1954-05-01\", \"1955-01-21\", \"1953-04-20\", \"1957-05-23\", \"1958-02-19\", \"1952-04-19\", \"1963-06-01\"]}, {\"fieldPath\": \"first_name\", \"uniqueCount\": 10, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Georgi\", \"Bezalel\", \"Parto\", \"Chirstian\", \"Kyoichi\", \"Anneke\", \"Tzvetan\", \"Saniya\", \"Sumant\", \"Duangkaew\"]}, {\"fieldPath\": \"last_name\", \"uniqueCount\": 10, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Facello\", \"Simmel\", \"Bamford\", \"Koblick\", \"Maliniak\", \"Preusig\", \"Zielinski\", \"Kalloufi\", \"Peac\", \"Piveteau\"]}, {\"fieldPath\": \"gender\", \"uniqueCount\": 2, \"uniqueProportion\": 0.2, \"nullCount\": 0, \"nullProportion\": 0.0, \"distinctValueFrequencies\": [{\"value\": \"M\", \"frequency\": 5}, {\"value\": \"F\", \"frequency\": 5}], \"sampleValues\": [\"M\", \"F\", \"M\", \"M\", \"M\", \"F\", \"F\", \"M\", \"F\", \"F\"]}, {\"fieldPath\": \"hire_date\", \"uniqueCount\": 10, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1985-02-18\", \"max\": \"1994-09-15\", \"sampleValues\": [\"1986-06-26\", \"1985-11-21\", \"1986-08-28\", \"1986-12-01\", \"1989-09-12\", \"1989-06-02\", \"1989-02-10\", \"1994-09-15\", \"1985-02-18\", \"1989-08-24\"]}]}", + "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 10, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"emp_no\", \"uniqueCount\": 10, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"10001\", \"max\": \"10010\", \"mean\": \"10005.5\", \"median\": \"10005.5\", \"stdev\": \"3.0276503540974917\", \"sampleValues\": [\"10001\", \"10002\", \"10003\", \"10004\", \"10005\", \"10006\", \"10007\", \"10008\", \"10009\", \"10010\"]}, {\"fieldPath\": \"birth_date\", \"uniqueCount\": 10, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1952-04-19\", \"max\": \"1964-06-02\", \"sampleValues\": [\"1953-09-02\", \"1964-06-02\", \"1959-12-03\", \"1954-05-01\", \"1955-01-21\", \"1953-04-20\", \"1957-05-23\", \"1958-02-19\", \"1952-04-19\", 
\"1963-06-01\"]}, {\"fieldPath\": \"first_name\", \"uniqueCount\": 10, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Georgi\", \"Bezalel\", \"Parto\", \"Chirstian\", \"Kyoichi\", \"Anneke\", \"Tzvetan\", \"Saniya\", \"Sumant\", \"Duangkaew\"]}, {\"fieldPath\": \"last_name\", \"uniqueCount\": 10, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Facello\", \"Simmel\", \"Bamford\", \"Koblick\", \"Maliniak\", \"Preusig\", \"Zielinski\", \"Kalloufi\", \"Peac\", \"Piveteau\"]}, {\"fieldPath\": \"gender\", \"uniqueCount\": 2, \"uniqueProportion\": 0.2, \"nullCount\": 0, \"nullProportion\": 0.0, \"distinctValueFrequencies\": [{\"value\": \"M\", \"frequency\": 5}, {\"value\": \"F\", \"frequency\": 5}], \"sampleValues\": [\"M\", \"F\", \"M\", \"M\", \"M\", \"F\", \"F\", \"M\", \"F\", \"F\"]}, {\"fieldPath\": \"hire_date\", \"uniqueCount\": 10, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1985-02-18\", \"max\": \"1994-09-15\", \"sampleValues\": [\"1986-06-26\", \"1985-11-21\", \"1986-08-28\", \"1986-12-01\", \"1989-09-12\", \"1989-06-02\", \"1989-02-10\", \"1994-09-15\", \"1985-02-18\", \"1989-08-24\"]}]}", "contentType": "application/json" }, "systemMetadata": { @@ -1264,7 +1264,7 @@ "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { - "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 5, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 5, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\", \"3\", \"4\", \"5\"]}, {\"fieldPath\": \"company\", \"uniqueCount\": 5, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Company A\", \"Company B\", \"Company C\", \"Company D\", \"Company E\"]}, {\"fieldPath\": \"last_name\", \"uniqueCount\": 5, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Axen\", \"Bedecs\", \"Donnell\", \"Gratacos Solsona\", \"Lee\"]}, {\"fieldPath\": \"first_name\", \"uniqueCount\": 5, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Anna\", \"Antonio\", \"Christina\", \"Martin\", \"Thomas\"]}, {\"fieldPath\": \"email_address\", \"uniqueCount\": 0, \"nullCount\": 5, \"nullProportion\": 1, \"sampleValues\": []}, {\"fieldPath\": \"priority\", \"uniqueCount\": 3, \"uniqueProportion\": 0.75, \"nullCount\": 1, \"nullProportion\": 0.2, \"min\": \"3.8\", \"max\": \"4.9\", \"mean\": \"4.175000011920929\", \"median\": \"4.0\", \"distinctValueFrequencies\": [{\"value\": \"3.8\", \"frequency\": 1}, {\"value\": \"4.0\", \"frequency\": 2}, {\"value\": \"4.9\", \"frequency\": 1}], \"sampleValues\": [\"4.0\", \"4.9\", \"4.0\", \"3.8\"]}]}", + "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 5, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 5, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1\", \"max\": \"5\", \"mean\": \"3.0\", \"median\": \"3\", \"stdev\": \"1.5811388300841898\", \"sampleValues\": [\"1\", \"2\", \"3\", \"4\", \"5\"]}, {\"fieldPath\": \"company\", \"uniqueCount\": 5, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Company A\", \"Company B\", \"Company C\", 
\"Company D\", \"Company E\"]}, {\"fieldPath\": \"last_name\", \"uniqueCount\": 5, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Axen\", \"Bedecs\", \"Donnell\", \"Gratacos Solsona\", \"Lee\"]}, {\"fieldPath\": \"first_name\", \"uniqueCount\": 5, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Anna\", \"Antonio\", \"Christina\", \"Martin\", \"Thomas\"]}, {\"fieldPath\": \"email_address\", \"uniqueCount\": 0, \"nullCount\": 5, \"nullProportion\": 1, \"sampleValues\": []}, {\"fieldPath\": \"priority\", \"uniqueCount\": 3, \"uniqueProportion\": 0.75, \"nullCount\": 1, \"nullProportion\": 0.2, \"min\": \"3.8\", \"max\": \"4.9\", \"mean\": \"4.175000011920929\", \"median\": \"4.0\", \"stdev\": \"0.49244294899530355\", \"distinctValueFrequencies\": [{\"value\": \"3.8\", \"frequency\": 1}, {\"value\": \"4.0\", \"frequency\": 2}, {\"value\": \"4.9\", \"frequency\": 1}], \"sampleValues\": [\"4.0\", \"4.9\", \"4.0\", \"3.8\"]}]}", "contentType": "application/json" }, "systemMetadata": { @@ -1278,7 +1278,7 @@ "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { - "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 0, \"columnCount\": 3, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}, {\"fieldPath\": \"description\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}, {\"fieldPath\": \"customer_id\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}]}", + "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 0, \"columnCount\": 3, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 0, \"nullCount\": 0, \"min\": \"None\", \"max\": \"None\", \"mean\": \"None\", \"median\": \"None\", \"stdev\": \"0.0\", \"sampleValues\": []}, {\"fieldPath\": \"description\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}, {\"fieldPath\": \"customer_id\", \"uniqueCount\": 0, \"nullCount\": 0, \"min\": \"None\", \"max\": \"None\", \"mean\": \"None\", \"median\": \"None\", \"stdev\": \"0.0\", \"sampleValues\": []}]}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/mysql/mysql_mces_with_db_golden.json b/metadata-ingestion/tests/integration/mysql/mysql_mces_with_db_golden.json index 707591216d455..a5b66f8469f51 100644 --- a/metadata-ingestion/tests/integration/mysql/mysql_mces_with_db_golden.json +++ b/metadata-ingestion/tests/integration/mysql/mysql_mces_with_db_golden.json @@ -375,7 +375,7 @@ "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { - "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 5, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 5, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\", \"3\", \"4\", \"5\"]}, {\"fieldPath\": \"company\", \"uniqueCount\": 5, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Company A\", \"Company B\", \"Company C\", \"Company D\", \"Company E\"]}, {\"fieldPath\": \"last_name\", \"uniqueCount\": 5, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Axen\", \"Bedecs\", 
\"Donnell\", \"Gratacos Solsona\", \"Lee\"]}, {\"fieldPath\": \"first_name\", \"uniqueCount\": 5, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Anna\", \"Antonio\", \"Christina\", \"Martin\", \"Thomas\"]}, {\"fieldPath\": \"email_address\", \"uniqueCount\": 0, \"nullCount\": 5, \"nullProportion\": 1, \"sampleValues\": []}, {\"fieldPath\": \"priority\", \"uniqueCount\": 3, \"uniqueProportion\": 0.75, \"nullCount\": 1, \"nullProportion\": 0.2, \"min\": \"3.8\", \"max\": \"4.9\", \"mean\": \"4.175000011920929\", \"median\": \"4.0\", \"distinctValueFrequencies\": [{\"value\": \"3.8\", \"frequency\": 1}, {\"value\": \"4.0\", \"frequency\": 2}, {\"value\": \"4.9\", \"frequency\": 1}], \"sampleValues\": [\"4.0\", \"4.9\", \"4.0\", \"3.8\"]}]}", + "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 5, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 5, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1\", \"max\": \"5\", \"mean\": \"3.0\", \"median\": \"3\", \"stdev\": \"1.5811388300841898\", \"sampleValues\": [\"1\", \"2\", \"3\", \"4\", \"5\"]}, {\"fieldPath\": \"company\", \"uniqueCount\": 5, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Company A\", \"Company B\", \"Company C\", \"Company D\", \"Company E\"]}, {\"fieldPath\": \"last_name\", \"uniqueCount\": 5, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Axen\", \"Bedecs\", \"Donnell\", \"Gratacos Solsona\", \"Lee\"]}, {\"fieldPath\": \"first_name\", \"uniqueCount\": 5, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Anna\", \"Antonio\", \"Christina\", \"Martin\", \"Thomas\"]}, {\"fieldPath\": \"email_address\", \"uniqueCount\": 0, \"nullCount\": 5, \"nullProportion\": 1, \"sampleValues\": []}, {\"fieldPath\": \"priority\", \"uniqueCount\": 3, \"uniqueProportion\": 0.75, \"nullCount\": 1, \"nullProportion\": 0.2, \"min\": \"3.8\", \"max\": \"4.9\", \"mean\": \"4.175000011920929\", \"median\": \"4.0\", \"stdev\": \"0.49244294899530355\", \"distinctValueFrequencies\": [{\"value\": \"3.8\", \"frequency\": 1}, {\"value\": \"4.0\", \"frequency\": 2}, {\"value\": \"4.9\", \"frequency\": 1}], \"sampleValues\": [\"4.0\", \"4.9\", \"4.0\", \"3.8\"]}]}", "contentType": "application/json" }, "systemMetadata": { @@ -389,7 +389,7 @@ "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { - "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 0, \"columnCount\": 3, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}, {\"fieldPath\": \"description\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}, {\"fieldPath\": \"customer_id\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}]}", + "value": "{\"timestampMillis\": 1586847600000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 0, \"columnCount\": 3, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 0, \"nullCount\": 0, \"min\": \"None\", \"max\": \"None\", \"mean\": \"None\", \"median\": \"None\", \"stdev\": \"0.0\", \"sampleValues\": []}, {\"fieldPath\": \"description\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}, 
{\"fieldPath\": \"customer_id\", \"uniqueCount\": 0, \"nullCount\": 0, \"min\": \"None\", \"max\": \"None\", \"mean\": \"None\", \"median\": \"None\", \"stdev\": \"0.0\", \"sampleValues\": []}]}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py b/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py index 2dedfb091392e..1ce29f91e8da3 100644 --- a/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py +++ b/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py @@ -167,7 +167,7 @@ def default_source_config(): "workstation_name": "workstation", "host_port": "host_port", "server_alias": "server_alias", - "graphql_url": "http://localhost:8080/api/graphql", + "graphql_url": None, "report_virtual_directory_name": "Reports", "report_server_virtual_directory_name": "ReportServer", "env": "DEV", diff --git a/metadata-ingestion/tests/integration/snowflake-beta/snowflake_beta_golden.json b/metadata-ingestion/tests/integration/snowflake-beta/snowflake_beta_golden.json index 68c572788cfdf..27a2c089e45ac 100644 --- a/metadata-ingestion/tests/integration/snowflake-beta/snowflake_beta_golden.json +++ b/metadata-ingestion/tests/integration/snowflake-beta/snowflake_beta_golden.json @@ -5,7 +5,7 @@ "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"snowflake\", \"instance\": \"PROD\", \"database\": \"test_db\"}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/\", \"name\": \"TEST_DB\", \"description\": \"Comment for TEST_DB\"}", + "value": "{\"customProperties\": {\"platform\": \"snowflake\", \"instance\": \"PROD\", \"database\": \"test_db\"}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/\", \"name\": \"TEST_DB\", \"description\": \"Comment for TEST_DB\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}}", "contentType": "application/json" }, "systemMetadata": { @@ -61,7 +61,7 @@ "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"snowflake\", \"instance\": \"PROD\", \"database\": \"test_db\", \"schema\": \"test_schema\"}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/\", \"name\": \"TEST_SCHEMA\", \"description\": \"comment for TEST_DB.TEST_SCHEMA\"}", + "value": "{\"customProperties\": {\"platform\": \"snowflake\", \"instance\": \"PROD\", \"database\": \"test_db\", \"schema\": \"test_schema\"}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/\", \"name\": \"TEST_SCHEMA\", \"description\": \"comment for TEST_DB.TEST_SCHEMA\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}}", "contentType": "application/json" }, "systemMetadata": { @@ -159,7 +159,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_1/\", \"name\": \"TABLE_1\", \"qualifiedName\": \"test_db.test_schema.table_1\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": 
\"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_1/\", \"name\": \"TABLE_1\", \"qualifiedName\": \"test_db.test_schema.table_1\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -243,7 +243,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_2/\", \"name\": \"TABLE_2\", \"qualifiedName\": \"test_db.test_schema.table_2\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_2/\", \"name\": \"TABLE_2\", \"qualifiedName\": \"test_db.test_schema.table_2\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -327,7 +327,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_3/\", \"name\": \"TABLE_3\", \"qualifiedName\": \"test_db.test_schema.table_3\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_3/\", \"name\": \"TABLE_3\", \"qualifiedName\": \"test_db.test_schema.table_3\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -411,7 +411,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_4/\", \"name\": \"TABLE_4\", \"qualifiedName\": \"test_db.test_schema.table_4\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_4/\", \"name\": \"TABLE_4\", \"qualifiedName\": \"test_db.test_schema.table_4\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -495,7 +495,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_5/\", \"name\": \"TABLE_5\", \"qualifiedName\": \"test_db.test_schema.table_5\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_5/\", \"name\": \"TABLE_5\", \"qualifiedName\": \"test_db.test_schema.table_5\", \"description\": \"Comment for Table\", \"created\": 
{\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -579,7 +579,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_6/\", \"name\": \"TABLE_6\", \"qualifiedName\": \"test_db.test_schema.table_6\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_6/\", \"name\": \"TABLE_6\", \"qualifiedName\": \"test_db.test_schema.table_6\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -663,7 +663,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_7/\", \"name\": \"TABLE_7\", \"qualifiedName\": \"test_db.test_schema.table_7\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_7/\", \"name\": \"TABLE_7\", \"qualifiedName\": \"test_db.test_schema.table_7\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -747,7 +747,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_8/\", \"name\": \"TABLE_8\", \"qualifiedName\": \"test_db.test_schema.table_8\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_8/\", \"name\": \"TABLE_8\", \"qualifiedName\": \"test_db.test_schema.table_8\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -831,7 +831,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_9/\", \"name\": \"TABLE_9\", \"qualifiedName\": \"test_db.test_schema.table_9\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_9/\", \"name\": \"TABLE_9\", \"qualifiedName\": \"test_db.test_schema.table_9\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -915,7 +915,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - 
"value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_10/\", \"name\": \"TABLE_10\", \"qualifiedName\": \"test_db.test_schema.table_10\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_10/\", \"name\": \"TABLE_10\", \"qualifiedName\": \"test_db.test_schema.table_10\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/snowflake-beta/snowflake_privatelink_beta_golden.json b/metadata-ingestion/tests/integration/snowflake-beta/snowflake_privatelink_beta_golden.json index 542d07ebe6e3f..ae406291240e1 100644 --- a/metadata-ingestion/tests/integration/snowflake-beta/snowflake_privatelink_beta_golden.json +++ b/metadata-ingestion/tests/integration/snowflake-beta/snowflake_privatelink_beta_golden.json @@ -5,7 +5,7 @@ "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"snowflake\", \"instance\": \"PROD\", \"database\": \"test_db\"}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/\", \"name\": \"TEST_DB\", \"description\": \"Comment for TEST_DB\"}", + "value": "{\"customProperties\": {\"platform\": \"snowflake\", \"instance\": \"PROD\", \"database\": \"test_db\"}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/\", \"name\": \"TEST_DB\", \"description\": \"Comment for TEST_DB\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}}", "contentType": "application/json" }, "systemMetadata": { @@ -61,7 +61,7 @@ "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"snowflake\", \"instance\": \"PROD\", \"database\": \"test_db\", \"schema\": \"test_schema\"}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/\", \"name\": \"TEST_SCHEMA\", \"description\": \"comment for TEST_DB.TEST_SCHEMA\"}", + "value": "{\"customProperties\": {\"platform\": \"snowflake\", \"instance\": \"PROD\", \"database\": \"test_db\", \"schema\": \"test_schema\"}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/\", \"name\": \"TEST_SCHEMA\", \"description\": \"comment for TEST_DB.TEST_SCHEMA\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}}", "contentType": "application/json" }, "systemMetadata": { @@ -159,7 +159,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_1/\", \"name\": \"TABLE_1\", \"qualifiedName\": \"test_db.test_schema.table_1\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": 
\"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_1/\", \"name\": \"TABLE_1\", \"qualifiedName\": \"test_db.test_schema.table_1\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -229,7 +229,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_2/\", \"name\": \"TABLE_2\", \"qualifiedName\": \"test_db.test_schema.table_2\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_2/\", \"name\": \"TABLE_2\", \"qualifiedName\": \"test_db.test_schema.table_2\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -299,7 +299,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_3/\", \"name\": \"TABLE_3\", \"qualifiedName\": \"test_db.test_schema.table_3\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_3/\", \"name\": \"TABLE_3\", \"qualifiedName\": \"test_db.test_schema.table_3\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -369,7 +369,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_4/\", \"name\": \"TABLE_4\", \"qualifiedName\": \"test_db.test_schema.table_4\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_4/\", \"name\": \"TABLE_4\", \"qualifiedName\": \"test_db.test_schema.table_4\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -439,7 +439,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_5/\", \"name\": \"TABLE_5\", \"qualifiedName\": \"test_db.test_schema.table_5\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": 
\"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_5/\", \"name\": \"TABLE_5\", \"qualifiedName\": \"test_db.test_schema.table_5\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -509,7 +509,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_6/\", \"name\": \"TABLE_6\", \"qualifiedName\": \"test_db.test_schema.table_6\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_6/\", \"name\": \"TABLE_6\", \"qualifiedName\": \"test_db.test_schema.table_6\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -579,7 +579,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_7/\", \"name\": \"TABLE_7\", \"qualifiedName\": \"test_db.test_schema.table_7\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_7/\", \"name\": \"TABLE_7\", \"qualifiedName\": \"test_db.test_schema.table_7\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -649,7 +649,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_8/\", \"name\": \"TABLE_8\", \"qualifiedName\": \"test_db.test_schema.table_8\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_8/\", \"name\": \"TABLE_8\", \"qualifiedName\": \"test_db.test_schema.table_8\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -719,7 +719,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_9/\", \"name\": \"TABLE_9\", \"qualifiedName\": \"test_db.test_schema.table_9\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": 
\"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_9/\", \"name\": \"TABLE_9\", \"qualifiedName\": \"test_db.test_schema.table_9\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -789,7 +789,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_10/\", \"name\": \"TABLE_10\", \"qualifiedName\": \"test_db.test_schema.table_10\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_10/\", \"name\": \"TABLE_10\", \"qualifiedName\": \"test_db.test_schema.table_10\", \"description\": \"Comment for Table\", \"created\": {\"time\": 1623110400000}, \"lastModified\": {\"time\": 1623110400000}, \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/snowflake-beta/test_snowflake_beta.py b/metadata-ingestion/tests/integration/snowflake-beta/test_snowflake_beta.py index cb09b529b0d38..95b5f63a0d95d 100644 --- a/metadata-ingestion/tests/integration/snowflake-beta/test_snowflake_beta.py +++ b/metadata-ingestion/tests/integration/snowflake-beta/test_snowflake_beta.py @@ -48,6 +48,15 @@ def default_query_results(query): "comment": "Comment for TEST_DB", } ] + elif query == SnowflakeQuery.get_databases("TEST_DB"): + return [ + { + "DATABASE_NAME": "TEST_DB", + "CREATED": datetime(2021, 6, 8, 0, 0, 0, 0), + "LAST_ALTERED": datetime(2021, 6, 8, 0, 0, 0, 0), + "COMMENT": "Comment for TEST_DB", + } + ] elif query == SnowflakeQuery.schemas_for_database("TEST_DB"): return [ { @@ -55,7 +64,13 @@ def default_query_results(query): "CREATED": datetime(2021, 6, 8, 0, 0, 0, 0), "LAST_ALTERED": datetime(2021, 6, 8, 0, 0, 0, 0), "COMMENT": "comment for TEST_DB.TEST_SCHEMA", - } + }, + { + "SCHEMA_NAME": "TEST2_SCHEMA", + "CREATED": datetime(2021, 6, 8, 0, 0, 0, 0), + "LAST_ALTERED": datetime(2021, 6, 8, 0, 0, 0, 0), + "COMMENT": "comment for TEST_DB.TEST_SCHEMA", + }, ] elif query == SnowflakeQuery.tables_for_database("TEST_DB"): return [ @@ -339,7 +354,8 @@ def test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph): username="TST_USR", password="TST_PWD", include_views=False, - table_pattern=AllowDenyPattern(allow=["test_db.test_schema.*"]), + match_fully_qualified_names=True, + schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]), include_technical_schema=True, include_table_lineage=True, include_view_lineage=False, @@ -408,7 +424,7 @@ def test_snowflake_private_link(pytestconfig, tmp_path, mock_time, mock_datahub_ username="TST_USR", password="TST_PWD", include_views=False, - table_pattern=AllowDenyPattern(allow=["test_db.test_schema.*"]), + schema_pattern=AllowDenyPattern(allow=["test_schema"]), include_technical_schema=True, include_table_lineage=False, include_view_lineage=False, diff --git a/metadata-ingestion/tests/integration/trino/trino_mces_golden.json b/metadata-ingestion/tests/integration/trino/trino_mces_golden.json index 9ca0ddbeb11b5..b0892c16a27f8 100644 --- 
a/metadata-ingestion/tests/integration/trino/trino_mces_golden.json +++ b/metadata-ingestion/tests/integration/trino/trino_mces_golden.json @@ -656,7 +656,7 @@ "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { - "value": "{\"timestampMillis\": 1632398400000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 3, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 3, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\", \"3\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 3, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Book 1\", \"Book 2\", \"Book 3\"]}, {\"fieldPath\": \"author\", \"uniqueCount\": 3, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"ABC\", \"PQR\", \"XYZ\"]}, {\"fieldPath\": \"publisher\", \"uniqueCount\": 0, \"nullCount\": 3, \"nullProportion\": 1, \"sampleValues\": []}, {\"fieldPath\": \"tags\", \"nullCount\": 3, \"nullProportion\": 1, \"sampleValues\": []}, {\"fieldPath\": \"genre_ids\", \"uniqueCount\": 0, \"nullCount\": 3, \"nullProportion\": 1, \"sampleValues\": []}]}", + "value": "{\"timestampMillis\": 1632398400000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 3, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 3, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1\", \"max\": \"3\", \"mean\": \"2.0\", \"median\": \"2\", \"stdev\": \"1.0\", \"sampleValues\": [\"1\", \"2\", \"3\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 3, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Book 1\", \"Book 2\", \"Book 3\"]}, {\"fieldPath\": \"author\", \"uniqueCount\": 3, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"ABC\", \"PQR\", \"XYZ\"]}, {\"fieldPath\": \"publisher\", \"uniqueCount\": 0, \"nullCount\": 3, \"nullProportion\": 1, \"sampleValues\": []}, {\"fieldPath\": \"tags\", \"nullCount\": 3, \"nullProportion\": 1, \"sampleValues\": []}, {\"fieldPath\": \"genre_ids\", \"uniqueCount\": 0, \"nullCount\": 3, \"nullProportion\": 1, \"sampleValues\": []}]}", "contentType": "application/json" }, "systemMetadata": { @@ -670,7 +670,7 @@ "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { - "value": "{\"timestampMillis\": 1632398400000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 2, \"columnCount\": 4, \"fieldProfiles\": [{\"fieldPath\": \"book_id\", \"uniqueCount\": 2, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"member_id\", \"uniqueCount\": 2, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"issue_date\", \"uniqueCount\": 1, \"uniqueProportion\": 0.5, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"distinctValueFrequencies\": [{\"value\": \"2021-09-27\", \"frequency\": 2}], \"sampleValues\": [\"2021-09-27\", \"2021-09-27\"]}, {\"fieldPath\": \"return_date\", \"uniqueCount\": 1, \"uniqueProportion\": 1, \"nullCount\": 1, \"nullProportion\": 0.5, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"sampleValues\": [\"2021-09-27\"]}]}", + "value": "{\"timestampMillis\": 1632398400000, \"partitionSpec\": 
{\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 2, \"columnCount\": 4, \"fieldProfiles\": [{\"fieldPath\": \"book_id\", \"uniqueCount\": 2, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1\", \"max\": \"2\", \"mean\": \"1.5\", \"median\": \"2\", \"stdev\": \"0.7071067811865476\", \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"member_id\", \"uniqueCount\": 2, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1\", \"max\": \"2\", \"mean\": \"1.5\", \"median\": \"2\", \"stdev\": \"0.7071067811865476\", \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"issue_date\", \"uniqueCount\": 1, \"uniqueProportion\": 0.5, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"distinctValueFrequencies\": [{\"value\": \"2021-09-27\", \"frequency\": 2}], \"sampleValues\": [\"2021-09-27\", \"2021-09-27\"]}, {\"fieldPath\": \"return_date\", \"uniqueCount\": 1, \"uniqueProportion\": 1, \"nullCount\": 1, \"nullProportion\": 0.5, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"sampleValues\": [\"2021-09-27\"]}]}", "contentType": "application/json" }, "systemMetadata": { @@ -684,7 +684,7 @@ "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { - "value": "{\"timestampMillis\": 1632398400000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 2, \"columnCount\": 2, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 2, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 2, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Member 1\", \"Member 2\"]}]}", + "value": "{\"timestampMillis\": 1632398400000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 2, \"columnCount\": 2, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 2, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1\", \"max\": \"2\", \"mean\": \"1.5\", \"median\": \"2\", \"stdev\": \"0.7071067811865476\", \"sampleValues\": [\"1\", \"2\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 2, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Member 1\", \"Member 2\"]}]}", "contentType": "application/json" }, "systemMetadata": { @@ -698,7 +698,7 @@ "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { - "value": "{\"timestampMillis\": 1632398400000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 1, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 1, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"2\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 1, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Book 2\"]}, {\"fieldPath\": \"author\", \"uniqueCount\": 1, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"PQR\"]}, {\"fieldPath\": \"publisher\", \"uniqueCount\": 0, \"nullCount\": 1, \"nullProportion\": 1, \"sampleValues\": []}, {\"fieldPath\": \"member_id\", \"uniqueCount\": 1, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"2\"]}, {\"fieldPath\": \"issue_date\", \"uniqueCount\": 1, \"uniqueProportion\": 1, \"nullCount\": 0, 
\"nullProportion\": 0.0, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"sampleValues\": [\"2021-09-27\"]}]}", + "value": "{\"timestampMillis\": 1632398400000, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 1, \"columnCount\": 6, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 1, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"2\", \"max\": \"2\", \"mean\": \"2.0\", \"median\": \"2\", \"stdev\": \"0.0\", \"sampleValues\": [\"2\"]}, {\"fieldPath\": \"name\", \"uniqueCount\": 1, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Book 2\"]}, {\"fieldPath\": \"author\", \"uniqueCount\": 1, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"PQR\"]}, {\"fieldPath\": \"publisher\", \"uniqueCount\": 0, \"nullCount\": 1, \"nullProportion\": 1, \"sampleValues\": []}, {\"fieldPath\": \"member_id\", \"uniqueCount\": 1, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"2\", \"max\": \"2\", \"mean\": \"2.0\", \"median\": \"2\", \"stdev\": \"0.0\", \"sampleValues\": [\"2\"]}, {\"fieldPath\": \"issue_date\", \"uniqueCount\": 1, \"uniqueProportion\": 1, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"2021-09-27\", \"max\": \"2021-09-27\", \"sampleValues\": [\"2021-09-27\"]}]}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/unit/test_bigquery_lineage.py b/metadata-ingestion/tests/unit/test_bigquery_lineage.py index a27c61be7a4ce..a7717aec82fc1 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_lineage.py +++ b/metadata-ingestion/tests/unit/test_bigquery_lineage.py @@ -24,7 +24,7 @@ def test_parse_view_lineage(): created=datetime.datetime.now(), last_altered=datetime.datetime.now(), comment="", - ddl=ddl, + view_definition=ddl, ) tables = extractor.parse_view_lineage("my_project", "my_dataset", view) assert 1 == len(tables) @@ -42,7 +42,7 @@ def test_parse_view_lineage_with_two_part_table_name(): created=datetime.datetime.now(), last_altered=datetime.datetime.now(), comment="", - ddl=ddl, + view_definition=ddl, ) tables = extractor.parse_view_lineage("my_project", "my_dataset", view) assert 1 == len(tables) @@ -60,7 +60,7 @@ def test_one_part_table(): created=datetime.datetime.now(), last_altered=datetime.datetime.now(), comment="", - ddl=ddl, + view_definition=ddl, ) tables = extractor.parse_view_lineage("my_project", "my_dataset", view) assert 1 == len(tables) @@ -78,7 +78,7 @@ def test_create_statement_with_multiple_table(): created=datetime.datetime.now(), last_altered=datetime.datetime.now(), comment="", - ddl=ddl, + view_definition=ddl, ) tables = extractor.parse_view_lineage("my_project", "my_dataset", view) tables.sort(key=lambda e: e.get_table_name()) diff --git a/metadata-ingestion/tests/unit/test_snowflake_beta_source.py b/metadata-ingestion/tests/unit/test_snowflake_beta_source.py index 6c429fb168453..ed58e4aeb1a50 100644 --- a/metadata-ingestion/tests/unit/test_snowflake_beta_source.py +++ b/metadata-ingestion/tests/unit/test_snowflake_beta_source.py @@ -91,6 +91,20 @@ def test_account_id_is_added_when_host_port_is_present(): assert config.account_id == "acctname" +def test_account_id_with_snowflake_host_suffix(): + config = SnowflakeV2Config.parse_obj( + { + "username": "user", + "password": "password", + "account_id": "https://acctname.snowflakecomputing.com", + "database_pattern": {"allow": {"^demo$"}}, + "warehouse": 
"COMPUTE_WH", + "role": "sysadmin", + } + ) + config.account_id == "acctname" + + def test_snowflake_uri_default_authentication(): config = SnowflakeV2Config.parse_obj( diff --git a/metadata-ingestion/tests/unit/test_snowflake_source.py b/metadata-ingestion/tests/unit/test_snowflake_source.py deleted file mode 100644 index 631cf2d9db67d..0000000000000 --- a/metadata-ingestion/tests/unit/test_snowflake_source.py +++ /dev/null @@ -1,410 +0,0 @@ -from unittest.mock import MagicMock, patch - -import pytest - -from datahub.configuration.common import ConfigurationError, OauthConfiguration -from datahub.ingestion.api.source import SourceCapability -from datahub.ingestion.source.sql.snowflake import SnowflakeConfig, SnowflakeSource - - -def test_snowflake_source_throws_error_on_account_id_missing(): - with pytest.raises(ConfigurationError): - SnowflakeConfig.parse_obj( - { - "username": "user", - "password": "password", - } - ) - - -def test_snowflake_throws_error_on_client_id_missing_if_using_oauth(): - oauth_dict = { - "provider": "microsoft", - "scopes": ["https://microsoft.com/f4b353d5-ef8d/.default"], - "client_secret": "6Hb9apkbc6HD7", - "authority_url": "https://login.microsoftonline.com/yourorganisation.com", - } - # assert that this is a valid oauth config on its own - OauthConfiguration.parse_obj(oauth_dict) - with pytest.raises(ValueError): - SnowflakeConfig.parse_obj( - { - "account_id": "test", - "authentication_type": "OAUTH_AUTHENTICATOR", - "oauth_config": oauth_dict, - } - ) - - -def test_snowflake_throws_error_on_client_secret_missing_if_use_certificate_is_false(): - oauth_dict = { - "client_id": "882e9831-7ea51cb2b954", - "provider": "microsoft", - "scopes": ["https://microsoft.com/f4b353d5-ef8d/.default"], - "use_certificate": False, - "authority_url": "https://login.microsoftonline.com/yourorganisation.com", - } - OauthConfiguration.parse_obj(oauth_dict) - - with pytest.raises(ValueError): - SnowflakeConfig.parse_obj( - { - "account_id": "test", - "authentication_type": "OAUTH_AUTHENTICATOR", - "oauth_config": oauth_dict, - } - ) - - -def test_snowflake_throws_error_on_encoded_oauth_private_key_missing_if_use_certificate_is_true(): - oauth_dict = { - "client_id": "882e9831-7ea51cb2b954", - "provider": "microsoft", - "scopes": ["https://microsoft.com/f4b353d5-ef8d/.default"], - "use_certificate": True, - "authority_url": "https://login.microsoftonline.com/yourorganisation.com", - "encoded_oauth_public_key": "fkdsfhkshfkjsdfiuwrwfkjhsfskfhksjf==", - } - OauthConfiguration.parse_obj(oauth_dict) - with pytest.raises(ValueError): - SnowflakeConfig.parse_obj( - { - "account_id": "test", - "authentication_type": "OAUTH_AUTHENTICATOR", - "oauth_config": oauth_dict, - } - ) - - -def test_account_id_is_added_when_host_port_is_present(): - config = SnowflakeConfig.parse_obj( - { - "username": "user", - "password": "password", - "host_port": "acctname", - "database_pattern": {"allow": {"^demo$"}}, - "warehouse": "COMPUTE_WH", - "role": "sysadmin", - } - ) - assert config.account_id == "acctname" - - -def test_snowflake_uri_default_authentication(): - - config = SnowflakeConfig.parse_obj( - { - "username": "user", - "password": "password", - "account_id": "acctname", - "database_pattern": {"allow": {"^demo$"}}, - "warehouse": "COMPUTE_WH", - "role": "sysadmin", - } - ) - - assert ( - config.get_sql_alchemy_url() - == "snowflake://user:password@acctname/?authenticator=SNOWFLAKE&warehouse=COMPUTE_WH&role" - "=sysadmin&application=acryl_datahub" - ) - - -def 
test_snowflake_uri_external_browser_authentication(): - - config = SnowflakeConfig.parse_obj( - { - "username": "user", - "account_id": "acctname", - "database_pattern": {"allow": {"^demo$"}}, - "warehouse": "COMPUTE_WH", - "role": "sysadmin", - "authentication_type": "EXTERNAL_BROWSER_AUTHENTICATOR", - } - ) - - assert ( - config.get_sql_alchemy_url() - == "snowflake://user@acctname/?authenticator=EXTERNALBROWSER&warehouse=COMPUTE_WH&role" - "=sysadmin&application=acryl_datahub" - ) - - -def test_snowflake_uri_key_pair_authentication(): - - config = SnowflakeConfig.parse_obj( - { - "username": "user", - "account_id": "acctname", - "database_pattern": {"allow": {"^demo$"}}, - "warehouse": "COMPUTE_WH", - "role": "sysadmin", - "authentication_type": "KEY_PAIR_AUTHENTICATOR", - "private_key_path": "/a/random/path", - "private_key_password": "a_random_password", - } - ) - - assert ( - config.get_sql_alchemy_url() - == "snowflake://user@acctname/?authenticator=SNOWFLAKE_JWT&warehouse=COMPUTE_WH&role" - "=sysadmin&application=acryl_datahub" - ) - - -def test_options_contain_connect_args(): - config = SnowflakeConfig.parse_obj( - { - "username": "user", - "password": "password", - "host_port": "acctname", - "database_pattern": {"allow": {"^demo$"}}, - "warehouse": "COMPUTE_WH", - "role": "sysadmin", - } - ) - connect_args = config.get_options().get("connect_args") - assert connect_args is not None - - -@patch("snowflake.connector.connect") -def test_test_connection_failure(mock_connect): - mock_connect.side_effect = Exception("Failed to connect to snowflake") - config = { - "username": "user", - "password": "password", - "account_id": "missing", - "warehouse": "COMPUTE_WH", - "role": "sysadmin", - } - report = SnowflakeSource.test_connection(config) - assert report is not None - assert report.basic_connectivity - assert not report.basic_connectivity.capable - assert report.basic_connectivity.failure_reason - assert "Failed to connect to snowflake" in report.basic_connectivity.failure_reason - - -@patch("snowflake.connector.connect") -def test_test_connection_basic_success(mock_connect): - - config = { - "username": "user", - "password": "password", - "account_id": "missing", - "warehouse": "COMPUTE_WH", - "role": "sysadmin", - } - report = SnowflakeSource.test_connection(config) - assert report is not None - assert report.basic_connectivity - assert report.basic_connectivity.capable - assert report.basic_connectivity.failure_reason is None - - -def setup_mock_connect(mock_connect, query_results=None): - def default_query_results(query): - if query == "select current_role()": - return [("TEST_ROLE",)] - elif query == "select current_secondary_roles()": - return [('{"roles":"","value":""}',)] - elif query == "select current_warehouse()": - return [("TEST_WAREHOUSE")] - # Unreachable code - raise Exception() - - connection_mock = MagicMock() - cursor_mock = MagicMock() - cursor_mock.execute.side_effect = ( - query_results if query_results is not None else default_query_results - ) - connection_mock.cursor.return_value = cursor_mock - mock_connect.return_value = connection_mock - - -@patch("snowflake.connector.connect") -def test_test_connection_no_warehouse(mock_connect): - def query_results(query): - if query == "select current_role()": - return [("TEST_ROLE",)] - elif query == "select current_secondary_roles()": - return [('{"roles":"","value":""}',)] - elif query == "select current_warehouse()": - return [(None,)] - elif query == 'show grants to role "TEST_ROLE"': - return [ - ("", "USAGE", 
"DATABASE", "DB1"), - ("", "USAGE", "SCHEMA", "DB1.SCHEMA1"), - ("", "REFERENCES", "TABLE", "DB1.SCHEMA1.TABLE1"), - ] - elif query == 'show grants to role "PUBLIC"': - return [] - # Unreachable code - raise Exception() - - config = { - "username": "user", - "password": "password", - "account_id": "missing", - "warehouse": "COMPUTE_WH", - "role": "sysadmin", - } - setup_mock_connect(mock_connect, query_results) - report = SnowflakeSource.test_connection(config) - assert report is not None - assert report.basic_connectivity - assert report.basic_connectivity.capable - assert report.basic_connectivity.failure_reason is None - - assert report.capability_report - assert report.capability_report[SourceCapability.CONTAINERS].capable - assert not report.capability_report[SourceCapability.SCHEMA_METADATA].capable - failure_reason = report.capability_report[ - SourceCapability.SCHEMA_METADATA - ].failure_reason - assert failure_reason - - assert "Current role does not have permissions to use warehouse" in failure_reason - - -@patch("snowflake.connector.connect") -def test_test_connection_capability_schema_failure(mock_connect): - def query_results(query): - if query == "select current_role()": - return [("TEST_ROLE",)] - elif query == "select current_secondary_roles()": - return [('{"roles":"","value":""}',)] - elif query == "select current_warehouse()": - return [("TEST_WAREHOUSE",)] - elif query == 'show grants to role "TEST_ROLE"': - return [("", "USAGE", "DATABASE", "DB1")] - elif query == 'show grants to role "PUBLIC"': - return [] - # Unreachable code - raise Exception() - - setup_mock_connect(mock_connect, query_results) - - config = { - "username": "user", - "password": "password", - "account_id": "missing", - "warehouse": "COMPUTE_WH", - "role": "sysadmin", - } - report = SnowflakeSource.test_connection(config) - assert report is not None - assert report.basic_connectivity - assert report.basic_connectivity.capable - assert report.basic_connectivity.failure_reason is None - assert report.capability_report - - assert report.capability_report[SourceCapability.CONTAINERS].capable - assert not report.capability_report[SourceCapability.SCHEMA_METADATA].capable - assert ( - report.capability_report[SourceCapability.SCHEMA_METADATA].failure_reason - is not None - ) - - -@patch("snowflake.connector.connect") -def test_test_connection_capability_schema_success(mock_connect): - def query_results(query): - if query == "select current_role()": - return [("TEST_ROLE",)] - elif query == "select current_secondary_roles()": - return [('{"roles":"","value":""}',)] - elif query == "select current_warehouse()": - return [("TEST_WAREHOUSE")] - elif query == 'show grants to role "TEST_ROLE"': - return [ - ["", "USAGE", "DATABASE", "DB1"], - ["", "USAGE", "SCHEMA", "DB1.SCHEMA1"], - ["", "REFERENCES", "TABLE", "DB1.SCHEMA1.TABLE1"], - ] - elif query == 'show grants to role "PUBLIC"': - return [] - # Unreachable code - raise Exception() - - setup_mock_connect(mock_connect, query_results) - - config = { - "username": "user", - "password": "password", - "account_id": "missing", - "warehouse": "COMPUTE_WH", - "role": "sysadmin", - } - report = SnowflakeSource.test_connection(config) - assert report is not None - assert report.basic_connectivity - assert report.basic_connectivity.capable - assert report.basic_connectivity.failure_reason is None - assert report.capability_report - - assert report.capability_report[SourceCapability.CONTAINERS].capable - assert 
report.capability_report[SourceCapability.SCHEMA_METADATA].capable - assert report.capability_report[SourceCapability.DESCRIPTIONS].capable - assert not report.capability_report[SourceCapability.DATA_PROFILING].capable - assert ( - report.capability_report[SourceCapability.DATA_PROFILING].failure_reason - is not None - ) - assert not report.capability_report[SourceCapability.LINEAGE_COARSE].capable - assert ( - report.capability_report[SourceCapability.LINEAGE_COARSE].failure_reason - is not None - ) - - -@patch("snowflake.connector.connect") -def test_test_connection_capability_all_success(mock_connect): - def query_results(query): - if query == "select current_role()": - return [("TEST_ROLE",)] - elif query == "select current_secondary_roles()": - return [('{"roles":"","value":""}',)] - elif query == "select current_warehouse()": - return [("TEST_WAREHOUSE")] - elif query == 'show grants to role "TEST_ROLE"': - return [ - ("", "USAGE", "DATABASE", "DB1"), - ("", "USAGE", "SCHEMA", "DB1.SCHEMA1"), - ("", "SELECT", "TABLE", "DB1.SCHEMA1.TABLE1"), - ("", "USAGE", "ROLE", "TEST_USAGE_ROLE"), - ] - elif query == 'show grants to role "PUBLIC"': - return [] - elif query == 'show grants to role "TEST_USAGE_ROLE"': - return [ - ["", "USAGE", "DATABASE", "SNOWFLAKE"], - ["", "USAGE", "SCHEMA", "ACCOUNT_USAGE"], - ["", "USAGE", "VIEW", "SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY"], - ["", "USAGE", "VIEW", "SNOWFLAKE.ACCOUNT_USAGE.ACCESS_HISTORY"], - ["", "USAGE", "VIEW", "SNOWFLAKE.ACCOUNT_USAGE.OBJECT_DEPENDENCIES"], - ] - # Unreachable code - raise Exception() - - setup_mock_connect(mock_connect, query_results) - - config = { - "username": "user", - "password": "password", - "account_id": "missing", - "warehouse": "COMPUTE_WH", - "role": "sysadmin", - } - report = SnowflakeSource.test_connection(config) - assert report is not None - assert report.basic_connectivity - assert report.basic_connectivity.capable - assert report.basic_connectivity.failure_reason is None - assert report.capability_report - - assert report.capability_report[SourceCapability.CONTAINERS].capable - assert report.capability_report[SourceCapability.SCHEMA_METADATA].capable - assert report.capability_report[SourceCapability.DATA_PROFILING].capable - assert report.capability_report[SourceCapability.DESCRIPTIONS].capable - assert report.capability_report[SourceCapability.LINEAGE_COARSE].capable diff --git a/metadata-integration/java/datahub-protobuf/README.md b/metadata-integration/java/datahub-protobuf/README.md index ee9cac40fadde..daea8d438679c 100644 --- a/metadata-integration/java/datahub-protobuf/README.md +++ b/metadata-integration/java/datahub-protobuf/README.md @@ -623,17 +623,17 @@ usage: Proto2DataHub You can run it like a standard java jar application: ```shell -java -jar build/libs/datahub-protobuf-0.8.45-SNAPSHOT.jar --descriptor ../datahub-protobuf-example/build/descriptors/main.dsc --directory ../datahub-protobuf-example/schema/protobuf/v1/clickstream/ --transport rest +java -jar build/libs/datahub-protobuf-0.8.45-SNAPSHOT.jar --descriptor ../datahub-protobuf-example/build/descriptors/main.dsc --directory ../datahub-protobuf-example/schema/protobuf/ --transport rest ``` or using gradle ```shell -../../../gradlew run --args="--descriptor ../datahub-protobuf-example/build/descriptors/main.dsc --directory ../datahub-protobuf-example/schema/protobuf/v1/clickstream/ --transport rest" +../../../gradlew run --args="--descriptor ../datahub-protobuf-example/build/descriptors/main.dsc --directory 
../datahub-protobuf-example/schema/protobuf/ --transport rest" ``` Result: ``` -java -jar build/libs/datahub-protobuf-0.8.45-SNAPSHOT.jar --descriptor ../datahub-protobuf-example/build/descriptors/main.dsc --directory ../datahub-protobuf-example/schema/protobuf/v1/clickstream/ --transport rest +java -jar build/libs/datahub-protobuf-0.8.45-SNAPSHOT.jar --descriptor ../datahub-protobuf-example/build/descriptors/main.dsc --directory ../datahub-protobuf-example/schema/protobuf/ --transport rest SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder". SLF4J: Defaulting to no-operation (NOP) logger implementation SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details. @@ -667,4 +667,4 @@ export DATAHUB_TOKEN=... # publishSchema task will publish all the protobuf files into DataHub ./gradlew publishSchema -``` \ No newline at end of file +``` diff --git a/metadata-integration/java/datahub-protobuf/build.gradle b/metadata-integration/java/datahub-protobuf/build.gradle index 7f3616840b9d8..c02892e1c3976 100644 --- a/metadata-integration/java/datahub-protobuf/build.gradle +++ b/metadata-integration/java/datahub-protobuf/build.gradle @@ -34,7 +34,7 @@ dependencies { implementation externalDependency.jgrapht implementation externalDependency.gson implementation externalDependency.commonsCli - + implementation externalDependency.httpAsyncClient implementation externalDependency.slf4jApi compileOnly externalDependency.lombok annotationProcessor externalDependency.lombok @@ -110,7 +110,7 @@ shadowJar { relocate 'javassist', 'datahub.shaded.javassist' relocate 'edu.umd.cs.findbugs', 'datahub.shaded.findbugs' relocate 'org.antlr', 'datahub.shaded.org.antlr' - relocate 'antlr', 'datahub.shaded.antlr' + relocate 'antlr', 'datahub.shaded.antlr' relocate 'org.apache.commons', 'datahub.shaded.org.apache.commons' relocate 'org.apache.http', 'datahub.shaded.org.apache.http' relocate 'org.reflections', 'datahub.shaded.org.reflections' diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/AuthPluginConfiguration.java b/metadata-io/src/main/java/com/linkedin/metadata/config/AuthPluginConfiguration.java new file mode 100644 index 0000000000000..6a5c13970517a --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/config/AuthPluginConfiguration.java @@ -0,0 +1,12 @@ +package com.linkedin.metadata.config; + +import lombok.Data; + + +@Data +public class AuthPluginConfiguration { + /** + * Plugin base directory path, default to /etc/datahub/plugins/auth + */ + String path; +} \ No newline at end of file diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/DatahubConfiguration.java b/metadata-io/src/main/java/com/linkedin/metadata/config/DataHubConfiguration.java similarity index 79% rename from metadata-io/src/main/java/com/linkedin/metadata/config/DatahubConfiguration.java rename to metadata-io/src/main/java/com/linkedin/metadata/config/DataHubConfiguration.java index 0f267760daedf..2374686b76d01 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/config/DatahubConfiguration.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/config/DataHubConfiguration.java @@ -5,9 +5,11 @@ * POJO representing the "datahub" configuration block in application.yml. 
*/ @Data -public class DatahubConfiguration { +public class DataHubConfiguration { /** * Indicates the type of server that has been deployed: quickstart, prod, or a custom configuration */ public String serverType; + + private PluginConfiguration plugin; } \ No newline at end of file diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/EntityRegistryPluginConfiguration.java b/metadata-io/src/main/java/com/linkedin/metadata/config/EntityRegistryPluginConfiguration.java new file mode 100644 index 0000000000000..4b00346a469c3 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/config/EntityRegistryPluginConfiguration.java @@ -0,0 +1,9 @@ +package com.linkedin.metadata.config; + +import lombok.Data; + + +@Data +public class EntityRegistryPluginConfiguration { + String path; +} \ No newline at end of file diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/PluginConfiguration.java b/metadata-io/src/main/java/com/linkedin/metadata/config/PluginConfiguration.java new file mode 100644 index 0000000000000..0645c1d7ea96c --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/config/PluginConfiguration.java @@ -0,0 +1,29 @@ +package com.linkedin.metadata.config; + +import lombok.Data; + + +@Data +public class PluginConfiguration { + /** + * Plugin security mode, either RESTRICTED or LENIENT + * + * Note: Ideally the pluginSecurityMode should be of type com.datahub.plugin.common.SecurityMode from metadata-service/plugin, + * However avoiding to include metadata-service/plugin as dependency in this module (i.e. metadata-io) as some modules + * from metadata-service/ are dependent on metadata-io, so it might create a circular dependency + */ + private String pluginSecurityMode; + + /** + * Directory path of entity registry, default to /etc/datahub/plugins/models + */ + private EntityRegistryPluginConfiguration entityRegistry; + /** + * The location where the Retention config files live + */ + private RetentionPluginConfiguration retention; + /** + * Plugin framework's plugin base directory path, default to /etc/datahub/plugins/auth + */ + private AuthPluginConfiguration auth; +} \ No newline at end of file diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/RetentionPluginConfiguration.java b/metadata-io/src/main/java/com/linkedin/metadata/config/RetentionPluginConfiguration.java new file mode 100644 index 0000000000000..dde7ede34c659 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/config/RetentionPluginConfiguration.java @@ -0,0 +1,9 @@ +package com.linkedin.metadata.config; + +import lombok.Data; + + +@Data +public class RetentionPluginConfiguration { + String path; +} \ No newline at end of file diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/Edge.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/Edge.java index 853f98ef9d651..11e93b233b4bd 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/Edge.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/Edge.java @@ -11,4 +11,8 @@ public class Edge { private Urn source; private Urn destination; private String relationshipType; + private Long createdOn; + private Urn createdActor; + private Long updatedOn; + private Urn updatedActor; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java index a32ff7e8efb2f..074d552418a98 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java @@ -76,6 +76,18 @@ private String toDocument(@Nonnull final Edge edge) { searchDocument.set("source", sourceObject); searchDocument.set("destination", destinationObject); searchDocument.put("relationshipType", edge.getRelationshipType()); + if (edge.getCreatedOn() != null) { + searchDocument.put("createdOn", edge.getCreatedOn()); + } + if (edge.getCreatedActor() != null) { + searchDocument.put("createdActor", edge.getCreatedActor().toString()); + } + if (edge.getUpdatedOn() != null) { + searchDocument.put("updatedOn", edge.getUpdatedOn()); + } + if (edge.getUpdatedActor() != null) { + searchDocument.put("updatedActor", edge.getUpdatedActor().toString()); + } return searchDocument.toString(); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java index f0cbdfdad36a5..7e08cf12f9ef6 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java @@ -202,17 +202,17 @@ protected GraphService getPopulatedGraphService() throws Exception { GraphService service = getGraphService(); List edges = Arrays.asList( - new Edge(datasetTwoUrn, datasetOneUrn, downstreamOf), - new Edge(datasetThreeUrn, datasetTwoUrn, downstreamOf), - new Edge(datasetFourUrn, datasetTwoUrn, downstreamOf), + new Edge(datasetTwoUrn, datasetOneUrn, downstreamOf, null, null, null, null), + new Edge(datasetThreeUrn, datasetTwoUrn, downstreamOf, null, null, null, null), + new Edge(datasetFourUrn, datasetTwoUrn, downstreamOf, null, null, null, null), - new Edge(datasetOneUrn, userOneUrn, hasOwner), - new Edge(datasetTwoUrn, userOneUrn, hasOwner), - new Edge(datasetThreeUrn, userTwoUrn, hasOwner), - new Edge(datasetFourUrn, userTwoUrn, hasOwner), + new Edge(datasetOneUrn, userOneUrn, hasOwner, null, null, null, null), + new Edge(datasetTwoUrn, userOneUrn, hasOwner, null, null, null, null), + new Edge(datasetThreeUrn, userTwoUrn, hasOwner, null, null, null, null), + new Edge(datasetFourUrn, userTwoUrn, hasOwner, null, null, null, null), - new Edge(userOneUrn, userTwoUrn, knowsUser), - new Edge(userTwoUrn, userOneUrn, knowsUser) + new Edge(userOneUrn, userTwoUrn, knowsUser, null, null, null, null), + new Edge(userTwoUrn, userOneUrn, knowsUser, null, null, null, null) ); edges.forEach(service::addEdge); @@ -225,25 +225,25 @@ protected GraphService getLineagePopulatedGraphService() throws Exception { GraphService service = getGraphService(); List edges = Arrays.asList( - new Edge(datasetTwoUrn, datasetOneUrn, downstreamOf), - new Edge(datasetThreeUrn, datasetTwoUrn, downstreamOf), - new Edge(datasetFourUrn, datasetTwoUrn, downstreamOf), - - new Edge(datasetOneUrn, userOneUrn, hasOwner), - new Edge(datasetTwoUrn, userOneUrn, hasOwner), - new Edge(datasetThreeUrn, userTwoUrn, hasOwner), - new Edge(datasetFourUrn, userTwoUrn, hasOwner), - - new Edge(userOneUrn, userTwoUrn, knowsUser), - new Edge(userTwoUrn, userOneUrn, knowsUser), - - new Edge(dataJobOneUrn, datasetOneUrn, consumes), - new Edge(dataJobOneUrn, datasetTwoUrn, consumes), - new Edge(dataJobOneUrn, datasetThreeUrn, produces), - new Edge(dataJobOneUrn, datasetFourUrn, produces), - new Edge(dataJobTwoUrn, datasetOneUrn, consumes), - new Edge(dataJobTwoUrn, 
datasetTwoUrn, consumes), - new Edge(dataJobTwoUrn, dataJobOneUrn, downstreamOf) + new Edge(datasetTwoUrn, datasetOneUrn, downstreamOf, null, null, null, null), + new Edge(datasetThreeUrn, datasetTwoUrn, downstreamOf, null, null, null, null), + new Edge(datasetFourUrn, datasetTwoUrn, downstreamOf, null, null, null, null), + + new Edge(datasetOneUrn, userOneUrn, hasOwner, null, null, null, null), + new Edge(datasetTwoUrn, userOneUrn, hasOwner, null, null, null, null), + new Edge(datasetThreeUrn, userTwoUrn, hasOwner, null, null, null, null), + new Edge(datasetFourUrn, userTwoUrn, hasOwner, null, null, null, null), + + new Edge(userOneUrn, userTwoUrn, knowsUser, null, null, null, null), + new Edge(userTwoUrn, userOneUrn, knowsUser, null, null, null, null), + + new Edge(dataJobOneUrn, datasetOneUrn, consumes, null, null, null, null), + new Edge(dataJobOneUrn, datasetTwoUrn, consumes, null, null, null, null), + new Edge(dataJobOneUrn, datasetThreeUrn, produces, null, null, null, null), + new Edge(dataJobOneUrn, datasetFourUrn, produces, null, null, null, null), + new Edge(dataJobTwoUrn, datasetOneUrn, consumes, null, null, null, null), + new Edge(dataJobTwoUrn, datasetTwoUrn, consumes, null, null, null, null), + new Edge(dataJobTwoUrn, dataJobOneUrn, downstreamOf, null, null, null, null) ); edges.forEach(service::addEdge); @@ -295,24 +295,24 @@ public Object[][] getAddEdgeTests() { Arrays.asList() }, new Object[]{ - Arrays.asList(new Edge(datasetOneUrn, datasetTwoUrn, downstreamOf)), + Arrays.asList(new Edge(datasetOneUrn, datasetTwoUrn, downstreamOf, null, null, null, null)), Arrays.asList(downstreamOfDatasetTwoRelatedEntity), Arrays.asList(downstreamOfDatasetOneRelatedEntity) }, new Object[]{ Arrays.asList( - new Edge(datasetOneUrn, datasetTwoUrn, downstreamOf), - new Edge(datasetTwoUrn, datasetThreeUrn, downstreamOf) + new Edge(datasetOneUrn, datasetTwoUrn, downstreamOf, null, null, null, null), + new Edge(datasetTwoUrn, datasetThreeUrn, downstreamOf, null, null, null, null) ), Arrays.asList(downstreamOfDatasetTwoRelatedEntity, downstreamOfDatasetThreeRelatedEntity), Arrays.asList(downstreamOfDatasetOneRelatedEntity, downstreamOfDatasetTwoRelatedEntity) }, new Object[]{ Arrays.asList( - new Edge(datasetOneUrn, datasetTwoUrn, downstreamOf), - new Edge(datasetOneUrn, userOneUrn, hasOwner), - new Edge(datasetTwoUrn, userTwoUrn, hasOwner), - new Edge(userOneUrn, userTwoUrn, knowsUser) + new Edge(datasetOneUrn, datasetTwoUrn, downstreamOf, null, null, null, null), + new Edge(datasetOneUrn, userOneUrn, hasOwner, null, null, null, null), + new Edge(datasetTwoUrn, userTwoUrn, hasOwner, null, null, null, null), + new Edge(userOneUrn, userTwoUrn, knowsUser, null, null, null, null) ), Arrays.asList( downstreamOfDatasetTwoRelatedEntity, @@ -328,9 +328,9 @@ public Object[][] getAddEdgeTests() { }, new Object[]{ Arrays.asList( - new Edge(userOneUrn, userOneUrn, knowsUser), - new Edge(userOneUrn, userOneUrn, knowsUser), - new Edge(userOneUrn, userOneUrn, knowsUser) + new Edge(userOneUrn, userOneUrn, knowsUser, null, null, null, null), + new Edge(userOneUrn, userOneUrn, knowsUser, null, null, null, null), + new Edge(userOneUrn, userOneUrn, knowsUser, null, null, null, null) ), Arrays.asList(knowsUserOneRelatedEntity), Arrays.asList(knowsUserOneRelatedEntity) @@ -922,12 +922,12 @@ public void testFindRelatedEntitiesNullSourceType() throws Exception { doTestFindRelatedEntitiesEntityType(anyType, ImmutableList.of("null"), downstreamOf, outgoingRelationships, service); 
doTestFindRelatedEntitiesEntityType(anyType, null, downstreamOf, outgoingRelationships, service); - service.addEdge(new Edge(datasetTwoUrn, datasetOneUrn, downstreamOf)); + service.addEdge(new Edge(datasetTwoUrn, datasetOneUrn, downstreamOf, null, null, null, null)); syncAfterWrite(); doTestFindRelatedEntitiesEntityType(anyType, ImmutableList.of("null"), downstreamOf, outgoingRelationships, service); doTestFindRelatedEntitiesEntityType(anyType, null, downstreamOf, outgoingRelationships, service, downstreamOfDatasetOneRelatedEntity); - service.addEdge(new Edge(datasetOneUrn, nullUrn, downstreamOf)); + service.addEdge(new Edge(datasetOneUrn, nullUrn, downstreamOf, null, null, null, null)); syncAfterWrite(); doTestFindRelatedEntitiesEntityType(anyType, ImmutableList.of("null"), downstreamOf, outgoingRelationships, service, nullRelatedEntity); doTestFindRelatedEntitiesEntityType(anyType, null, downstreamOf, outgoingRelationships, service, nullRelatedEntity, downstreamOfDatasetOneRelatedEntity); @@ -944,12 +944,12 @@ public void testFindRelatedEntitiesNullDestinationType() throws Exception { doTestFindRelatedEntitiesEntityType(anyType, ImmutableList.of("null"), downstreamOf, outgoingRelationships, service); doTestFindRelatedEntitiesEntityType(anyType, null, downstreamOf, outgoingRelationships, service); - service.addEdge(new Edge(datasetTwoUrn, datasetOneUrn, downstreamOf)); + service.addEdge(new Edge(datasetTwoUrn, datasetOneUrn, downstreamOf, null, null, null, null)); syncAfterWrite(); doTestFindRelatedEntitiesEntityType(anyType, ImmutableList.of("null"), downstreamOf, outgoingRelationships, service); doTestFindRelatedEntitiesEntityType(anyType, null, downstreamOf, outgoingRelationships, service, downstreamOfDatasetOneRelatedEntity); - service.addEdge(new Edge(datasetOneUrn, nullUrn, downstreamOf)); + service.addEdge(new Edge(datasetOneUrn, nullUrn, downstreamOf, null, null, null, null)); syncAfterWrite(); doTestFindRelatedEntitiesEntityType(anyType, ImmutableList.of("null"), downstreamOf, outgoingRelationships, service, nullRelatedEntity); doTestFindRelatedEntitiesEntityType(anyType, null, downstreamOf, outgoingRelationships, service, nullRelatedEntity, downstreamOfDatasetOneRelatedEntity); @@ -1424,7 +1424,7 @@ private List getFullyConnectedGraph(int nodes, List relationshipTy int destinationType = destinationNode % 3; Urn destination = createFromString("urn:li:type" + destinationType + ":(urn:li:node" + destinationNode + ")"); - edges.add(new Edge(source, destination, relationship)); + edges.add(new Edge(source, destination, relationship, null, null, null, null)); } } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphServiceTest.java index 9f812dbf65fc5..b210bedbff84d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphServiceTest.java @@ -188,7 +188,7 @@ public void testRemoveEdgesFromNodeNoRelationshipTypes() { public void testRemoveEdge() throws Exception { DatasetUrn datasetUrn = new DatasetUrn(new DataPlatformUrn("snowflake"), "test", FabricType.TEST); TagUrn tagUrn = new TagUrn("newTag"); - Edge edge = new Edge(datasetUrn, tagUrn, TAG_RELATIONSHIP); + Edge edge = new Edge(datasetUrn, tagUrn, TAG_RELATIONSHIP, null, null, null, null); getGraphService().addEdge(edge); syncAfterWrite(); 
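As these hunks show, every Edge call site now passes four extra arguments after the relationship type: createdOn, createdActor, updatedOn and updatedActor. A minimal sketch of constructing an edge with that provenance actually populated rather than nulls (the class name, the "DownstreamOf" string, the actor URN and the timestamp are illustrative values, not taken from this change; imports assume the usual com.linkedin.common packages):

```java
import com.linkedin.common.FabricType;
import com.linkedin.common.urn.DataPlatformUrn;
import com.linkedin.common.urn.DatasetUrn;
import com.linkedin.common.urn.Urn;
import com.linkedin.common.urn.UrnUtils;
import com.linkedin.metadata.graph.Edge;

public class EdgeProvenanceSketch {
  public static void main(String[] args) {
    // Source and destination nodes, built the same way as in the tests above.
    DatasetUrn downstream = new DatasetUrn(new DataPlatformUrn("snowflake"), "downstream_table", FabricType.TEST);
    DatasetUrn upstream = new DatasetUrn(new DataPlatformUrn("snowflake"), "upstream_table", FabricType.TEST);

    // Illustrative provenance; in the MAE hook these values come from the aspect
    // or, failing that, from the MetadataChangeLog's system metadata and audit stamp.
    Urn actor = UrnUtils.getUrn("urn:li:corpuser:datahub");
    long createdOn = 1623110400000L;

    // The four trailing arguments are the new createdOn, createdActor, updatedOn, updatedActor fields;
    // callers without provenance simply pass nulls, as the updated tests do.
    Edge edge = new Edge(downstream, upstream, "DownstreamOf", createdOn, actor, null, null);
    System.out.println(edge);
  }
}
```

Keeping the four provenance fields nullable lets existing producers keep emitting edges unchanged while GraphIndexUtils (later in this patch) backfills createdOn and createdActor from the event's system metadata and audit stamp when the aspect carries none.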
RelatedEntitiesResult result = getGraphService().findRelatedEntities(Collections.singletonList(datasetType), diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/GraphIndexUtils.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/GraphIndexUtils.java new file mode 100644 index 0000000000000..9f071bc467ed0 --- /dev/null +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/GraphIndexUtils.java @@ -0,0 +1,129 @@ +package com.linkedin.metadata.kafka.hook; + +import com.datahub.util.RecordUtils; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.schema.PathSpec; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.graph.Edge; +import com.linkedin.metadata.models.RelationshipFieldSpec; +import com.linkedin.mxe.MetadataChangeLog; +import lombok.extern.slf4j.Slf4j; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +@Slf4j +public class GraphIndexUtils { + + private GraphIndexUtils() { } + + @Nullable + private static List getActorList(@Nullable final String path, @Nonnull final RecordTemplate aspect) { + if (path == null) { + return null; + } + final PathSpec actorPathSpec = new PathSpec(path.split("/")); + final Optional value = RecordUtils.getFieldValue(aspect, actorPathSpec); + return (List) value.orElse(null); + } + + @Nullable + private static List getTimestampList(@Nullable final String path, @Nonnull final RecordTemplate aspect) { + if (path == null) { + return null; + } + final PathSpec timestampPathSpec = new PathSpec(path.split("/")); + final Optional value = RecordUtils.getFieldValue(aspect, timestampPathSpec); + return (List) value.orElse(null); + } + + @Nullable + private static boolean isValueListValid(@Nullable final List entryList, final int valueListSize) { + if (entryList == null) { + log.warn("Unable to get entry as entryList is null"); + return false; + } + if (valueListSize != entryList.size()) { + log.warn("Unable to get entry for graph edge as values list and entry list have differing sizes"); + return false; + } + return true; + } + + @Nullable + private static Long getTimestamp(@Nullable final List timestampList, final int index, final int valueListSize) { + if (isValueListValid(timestampList, valueListSize)) { + return timestampList.get(index); + } + return null; + } + + @Nullable + private static Urn getActor(@Nullable final List actorList, final int index, final int valueListSize) { + if (isValueListValid(actorList, valueListSize)) { + return actorList.get(index); + } + return null; + } + + /** + * Used to create new edges for the graph db, adding all the metadata associated with each edge based on the aspect. + * Returns a list of Edges to be consumed by the graph service. 
+ */ + @Nonnull + public static List extractGraphEdges( + @Nonnull final Map.Entry> extractedFieldsEntry, + @Nonnull final RecordTemplate aspect, + @Nonnull final Urn urn, + @Nonnull final MetadataChangeLog event + ) { + final List edgesToAdd = new ArrayList<>(); + final String createdOnPath = extractedFieldsEntry.getKey().getRelationshipAnnotation().getCreatedOn(); + final String createdActorPath = extractedFieldsEntry.getKey().getRelationshipAnnotation().getCreatedActor(); + final String updatedOnPath = extractedFieldsEntry.getKey().getRelationshipAnnotation().getUpdatedOn(); + final String updatedActorPath = extractedFieldsEntry.getKey().getRelationshipAnnotation().getUpdatedActor(); + + final List createdOnList = getTimestampList(createdOnPath, aspect); + final List createdActorList = getActorList(createdActorPath, aspect); + final List updatedOnList = getTimestampList(updatedOnPath, aspect); + final List updatedActorList = getActorList(updatedActorPath, aspect); + + int index = 0; + for (Object fieldValue : extractedFieldsEntry.getValue()) { + Long createdOn = getTimestamp(createdOnList, index, extractedFieldsEntry.getValue().size()); + Urn createdActor = getActor(createdActorList, index, extractedFieldsEntry.getValue().size()); + final Long updatedOn = getTimestamp(updatedOnList, index, extractedFieldsEntry.getValue().size()); + final Urn updatedActor = getActor(updatedActorList, index, extractedFieldsEntry.getValue().size()); + + if (createdOn == null && event.hasSystemMetadata()) { + createdOn = event.getSystemMetadata().getLastObserved(); + } + if (createdActor == null && event.hasCreated()) { + createdActor = event.getCreated().getActor(); + } + + try { + edgesToAdd.add( + new Edge( + urn, + Urn.createFromString(fieldValue.toString()), + extractedFieldsEntry.getKey().getRelationshipName(), + createdOn, + createdActor, + updatedOn, + updatedActor + ) + ); + } catch (URISyntaxException e) { + log.error("Invalid destination urn: {}", fieldValue.toString(), e); + } + index++; + } + return edgesToAdd; + } +} diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java index 9ec1d755a7afb..bf06e99560715 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHook.java @@ -43,7 +43,6 @@ import com.linkedin.mxe.SystemMetadata; import com.linkedin.util.Pair; import java.io.UnsupportedEncodingException; -import java.net.URISyntaxException; import java.net.URLEncoder; import java.util.ArrayList; import java.util.HashMap; @@ -170,9 +169,9 @@ private void handleUpdateChangeEvent(@Nonnull final MetadataChangeLog event) { // Step 2. 
For all aspects, attempt to update Graph if (_diffMode) { - updateGraphServiceDiff(urn, aspectSpec, previousAspect, aspect); + updateGraphServiceDiff(urn, aspectSpec, previousAspect, aspect, event); } else { - updateGraphService(urn, aspectSpec, aspect); + updateGraphService(urn, aspectSpec, aspect, event); } } @@ -212,11 +211,12 @@ private void handleDeleteChangeEvent(@Nonnull final MetadataChangeLog event) { if (!aspectSpec.isTimeseries()) { deleteSystemMetadata(urn, aspectSpec, isDeletingKey); - deleteGraphData(urn, aspectSpec, aspect, isDeletingKey); + deleteGraphData(urn, aspectSpec, aspect, isDeletingKey, event); deleteSearchData(urn, entitySpec.getName(), aspectSpec, aspect, isDeletingKey); } } + // TODO: remove this method once we implement sourceOverride when creating graph edges private void updateFineGrainedEdgesAndRelationships( RecordTemplate aspect, List edgesToAdd, @@ -231,7 +231,8 @@ private void updateFineGrainedEdgesAndRelationships( // for every downstream, create an edge with each of the upstreams for (Urn downstream : fineGrainedLineage.getDownstreams()) { for (Urn upstream : fineGrainedLineage.getUpstreams()) { - edgesToAdd.add(new Edge(downstream, upstream, DOWNSTREAM_OF)); + // TODO: add edges uniformly across aspects + edgesToAdd.add(new Edge(downstream, upstream, DOWNSTREAM_OF, null, null, null, null)); Set relationshipTypes = urnToRelationshipTypesBeingAdded.getOrDefault(downstream, new HashSet<>()); relationshipTypes.add(DOWNSTREAM_OF); urnToRelationshipTypesBeingAdded.put(downstream, relationshipTypes); @@ -248,6 +249,7 @@ private Urn generateSchemaFieldUrn(@Nonnull final String resourceUrn, @Nonnull f return EntityKeyUtils.convertEntityKeyToUrn(key, Constants.SCHEMA_FIELD_ENTITY_NAME); } + // TODO: remove this method once we implement sourceOverride and update inputFields aspect private void updateInputFieldEdgesAndRelationships( @Nonnull final Urn urn, @Nonnull final InputFields inputFields, @@ -258,7 +260,8 @@ private void updateInputFieldEdgesAndRelationships( for (final InputField field : inputFields.getFields()) { if (field.hasSchemaFieldUrn() && field.hasSchemaField() && field.getSchemaField().hasFieldPath()) { final Urn sourceFieldUrn = generateSchemaFieldUrn(urn.toString(), field.getSchemaField().getFieldPath()); - edgesToAdd.add(new Edge(sourceFieldUrn, field.getSchemaFieldUrn(), DOWNSTREAM_OF)); + // TODO: add edges uniformly across aspects + edgesToAdd.add(new Edge(sourceFieldUrn, field.getSchemaFieldUrn(), DOWNSTREAM_OF, null, null, null, null)); final Set relationshipTypes = urnToRelationshipTypesBeingAdded.getOrDefault(sourceFieldUrn, new HashSet<>()); relationshipTypes.add(DOWNSTREAM_OF); urnToRelationshipTypesBeingAdded.put(sourceFieldUrn, relationshipTypes); @@ -267,7 +270,12 @@ private void updateInputFieldEdgesAndRelationships( } } - private Pair, HashMap>> getEdgesAndRelationshipTypesFromAspect(Urn urn, AspectSpec aspectSpec, @Nonnull RecordTemplate aspect) { + private Pair, HashMap>> getEdgesAndRelationshipTypesFromAspect( + @Nonnull final Urn urn, + @Nonnull final AspectSpec aspectSpec, + @Nonnull final RecordTemplate aspect, + @Nonnull final MetadataChangeLog event + ) { final List edgesToAdd = new ArrayList<>(); final HashMap> urnToRelationshipTypesBeingAdded = new HashMap<>(); @@ -288,14 +296,8 @@ private Pair, HashMap>> getEdgesAndRelationshipTypes Set relationshipTypes = urnToRelationshipTypesBeingAdded.getOrDefault(urn, new HashSet<>()); relationshipTypes.add(entry.getKey().getRelationshipName()); 
urnToRelationshipTypesBeingAdded.put(urn, relationshipTypes); - for (Object fieldValue : entry.getValue()) { - try { - edgesToAdd.add( - new Edge(urn, Urn.createFromString(fieldValue.toString()), entry.getKey().getRelationshipName())); - } catch (URISyntaxException e) { - log.error("Invalid destination urn: {}", fieldValue.toString(), e); - } - } + final List newEdges = GraphIndexUtils.extractGraphEdges(entry, aspect, urn, event); + edgesToAdd.addAll(newEdges); } return Pair.of(edgesToAdd, urnToRelationshipTypesBeingAdded); } @@ -303,9 +305,14 @@ private Pair, HashMap>> getEdgesAndRelationshipTypes /** * Process snapshot and update graph index */ - private void updateGraphService(Urn urn, AspectSpec aspectSpec, RecordTemplate aspect) { + private void updateGraphService( + @Nonnull final Urn urn, + @Nonnull final AspectSpec aspectSpec, + @Nonnull final RecordTemplate aspect, + @Nonnull final MetadataChangeLog event + ) { Pair, HashMap>> edgeAndRelationTypes = - getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, aspect); + getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, aspect, event); final List edgesToAdd = edgeAndRelationTypes.getFirst(); final HashMap> urnToRelationshipTypesBeingAdded = edgeAndRelationTypes.getSecond(); @@ -320,17 +327,23 @@ private void updateGraphService(Urn urn, AspectSpec aspectSpec, RecordTemplate a } } - private void updateGraphServiceDiff(Urn urn, AspectSpec aspectSpec, @Nullable RecordTemplate oldAspect, @Nonnull RecordTemplate newAspect) { + private void updateGraphServiceDiff( + @Nonnull final Urn urn, + @Nonnull final AspectSpec aspectSpec, + @Nullable final RecordTemplate oldAspect, + @Nonnull final RecordTemplate newAspect, + @Nonnull final MetadataChangeLog event + ) { Pair, HashMap>> oldEdgeAndRelationTypes = null; if (oldAspect != null) { - oldEdgeAndRelationTypes = getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, oldAspect); + oldEdgeAndRelationTypes = getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, oldAspect, event); } final List oldEdges = oldEdgeAndRelationTypes != null ? 
oldEdgeAndRelationTypes.getFirst() : Collections.emptyList(); final Set oldEdgeSet = new HashSet<>(oldEdges); Pair, HashMap>> newEdgeAndRelationTypes = - getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, newAspect); + getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, newAspect, event); final List newEdges = newEdgeAndRelationTypes.getFirst(); final Set newEdgeSet = new HashSet<>(newEdges); @@ -419,14 +432,20 @@ private void deleteSystemMetadata(Urn urn, AspectSpec aspectSpec, Boolean isKeyA } } - private void deleteGraphData(Urn urn, AspectSpec aspectSpec, RecordTemplate aspect, Boolean isKeyAspect) { + private void deleteGraphData( + @Nonnull final Urn urn, + @Nonnull final AspectSpec aspectSpec, + @Nonnull final RecordTemplate aspect, + @Nonnull final Boolean isKeyAspect, + @Nonnull final MetadataChangeLog event + ) { if (isKeyAspect) { _graphService.removeNode(urn); return; } Pair, HashMap>> edgeAndRelationTypes = - getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, aspect); + getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, aspect, event); final HashMap> urnToRelationshipTypesBeingAdded = edgeAndRelationTypes.getSecond(); if (urnToRelationshipTypesBeingAdded.size() > 0) { diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/GraphIndexUtilsTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/GraphIndexUtilsTest.java new file mode 100644 index 0000000000000..40b0670d73f6a --- /dev/null +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/GraphIndexUtilsTest.java @@ -0,0 +1,119 @@ +package com.linkedin.metadata.kafka.hook; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.dataset.DatasetLineageType; +import com.linkedin.dataset.Upstream; +import com.linkedin.dataset.UpstreamArray; +import com.linkedin.dataset.UpstreamLineage; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.graph.Edge; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.RelationshipFieldSpec; +import com.linkedin.metadata.models.extractor.FieldExtractor; +import com.linkedin.metadata.models.registry.ConfigEntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.utils.GenericRecordUtils; +import com.linkedin.mxe.MetadataChangeLog; +import com.linkedin.mxe.SystemMetadata; +import org.testng.Assert; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +public class GraphIndexUtilsTest { + + private static final String UPSTREAM_RELATIONSHIP_PATH = "/upstreams/*/dataset"; + private static final long CREATED_EVENT_TIME = 123L; + private static final long UPDATED_EVENT_TIME = 234L; + private Urn _datasetUrn; + private DatasetUrn _upstreamDataset1; + private DatasetUrn _upstreamDataset2; + private static final String CREATED_ACTOR_URN = "urn:li:corpuser:creating"; + private static final String UPDATED_ACTOR_URN = "urn:li:corpuser:updating"; + private EntityRegistry _mockRegistry; + private Urn _createdActorUrn; + private Urn _updatedActorUrn; + + @BeforeMethod + public void setupTest() { + _createdActorUrn = 
UrnUtils.getUrn(CREATED_ACTOR_URN); + _updatedActorUrn = UrnUtils.getUrn(UPDATED_ACTOR_URN); + _datasetUrn = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj.jaffle_shop.customers,PROD)"); + _upstreamDataset1 = UrnUtils.toDatasetUrn("snowflake", "test", "DEV"); + _upstreamDataset2 = UrnUtils.toDatasetUrn("snowflake", "test2", "DEV"); + _mockRegistry = new ConfigEntityRegistry( + UpdateIndicesHookTest.class.getClassLoader().getResourceAsStream("test-entity-registry.yml")); + } + + @Test + public void testExtractGraphEdgesDefault() { + UpstreamLineage upstreamLineage = createUpstreamLineage(); + MetadataChangeLog event = createMCL(upstreamLineage); + + EntitySpec entitySpec = _mockRegistry.getEntitySpec(event.getEntityType()); + AspectSpec aspectSpec = entitySpec.getAspectSpec(event.getAspectName()); + + Map> extractedFields = + FieldExtractor.extractFields(upstreamLineage, aspectSpec.getRelationshipFieldSpecs()); + + for (Map.Entry> entry : extractedFields.entrySet()) { + // check specifically for the upstreams relationship entry + if (entry.getKey().getPath().toString().equals(UPSTREAM_RELATIONSHIP_PATH)) { + List edgesToAdd = GraphIndexUtils.extractGraphEdges(entry, upstreamLineage, _datasetUrn, event); + List expectedEdgesToAdd = new ArrayList<>(); + // edges contain default created event time and created actor from system metadata + Edge edge1 = new Edge(_datasetUrn, _upstreamDataset1, entry.getKey().getRelationshipName(), CREATED_EVENT_TIME, _createdActorUrn, null, null); + Edge edge2 = new Edge(_datasetUrn, _upstreamDataset2, entry.getKey().getRelationshipName(), CREATED_EVENT_TIME, _createdActorUrn, null, null); + expectedEdgesToAdd.add(edge1); + expectedEdgesToAdd.add(edge2); + Assert.assertEquals(expectedEdgesToAdd.size(), edgesToAdd.size()); + Assert.assertTrue(edgesToAdd.containsAll(expectedEdgesToAdd)); + Assert.assertTrue(expectedEdgesToAdd.containsAll(edgesToAdd)); + } + } + } + + private UpstreamLineage createUpstreamLineage() { + UpstreamLineage upstreamLineage = new UpstreamLineage(); + UpstreamArray upstreams = new UpstreamArray(); + Upstream upstream1 = new Upstream(); + upstream1.setDataset(_upstreamDataset1); + upstream1.setAuditStamp(new AuditStamp().setActor(_updatedActorUrn).setTime(UPDATED_EVENT_TIME)); + upstream1.setType(DatasetLineageType.TRANSFORMED); + Upstream upstream2 = new Upstream(); + upstream2.setDataset(_upstreamDataset2); + upstream2.setAuditStamp(new AuditStamp().setActor(_updatedActorUrn).setTime(UPDATED_EVENT_TIME)); + upstream2.setType(DatasetLineageType.TRANSFORMED); + upstreams.add(upstream1); + upstreams.add(upstream2); + upstreamLineage.setUpstreams(upstreams); + + return upstreamLineage; + } + + private MetadataChangeLog createMCL(RecordTemplate aspect) { + MetadataChangeLog event = new MetadataChangeLog(); + event.setEntityType(Constants.DATASET_ENTITY_NAME); + event.setAspectName(Constants.UPSTREAM_LINEAGE_ASPECT_NAME); + event.setChangeType(ChangeType.UPSERT); + + event.setAspect(GenericRecordUtils.serializeAspect(aspect)); + event.setEntityUrn(_datasetUrn); + + SystemMetadata systemMetadata = new SystemMetadata(); + systemMetadata.setLastObserved(CREATED_EVENT_TIME); + event.setSystemMetadata(systemMetadata); + event.setCreated(new AuditStamp().setActor(_createdActorUrn).setTime(CREATED_EVENT_TIME)); + + return event; + } +} diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java 
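For orientation, the test above pins down the new contract for graph edges: every destination urn extracted from a relationship field becomes an Edge that also carries creation audit metadata taken from the MetadataChangeLog. The following is a minimal, self-contained sketch of that mapping, not the shipped GraphIndexUtils.extractGraphEdges; the class and method names here are invented, and the seven-argument Edge constructor is the one the assertions above exercise.

import com.linkedin.common.AuditStamp;
import com.linkedin.common.urn.Urn;
import com.linkedin.metadata.graph.Edge;
import com.linkedin.metadata.models.RelationshipFieldSpec;
import com.linkedin.mxe.MetadataChangeLog;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

final class GraphEdgeSketch {
  private GraphEdgeSketch() { }

  /** Illustrative equivalent of the extraction behavior the test above verifies. */
  static List<Edge> extractGraphEdgesSketch(
      Map.Entry<RelationshipFieldSpec, List<Object>> extractedEntry,
      Urn sourceUrn,
      MetadataChangeLog event) {
    final List<Edge> edges = new ArrayList<>();
    final AuditStamp created = event.getCreated(); // who made the change, and when
    for (Object destination : extractedEntry.getValue()) {
      try {
        edges.add(new Edge(
            sourceUrn,
            Urn.createFromString(destination.toString()),
            extractedEntry.getKey().getRelationshipName(),
            created != null ? created.getTime() : null,   // createdOn
            created != null ? created.getActor() : null,  // createdActor
            null,                                          // updatedOn
            null));                                        // updatedActor
      } catch (URISyntaxException e) {
        // Skip malformed destination urns; the production hook logs these instead.
      }
    }
    return edges;
  }
}

In the default case (testExtractGraphEdgesDefault) both upstream edges therefore end up stamped with the event's created time and actor rather than the per-upstream audit stamps set in createUpstreamLineage.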
b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java index d9feb9c1dc47e..1aafd854b8aae 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java @@ -89,7 +89,7 @@ public void testFineGrainedLineageEdgesAreAdded() throws Exception { MetadataChangeLog event = createUpstreamLineageMCL(upstreamUrn, downstreamUrn); _updateIndicesHook.invoke(event); - Edge edge = new Edge(downstreamUrn, upstreamUrn, DOWNSTREAM_OF); + Edge edge = new Edge(downstreamUrn, upstreamUrn, DOWNSTREAM_OF, null, null, null, null); Mockito.verify(_mockGraphService, Mockito.times(1)).addEdge(Mockito.eq(edge)); Mockito.verify(_mockGraphService, Mockito.times(1)).removeEdgesFromNode( Mockito.eq(downstreamUrn), @@ -117,7 +117,7 @@ public void testInputFieldsEdgesAreAdded() throws Exception { Urn downstreamUrn = UrnUtils.getUrn(String.format("urn:li:schemaField:(%s,%s)", TEST_CHART_URN, downstreamFieldPath)); - Edge edge = new Edge(downstreamUrn, upstreamUrn, DOWNSTREAM_OF); + Edge edge = new Edge(downstreamUrn, upstreamUrn, DOWNSTREAM_OF, null, null, null, null); Mockito.verify(_mockGraphService, Mockito.times(1)).addEdge(Mockito.eq(edge)); Mockito.verify(_mockGraphService, Mockito.times(1)).removeEdgesFromNode( Mockito.eq(downstreamUrn), diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/key/DataHubStepStateKey.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/key/DataHubStepStateKey.pdl new file mode 100644 index 0000000000000..65b2bc2f7e351 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/key/DataHubStepStateKey.pdl @@ -0,0 +1,14 @@ +namespace com.linkedin.metadata.key + +/** + * Key for a DataHub Step State + */ +@Aspect = { + "name": "dataHubStepStateKey" +} +record DataHubStepStateKey { + /** + * A unique id for the state + */ + id: string +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/step/DataHubStepStateProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/step/DataHubStepStateProperties.pdl new file mode 100644 index 0000000000000..bdb2f30abc7e3 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/step/DataHubStepStateProperties.pdl @@ -0,0 +1,21 @@ +namespace com.linkedin.step + +import com.linkedin.common.AuditStamp + +/** + * The properties associated with a DataHub step state + */ +@Aspect = { + "name": "dataHubStepStateProperties" +} +record DataHubStepStateProperties { + /** + * Description of the secret + */ + properties: map[string, string] = { } + + /** + * Audit stamp describing the last person to update it. 
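The dataHubStepStateProperties aspect being introduced here pairs a free-form string map with a lastModified audit stamp (declared just below) and is registered for the new dataHubStepState entity further down in entity-registry.yml. Purely as an illustration of how a producer might fill in the generated Pegasus record, a short sketch follows; the fluent setters assume the usual codegen conventions, and the wrapper class and example values are invented, not part of the patch.

import com.linkedin.common.AuditStamp;
import com.linkedin.common.urn.UrnUtils;
import com.linkedin.data.template.StringMap;
import com.linkedin.step.DataHubStepStateProperties;
import java.util.Collections;

final class StepStateExample {
  private StepStateExample() { }

  /** Hypothetical population of the new aspect (values are examples only). */
  static DataHubStepStateProperties exampleStepState() {
    return new DataHubStepStateProperties()
        .setProperties(new StringMap(Collections.singletonMap("completed", "true")))
        .setLastModified(new AuditStamp()
            .setActor(UrnUtils.getUrn("urn:li:corpuser:datahub"))
            .setTime(System.currentTimeMillis()));
  }
}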
+ */ + lastModified: AuditStamp +} \ No newline at end of file diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index 781476873ba0a..52ac991ce2fca 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -151,6 +151,7 @@ entities: keyAspect: glossaryNodeKey aspects: - glossaryNodeInfo + - institutionalMemory - ownership - status - name: dataHubIngestionSource @@ -270,4 +271,9 @@ entities: keyAspect: postKey aspects: - postInfo + - name: dataHubStepState + category: core + keyAspect: dataHubStepStateKey + aspects: + - dataHubStepStateProperties events: diff --git a/metadata-service/auth-api/build.gradle b/metadata-service/auth-api/build.gradle index 74d55abd18ba7..2e9210804bed9 100644 --- a/metadata-service/auth-api/build.gradle +++ b/metadata-service/auth-api/build.gradle @@ -2,6 +2,7 @@ apply plugin: 'java' dependencies { compile project(path: ':metadata-models') + compile project(path: ':metadata-auth:auth-api') compile externalDependency.guava implementation externalDependency.slf4jApi compileOnly externalDependency.lombok diff --git a/metadata-service/auth-api/src/main/java/com/datahub/authentication/AuthenticationConfiguration.java b/metadata-service/auth-api/src/main/java/com/datahub/authentication/AuthenticationConfiguration.java index 4c19ff3c733a1..f9cf1b01e1762 100644 --- a/metadata-service/auth-api/src/main/java/com/datahub/authentication/AuthenticationConfiguration.java +++ b/metadata-service/auth-api/src/main/java/com/datahub/authentication/AuthenticationConfiguration.java @@ -13,7 +13,7 @@ public class AuthenticationConfiguration { */ private boolean enabled; /** - * List of configurations for {@link Authenticator}s to be registered + * List of configurations for {@link com.datahub.plugins.auth.authentication.Authenticator}s to be registered */ private List authenticators; /** diff --git a/metadata-service/auth-api/src/main/java/com/datahub/authentication/AuthenticationConstants.java b/metadata-service/auth-api/src/main/java/com/datahub/authentication/AuthenticationConstants.java index 95f4dbcc7d875..96a3f1b8f56bd 100644 --- a/metadata-service/auth-api/src/main/java/com/datahub/authentication/AuthenticationConstants.java +++ b/metadata-service/auth-api/src/main/java/com/datahub/authentication/AuthenticationConstants.java @@ -32,6 +32,6 @@ public class AuthenticationConstants { public static final String ENTITY_SERVICE = "entityService"; public static final String TOKEN_SERVICE = "tokenService"; - - private AuthenticationConstants() { } + private AuthenticationConstants() { + } } diff --git a/metadata-service/auth-api/src/main/java/com/datahub/authentication/AuthenticatorConfiguration.java b/metadata-service/auth-api/src/main/java/com/datahub/authentication/AuthenticatorConfiguration.java index dd7b421b31d86..2d3cf5f588d7d 100644 --- a/metadata-service/auth-api/src/main/java/com/datahub/authentication/AuthenticatorConfiguration.java +++ b/metadata-service/auth-api/src/main/java/com/datahub/authentication/AuthenticatorConfiguration.java @@ -5,12 +5,12 @@ /** - * POJO representing {@link Authenticator} configurations provided in the application.yml. + * POJO representing {@link com.datahub.plugins.auth.authentication.Authenticator} configurations provided in the application.yml. */ @Data public class AuthenticatorConfiguration { /** - * A fully-qualified class name for the {@link Authenticator} implementation to be registered. 
+ * A fully-qualified class name for the {@link com.datahub.plugins.auth.authentication.Authenticator} implementation to be registered. */ private String type; /** diff --git a/metadata-service/auth-api/src/main/java/com/datahub/authorization/AuthorizationConfiguration.java b/metadata-service/auth-api/src/main/java/com/datahub/authorization/AuthorizationConfiguration.java index 72383167edcbd..2770fc5c41aa0 100644 --- a/metadata-service/auth-api/src/main/java/com/datahub/authorization/AuthorizationConfiguration.java +++ b/metadata-service/auth-api/src/main/java/com/datahub/authorization/AuthorizationConfiguration.java @@ -1,8 +1,10 @@ package com.datahub.authorization; +import com.datahub.plugins.auth.authorization.Authorizer; import java.util.List; import lombok.Data; + /** * POJO representing the "authentication" configuration block in application.yml. */ diff --git a/metadata-service/auth-api/src/main/java/com/datahub/authorization/AuthorizerConfiguration.java b/metadata-service/auth-api/src/main/java/com/datahub/authorization/AuthorizerConfiguration.java index b83253cd2af04..65cd6c17c739c 100644 --- a/metadata-service/auth-api/src/main/java/com/datahub/authorization/AuthorizerConfiguration.java +++ b/metadata-service/auth-api/src/main/java/com/datahub/authorization/AuthorizerConfiguration.java @@ -1,5 +1,6 @@ package com.datahub.authorization; +import com.datahub.plugins.auth.authorization.Authorizer; import java.util.Map; import lombok.Data; @@ -21,5 +22,4 @@ public class AuthorizerConfiguration { * A set of authorizer-specific configurations passed through during "init" of the authorizer. */ private Map configs; - } diff --git a/metadata-service/auth-api/src/main/java/com/datahub/authorization/AuthorizerContext.java b/metadata-service/auth-api/src/main/java/com/datahub/authorization/AuthorizerContext.java deleted file mode 100644 index 65c0c08a216ac..0000000000000 --- a/metadata-service/auth-api/src/main/java/com/datahub/authorization/AuthorizerContext.java +++ /dev/null @@ -1,17 +0,0 @@ -package com.datahub.authorization; - -import lombok.AllArgsConstructor; -import lombok.Data; - - -/** - * Context provided to an Authorizer on initialization. - */ -@Data -@AllArgsConstructor -public class AuthorizerContext { - /** - * A utility for resolving a {@link ResourceSpec} to resolved resource field values. 
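The import changes above repoint these configuration POJOs at the relocated com.datahub.plugins.auth.authorization.Authorizer interface. As a hedged sketch only, a custom authorizer written against that interface might look as follows; it uses just the method shapes visible elsewhere in this patch (init, authorize, authorizedActors), the generic type parameters and the AuthorizerContext/ResourceSpec import locations are assumptions, and the AllowEverythingAuthorizer class itself is invented for illustration.

package com.example.datahub.plugins; // hypothetical plugin package

import com.datahub.authorization.AuthorizationRequest;
import com.datahub.authorization.AuthorizationResult;
import com.datahub.authorization.AuthorizedActors;
import com.datahub.authorization.AuthorizerContext;
import com.datahub.authorization.ResourceSpec;
import com.datahub.plugins.auth.authorization.Authorizer;
import java.util.ArrayList;
import java.util.Map;
import java.util.Optional;
import javax.annotation.Nonnull;

public class AllowEverythingAuthorizer implements Authorizer {

  @Override
  public void init(@Nonnull Map<String, Object> authorizerConfig, @Nonnull AuthorizerContext ctx) {
    // Read plugin-specific settings from authorizerConfig here if needed.
  }

  @Override
  public AuthorizationResult authorize(@Nonnull AuthorizationRequest request) {
    // Trivial policy for illustration: grant every request and echo the decision back.
    String message = String.format("Allowing privilege %s for actor %s",
        request.getPrivilege(), request.getActorUrn());
    return new AuthorizationResult(request, AuthorizationResult.Type.ALLOW, message);
  }

  @Override
  public AuthorizedActors authorizedActors(String privilege, Optional<ResourceSpec> resourceSpec) {
    // Same result shape the removed RangerAuthorizer used: no explicit actors, all users/groups allowed.
    return new AuthorizedActors(privilege, new ArrayList<>(), new ArrayList<>(), true, true);
  }
}

AuthorizerChain.authorize, later in this patch, short-circuits on the first ALLOW result it receives from such an authorizer.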
- */ - private ResourceSpecResolver resourceSpecResolver; -} diff --git a/metadata-service/auth-filter/build.gradle b/metadata-service/auth-filter/build.gradle index 8de29b75e35a6..569f15befe6d6 100644 --- a/metadata-service/auth-filter/build.gradle +++ b/metadata-service/auth-filter/build.gradle @@ -1,7 +1,8 @@ apply plugin: 'java' dependencies { - compile project(':metadata-service:auth-api'); + compile project(':metadata-auth:auth-api'); + compile project(path: ':metadata-service:auth-api') compile project(path: ':metadata-service:factories') compile externalDependency.servletApi diff --git a/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java b/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java new file mode 100644 index 0000000000000..1e875aeb65b31 --- /dev/null +++ b/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java @@ -0,0 +1,250 @@ +package com.datahub.auth.authentication.filter; + +import com.datahub.authentication.authenticator.AuthenticatorChain; +import com.datahub.authentication.authenticator.DataHubSystemAuthenticator; +import com.datahub.authentication.authenticator.NoOpAuthenticator; +import com.datahub.authentication.token.StatefulTokenService; +import com.datahub.plugins.PluginConstant; +import com.datahub.authentication.Authentication; +import com.datahub.authentication.AuthenticationConfiguration; +import com.datahub.authentication.AuthenticationContext; +import com.datahub.authentication.AuthenticationException; +import com.datahub.authentication.AuthenticationRequest; +import com.datahub.plugins.auth.authentication.Authenticator; +import com.datahub.authentication.AuthenticatorConfiguration; +import com.datahub.authentication.AuthenticatorContext; +import com.datahub.plugins.common.PluginConfig; +import com.datahub.plugins.common.PluginPermissionManager; +import com.datahub.plugins.common.PluginType; +import com.datahub.plugins.common.SecurityMode; +import com.datahub.plugins.configuration.Config; +import com.datahub.plugins.configuration.ConfigProvider; +import com.datahub.plugins.factory.PluginConfigFactory; +import com.datahub.plugins.loader.IsolatedClassLoader; +import com.datahub.plugins.loader.PluginPermissionManagerImpl; +import com.google.common.collect.ImmutableMap; +import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.metadata.entity.EntityService; +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; +import javax.inject.Inject; +import javax.inject.Named; +import javax.servlet.Filter; +import javax.servlet.FilterChain; +import javax.servlet.FilterConfig; +import javax.servlet.ServletException; +import javax.servlet.ServletRequest; +import javax.servlet.ServletResponse; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import lombok.extern.slf4j.Slf4j; +import org.springframework.web.context.support.SpringBeanAutowiringSupport; + +import static com.datahub.authentication.AuthenticationConstants.*; + + +/** + * A servlet {@link Filter} for authenticating requests inbound to the Metadata Service. This filter is applied to the + * GraphQL Servlet, the Rest.li Servlet, and the Auth (token) Servlet. 
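Because this filter now also loads plugin Authenticators (see registerPlugins below), a hedged sketch of such a plugin is included here for orientation. It follows the call shapes the filter and AuthenticatorChain rely on, init(configs, context) and authenticate(request) returning an Authentication or null, but the getRequestHeaders() accessor, the Actor/Authentication constructor shapes, the header name, and the class name are assumptions rather than part of the patch.

package com.example.datahub.plugins; // hypothetical plugin package

import com.datahub.authentication.Actor;
import com.datahub.authentication.ActorType;
import com.datahub.authentication.Authentication;
import com.datahub.authentication.AuthenticationException;
import com.datahub.authentication.AuthenticationRequest;
import com.datahub.authentication.AuthenticatorContext;
import com.datahub.plugins.auth.authentication.Authenticator;
import java.util.Map;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;

public class HeaderBasedAuthenticator implements Authenticator {

  private String expectedHeader; // e.g. "X-DataHub-Example-Token" (illustrative)

  @Override
  public void init(@Nonnull Map<String, Object> config, @Nullable AuthenticatorContext context) {
    // The filter passes the plugin's free-form config block straight through to init.
    this.expectedHeader = (String) config.getOrDefault("headerName", "X-DataHub-Example-Token");
  }

  @Override
  public Authentication authenticate(@Nonnull AuthenticationRequest request) throws AuthenticationException {
    String value = request.getRequestHeaders().get(expectedHeader);
    if (value == null) {
      // Returning null (or throwing) lets the chain fall through to the next authenticator.
      return null;
    }
    // Resolve the credential to an actor id however the plugin sees fit (hard-coded here).
    return new Authentication(new Actor(ActorType.USER, "datahub"), value);
  }
}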
+ */ +@Slf4j +public class AuthenticationFilter implements Filter { + + @Inject + private ConfigurationProvider configurationProvider; + + @Inject + @Named("entityService") + private EntityService _entityService; + + @Inject + @Named("dataHubTokenService") + private StatefulTokenService _tokenService; + + private AuthenticatorChain authenticatorChain; + + @Override + public void init(FilterConfig filterConfig) throws ServletException { + SpringBeanAutowiringSupport.processInjectionBasedOnCurrentContext(this); + buildAuthenticatorChain(); + } + + @Override + public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain) + throws IOException, ServletException { + AuthenticationRequest context = buildAuthContext((HttpServletRequest) request); + Authentication authentication = null; + try { + authentication = this.authenticatorChain.authenticate(context); + } catch (AuthenticationException e) { + // For AuthenticationExpiredExceptions, terminate and provide that feedback to the user + log.debug("Failed to authenticate request. Received an AuthenticationExpiredException from authenticator chain.", + e); + ((HttpServletResponse) response).sendError(HttpServletResponse.SC_UNAUTHORIZED, e.getMessage()); + return; + } + + if (authentication != null) { + // Successfully authenticated. + log.debug(String.format("Successfully authenticated request for Actor with type: %s, id: %s", + authentication.getActor().getType(), authentication.getActor().getId())); + AuthenticationContext.setAuthentication(authentication); + chain.doFilter(request, response); + } else { + // Reject request + log.debug("Failed to authenticate request. Received 'null' Authentication value from authenticator chain."); + ((HttpServletResponse) response).sendError(HttpServletResponse.SC_UNAUTHORIZED, + "Unauthorized to perform this action."); + return; + } + AuthenticationContext.remove(); + } + + @Override + public void destroy() { + // Nothing + } + + /** + * Constructs an {@link AuthenticatorChain} via the provided {@link AuthenticationConfiguration}. + * + * The process is simple: For each configured {@link Authenticator}, attempt to instantiate the class using a default (zero-arg) + * constructor, then call it's initialize method passing in a freeform block of associated configurations as a {@link Map}. Finally, + * register the {@link Authenticator} in the authenticator chain. + */ + private void buildAuthenticatorChain() { + + authenticatorChain = new AuthenticatorChain(); + + boolean isAuthEnabled = this.configurationProvider.getAuthentication().isEnabled(); + + // Create authentication context object to pass to authenticator instances. They can use it as needed. + final AuthenticatorContext authenticatorContext = new AuthenticatorContext( + ImmutableMap.of(ENTITY_SERVICE, this._entityService, TOKEN_SERVICE, this._tokenService)); + + if (isAuthEnabled) { + log.info("Auth is enabled. Building authenticator chain..."); + this.registerNativeAuthenticator(authenticatorChain, authenticatorContext); // Register native authenticators + this.registerPlugins(authenticatorChain); // Register plugin authenticators + } else { + // Authentication is not enabled. Populate authenticator chain with a purposely permissive Authenticator. + log.info("Auth is disabled. 
Building no-op authenticator chain..."); + final NoOpAuthenticator noOpAuthenticator = new NoOpAuthenticator(); + noOpAuthenticator.init( + ImmutableMap.of(SYSTEM_CLIENT_ID_CONFIG, this.configurationProvider.getAuthentication().getSystemClientId()), + authenticatorContext); + authenticatorChain.register(noOpAuthenticator); + } + } + + private AuthenticationRequest buildAuthContext(HttpServletRequest request) { + return new AuthenticationRequest(Collections.list(request.getHeaderNames()) + .stream() + .collect(Collectors.toMap(headerName -> headerName, request::getHeader))); + } + + private void registerPlugins(AuthenticatorChain authenticatorChain) { + // TODO: Introduce plugin factory to reduce duplicate code around authentication and authorization processing + + ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader(); + Path pluginBaseDirectory = Paths.get(configurationProvider.getDatahub().getPlugin().getAuth().getPath()); + Optional optionalConfig = (new ConfigProvider(pluginBaseDirectory)).load(); + optionalConfig.ifPresent((config) -> { + log.info("Processing authenticator plugin from auth plugin directory {}", pluginBaseDirectory); + PluginConfigFactory authenticatorPluginPluginConfigFactory = + new PluginConfigFactory(config); + + List authorizers = + authenticatorPluginPluginConfigFactory.loadPluginConfigs(PluginType.AUTHENTICATOR); + // Filter enabled authenticator plugins + List enabledAuthenticators = authorizers.stream().filter(pluginConfig -> { + if (!pluginConfig.getEnabled()) { + log.info(String.format("Authenticator %s is not enabled", pluginConfig.getName())); + } + return pluginConfig.getEnabled(); + }).collect(Collectors.toList()); + + SecurityMode securityMode = + SecurityMode.valueOf(this.configurationProvider.getDatahub().getPlugin().getPluginSecurityMode()); + // Create permission manager with security mode + PluginPermissionManager permissionManager = new PluginPermissionManagerImpl(securityMode); + + // Initiate Authenticators + enabledAuthenticators.forEach((pluginConfig) -> { + IsolatedClassLoader isolatedClassLoader = new IsolatedClassLoader(permissionManager, pluginConfig); + // Create context + AuthenticatorContext context = new AuthenticatorContext( + ImmutableMap.of(PluginConstant.PLUGIN_HOME, pluginConfig.getPluginHomeDirectory().toString())); + + try { + Thread.currentThread().setContextClassLoader((ClassLoader) isolatedClassLoader); + Authenticator authenticator = (Authenticator) isolatedClassLoader.instantiatePlugin(Authenticator.class); + log.info("Initializing plugin {}", pluginConfig.getName()); + authenticator.init(pluginConfig.getConfigs().orElse(Collections.emptyMap()), context); + authenticatorChain.register(authenticator); + log.info("Plugin {} is initialized", pluginConfig.getName()); + } catch (ClassNotFoundException e) { + throw new RuntimeException(String.format("Plugin className %s not found", pluginConfig.getClassName()), e); + } finally { + Thread.currentThread().setContextClassLoader(contextClassLoader); + } + }); + }); + } + + private void registerNativeAuthenticator(AuthenticatorChain authenticatorChain, AuthenticatorContext authenticatorContext) { + log.info("Registering native authenticators"); + // Register system authenticator + DataHubSystemAuthenticator systemAuthenticator = new DataHubSystemAuthenticator(); + systemAuthenticator.init( + ImmutableMap.of(SYSTEM_CLIENT_ID_CONFIG, this.configurationProvider.getAuthentication().getSystemClientId(), + SYSTEM_CLIENT_SECRET_CONFIG, 
this.configurationProvider.getAuthentication().getSystemClientSecret()), + authenticatorContext); + authenticatorChain.register(systemAuthenticator); // Always register authenticator for internal system. + + // Register authenticator define in application.yml + final List authenticatorConfigurations = + this.configurationProvider.getAuthentication().getAuthenticators(); + for (AuthenticatorConfiguration internalAuthenticatorConfig : authenticatorConfigurations) { + final String type = internalAuthenticatorConfig.getType(); + final Map configs = internalAuthenticatorConfig.getConfigs(); + + log.debug(String.format("Found configs for Authenticator of type %s: %s ", type, configs)); + + // Instantiate the Authenticator class. + Class clazz = null; + try { + clazz = (Class) Class.forName(type); + } catch (ClassNotFoundException e) { + throw new RuntimeException( + String.format("Failed to find Authenticator class with name %s on the classpath.", type)); + } + + // Ensure class conforms to the correct type. + if (!Authenticator.class.isAssignableFrom(clazz)) { + throw new IllegalArgumentException(String.format( + "Failed to instantiate invalid Authenticator with class name %s. Class does not implement the 'Authenticator' interface", + clazz.getCanonicalName())); + } + + // Else construct an instance of the class, each class should have an empty constructor. + try { + final Authenticator authenticator = clazz.newInstance(); + // Successfully created authenticator. Now init and register it. + log.debug(String.format("Initializing Authenticator with name %s", type)); + authenticator.init(configs, authenticatorContext); + log.info(String.format("Registering Authenticator with name %s", type)); + authenticatorChain.register(authenticator); + } catch (Exception e) { + throw new RuntimeException( + String.format("Failed to instantiate Authenticator with class name %s", clazz.getCanonicalName()), e); + } + } + } +} \ No newline at end of file diff --git a/metadata-service/auth-filter/src/main/java/com/datahub/authentication/filter/AuthenticationFilter.java b/metadata-service/auth-filter/src/main/java/com/datahub/authentication/filter/AuthenticationFilter.java deleted file mode 100644 index 7bae8bcd3dc0a..0000000000000 --- a/metadata-service/auth-filter/src/main/java/com/datahub/authentication/filter/AuthenticationFilter.java +++ /dev/null @@ -1,189 +0,0 @@ -package com.datahub.authentication.filter; - -import com.datahub.authentication.Authentication; -import com.datahub.authentication.AuthenticationConfiguration; -import com.datahub.authentication.AuthenticationContext; -import com.datahub.authentication.AuthenticationException; -import com.datahub.authentication.AuthenticationRequest; -import com.datahub.authentication.Authenticator; -import com.datahub.authentication.AuthenticatorConfiguration; -import com.datahub.authentication.AuthenticatorContext; -import com.datahub.authentication.authenticator.AuthenticatorChain; -import com.datahub.authentication.authenticator.DataHubSystemAuthenticator; -import com.datahub.authentication.authenticator.NoOpAuthenticator; -import com.datahub.authentication.token.StatefulTokenService; -import com.google.common.collect.ImmutableMap; -import com.linkedin.gms.factory.config.ConfigurationProvider; -import com.linkedin.metadata.entity.EntityService; -import java.io.IOException; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; -import javax.inject.Inject; -import javax.inject.Named; -import 
javax.servlet.Filter; -import javax.servlet.FilterChain; -import javax.servlet.FilterConfig; -import javax.servlet.ServletException; -import javax.servlet.ServletRequest; -import javax.servlet.ServletResponse; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; -import lombok.extern.slf4j.Slf4j; -import org.springframework.web.context.support.SpringBeanAutowiringSupport; - -import static com.datahub.authentication.AuthenticationConstants.*; - - -/** - * A servlet {@link Filter} for authenticating requests inbound to the Metadata Service. This filter is applied to the - * GraphQL Servlet, the Rest.li Servlet, and the Auth (token) Servlet. - */ -@Slf4j -public class AuthenticationFilter implements Filter { - - @Inject - private ConfigurationProvider configurationProvider; - - @Inject - @Named("entityService") - private EntityService _entityService; - - @Inject - @Named("dataHubTokenService") - private StatefulTokenService _tokenService; - - private AuthenticatorChain authenticatorChain; - - @Override - public void init(FilterConfig filterConfig) throws ServletException { - SpringBeanAutowiringSupport.processInjectionBasedOnCurrentContext(this); - buildAuthenticatorChain(); - } - - @Override - public void doFilter( - ServletRequest request, - ServletResponse response, - FilterChain chain) - throws IOException, ServletException { - AuthenticationRequest context = buildAuthContext((HttpServletRequest) request); - Authentication authentication = null; - try { - authentication = this.authenticatorChain.authenticate(context); - } catch (AuthenticationException e) { - // For AuthenticationExpiredExceptions, terminate and provide that feedback to the user - log.debug("Failed to authenticate request. Received an AuthenticationExpiredException from authenticator chain.", e); - ((HttpServletResponse) response).sendError(HttpServletResponse.SC_UNAUTHORIZED, e.getMessage()); - return; - } - - if (authentication != null) { - // Successfully authenticated. - log.debug(String.format("Successfully authenticated request for Actor with type: %s, id: %s", - authentication.getActor().getType(), - authentication.getActor().getId())); - AuthenticationContext.setAuthentication(authentication); - chain.doFilter(request, response); - } else { - // Reject request - log.debug("Failed to authenticate request. Received 'null' Authentication value from authenticator chain."); - ((HttpServletResponse) response).sendError(HttpServletResponse.SC_UNAUTHORIZED, "Unauthorized to perform this action."); - return; - } - AuthenticationContext.remove(); - } - - @Override - public void destroy() { - // Nothing - } - - /** - * Constructs an {@link AuthenticatorChain} via the provided {@link AuthenticationConfiguration}. - * - * The process is simple: For each configured {@link Authenticator}, attempt to instantiate the class using a default (zero-arg) - * constructor, then call it's initialize method passing in a freeform block of associated configurations as a {@link Map}. Finally, - * register the {@link Authenticator} in the authenticator chain. - */ - private void buildAuthenticatorChain() { - - authenticatorChain = new AuthenticatorChain(); - - boolean isAuthEnabled = this.configurationProvider.getAuthentication().isEnabled(); - - // Create authentication context object to pass to authenticator instances. They can use it as needed. 
- final AuthenticatorContext authenticatorContext = new AuthenticatorContext(ImmutableMap.of( - ENTITY_SERVICE, - this._entityService, - TOKEN_SERVICE, - this._tokenService - )); - - if (isAuthEnabled) { - log.info("Auth is enabled. Building authenticator chain..."); - - // First register the required system authenticator - DataHubSystemAuthenticator systemAuthenticator = new DataHubSystemAuthenticator(); - systemAuthenticator.init(ImmutableMap.of( - SYSTEM_CLIENT_ID_CONFIG, this.configurationProvider.getAuthentication().getSystemClientId(), - SYSTEM_CLIENT_SECRET_CONFIG, this.configurationProvider.getAuthentication().getSystemClientSecret() - ), authenticatorContext); - authenticatorChain.register(systemAuthenticator); // Always register authenticator for internal system. - - // Then create a list of authenticators based on provided configs. - final List authenticatorConfigurations = this.configurationProvider.getAuthentication().getAuthenticators(); - - for (AuthenticatorConfiguration config : authenticatorConfigurations) { - final String type = config.getType(); - final Map configs = config.getConfigs(); - - log.debug(String.format("Found configs for Authenticator of type %s: %s ", type, configs)); - - // Instantiate the Authenticator class. - Class clazz = null; - try { - clazz = (Class) Class.forName(type); - } catch (ClassNotFoundException e) { - throw new RuntimeException( - String.format("Failed to find Authenticator class with name %s on the classpath.", type)); - } - - // Ensure class conforms to the correct type. - if (!Authenticator.class.isAssignableFrom(clazz)) { - throw new IllegalArgumentException( - String.format( - "Failed to instantiate invalid Authenticator with class name %s. Class does not implement the 'Authenticator' interface", - clazz.getCanonicalName())); - } - - // Else construct an instance of the class, each class should have an empty constructor. - try { - final Authenticator authenticator = clazz.newInstance(); - // Successfully created authenticator. Now init and register it. - log.debug(String.format("Initializing Authenticator with name %s", type)); - authenticator.init(configs, authenticatorContext); - log.info(String.format("Registering Authenticator with name %s", type)); - authenticatorChain.register(authenticator); - } catch (Exception e) { - throw new RuntimeException(String.format("Failed to instantiate Authenticator with class name %s", clazz.getCanonicalName()), e); - } - } - } else { - // Authentication is not enabled. Populate authenticator chain with a purposely permissive Authenticator. - log.info("Auth is disabled. 
Building no-op authenticator chain..."); - final NoOpAuthenticator noOpAuthenticator = new NoOpAuthenticator(); - noOpAuthenticator.init(ImmutableMap.of( - SYSTEM_CLIENT_ID_CONFIG, - this.configurationProvider.getAuthentication().getSystemClientId()), authenticatorContext); - authenticatorChain.register(noOpAuthenticator); - } - } - - private AuthenticationRequest buildAuthContext(HttpServletRequest request) { - return new AuthenticationRequest(Collections.list(request.getHeaderNames()) - .stream() - .collect(Collectors.toMap(headerName -> headerName, request::getHeader))); - } -} diff --git a/metadata-service/auth-impl/build.gradle b/metadata-service/auth-impl/build.gradle index 5116c668c7d90..c0aa02b27ddb1 100644 --- a/metadata-service/auth-impl/build.gradle +++ b/metadata-service/auth-impl/build.gradle @@ -7,6 +7,7 @@ compileJava { dependencies { compile project(path: ':metadata-models') + compile project(path: ':metadata-auth:auth-api') compile project(path: ':metadata-service:auth-api') compile project(path: ':metadata-io') diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/AuthenticatorChain.java b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/AuthenticatorChain.java index 641f3797d3cd8..5d9c63d49c00d 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/AuthenticatorChain.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/AuthenticatorChain.java @@ -4,7 +4,7 @@ import com.datahub.authentication.AuthenticationException; import com.datahub.authentication.AuthenticationExpiredException; -import com.datahub.authentication.Authenticator; +import com.datahub.plugins.auth.authentication.Authenticator; import com.datahub.authentication.AuthenticationRequest; import com.linkedin.util.Pair; import javax.annotation.Nonnull; @@ -14,6 +14,7 @@ import java.util.Objects; import lombok.extern.slf4j.Slf4j; + /** * A configurable chain of {@link Authenticator}s executed in series to attempt to authenticate an inbound request. * @@ -44,11 +45,18 @@ public void register(@Nonnull final Authenticator authenticator) { @Nullable public Authentication authenticate(@Nonnull final AuthenticationRequest context) throws AuthenticationException { Objects.requireNonNull(context); + ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader(); List> authenticationFailures = new ArrayList<>(); for (final Authenticator authenticator : this.authenticators) { try { log.debug(String.format("Executing Authenticator with class name %s", authenticator.getClass().getCanonicalName())); + // The library came with plugin can use the contextClassLoader to load the classes. For example apache-ranger library does this. 
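The comments in this hunk give the reasoning: libraries bundled with a plugin may resolve classes through the thread's context classloader (apache-ranger does), so the chain temporarily installs the plugin's classloader and always restores the original afterwards. Distilled into a standalone helper purely for illustration (the helper name and Supplier-based shape are not in the patch), the save/set/restore pattern is:

import java.util.function.Supplier;

final class ContextClassLoaderUtil {
  private ContextClassLoaderUtil() { }

  /** Run a plugin call with its own classloader installed, always restoring the caller's. */
  static <T> T callWithClassLoader(ClassLoader pluginClassLoader, Supplier<T> pluginCall) {
    ClassLoader previous = Thread.currentThread().getContextClassLoader();
    try {
      Thread.currentThread().setContextClassLoader(pluginClassLoader);
      return pluginCall.get();
    } finally {
      // Restore even if the plugin call throws, mirroring the finally blocks added in this patch.
      Thread.currentThread().setContextClassLoader(previous);
    }
  }
}

AuthenticatorChain.authenticate here and AuthorizerChain.authorize later in the patch inline the same pattern around the plugin calls.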
+ // Here we need to set our IsolatedClassLoader as contextClassLoader to resolve such class loading request from plugin's home directory, + // otherwise plugin's internal library wouldn't be able to find their dependent classes + Thread.currentThread().setContextClassLoader(authenticator.getClass().getClassLoader()); Authentication result = authenticator.authenticate(context); + // reset + Thread.currentThread().setContextClassLoader(contextClassLoader); if (result != null) { // Authentication was successful - Short circuit return result; @@ -63,6 +71,8 @@ public Authentication authenticate(@Nonnull final AuthenticationRequest context) log.debug(String.format( "Caught exception while attempting to authenticate request using Authenticator %s", authenticator.getClass().getCanonicalName()), e); + } finally { + Thread.currentThread().setContextClassLoader(contextClassLoader); } } // No authentication resolved. Return null. diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/DataHubSystemAuthenticator.java b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/DataHubSystemAuthenticator.java index 923eba896ca37..80efd67ce6e27 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/DataHubSystemAuthenticator.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/DataHubSystemAuthenticator.java @@ -5,7 +5,7 @@ import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationRequest; import com.datahub.authentication.AuthenticationException; -import com.datahub.authentication.Authenticator; +import com.datahub.plugins.auth.authentication.Authenticator; import com.datahub.authentication.AuthenticatorContext; import javax.annotation.Nonnull; import java.util.Collections; diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/DataHubTokenAuthenticator.java b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/DataHubTokenAuthenticator.java index af785c1af0a12..e7e776999f34e 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/DataHubTokenAuthenticator.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/DataHubTokenAuthenticator.java @@ -1,5 +1,9 @@ package com.datahub.authentication.authenticator; +import com.datahub.authentication.token.StatefulTokenService; +import com.datahub.authentication.token.StatelessTokenService; +import com.datahub.authentication.token.TokenClaims; +import com.datahub.authentication.token.TokenExpiredException; import com.datahub.authentication.Actor; import com.datahub.authentication.Authentication; @@ -7,12 +11,8 @@ import com.datahub.authentication.AuthenticationExpiredException; import com.datahub.authentication.AuthenticationRequest; import com.datahub.authentication.AuthenticationException; -import com.datahub.authentication.Authenticator; +import com.datahub.plugins.auth.authentication.Authenticator; import com.datahub.authentication.AuthenticatorContext; -import com.datahub.authentication.token.StatefulTokenService; -import com.datahub.authentication.token.TokenClaims; -import com.datahub.authentication.token.TokenExpiredException; -import com.datahub.authentication.token.StatelessTokenService; import com.linkedin.metadata.entity.EntityService; import javax.annotation.Nonnull; import java.util.Map; @@ -33,9 +33,9 @@ @Slf4j public class 
DataHubTokenAuthenticator implements Authenticator { - static final String SIGNING_KEY_CONFIG_NAME = "signingKey"; - static final String SALT_CONFIG_NAME = "salt"; - static final String SIGNING_ALG_CONFIG_NAME = "signingAlg"; + public static final String SIGNING_KEY_CONFIG_NAME = "signingKey"; + public static final String SALT_CONFIG_NAME = "salt"; + public static final String SIGNING_ALG_CONFIG_NAME = "signingAlg"; static final String DEFAULT_SIGNING_ALG = "HS256"; static final String DEFAULT_ISSUER = "datahub-metadata-service"; diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/NoOpAuthenticator.java b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/NoOpAuthenticator.java index 076a571223f97..4e1b3cf7f73aa 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/NoOpAuthenticator.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/NoOpAuthenticator.java @@ -4,7 +4,7 @@ import com.datahub.authentication.ActorType; import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationException; -import com.datahub.authentication.Authenticator; +import com.datahub.plugins.auth.authentication.Authenticator; import com.datahub.authentication.AuthenticationRequest; import com.datahub.authentication.AuthenticatorContext; import com.linkedin.common.urn.Urn; diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java index e4a76f83b6423..e37a351e0365a 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java @@ -30,8 +30,6 @@ import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.lang.ArrayUtils; -import static com.datahub.authentication.token.TokenClaims.*; - /** * Service responsible for generating JWT tokens & managing the associated metadata entities in GMS for use within @@ -92,10 +90,10 @@ public String generateAccessToken(@Nonnull final TokenType type, @Nonnull final Map claims = new HashMap<>(); // Only stateful token service generates v2 tokens. 
- claims.put(TOKEN_VERSION_CLAIM_NAME, String.valueOf(TokenVersion.TWO.numericValue)); - claims.put(TOKEN_TYPE_CLAIM_NAME, type.toString()); - claims.put(ACTOR_TYPE_CLAIM_NAME, actor.getType()); - claims.put(ACTOR_ID_CLAIM_NAME, actor.getId()); + claims.put(TokenClaims.TOKEN_VERSION_CLAIM_NAME, String.valueOf(TokenVersion.TWO.numericValue)); + claims.put(TokenClaims.TOKEN_TYPE_CLAIM_NAME, type.toString()); + claims.put(TokenClaims.ACTOR_TYPE_CLAIM_NAME, actor.getType()); + claims.put(TokenClaims.ACTOR_ID_CLAIM_NAME, actor.getId()); final String accessToken = super.generateAccessToken(actor.getId(), claims, expiresInMs); final String tokenHash = this.hash(accessToken); diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatelessTokenService.java b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatelessTokenService.java index 6ed22bfbe3ef0..fa8ec8c818734 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatelessTokenService.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatelessTokenService.java @@ -21,8 +21,6 @@ import javax.annotation.Nullable; import javax.crypto.spec.SecretKeySpec; -import static com.datahub.authentication.token.TokenClaims.*; - /** * Service responsible for generating JWT tokens for use within DataHub in stateless way. @@ -83,10 +81,10 @@ public String generateAccessToken( Objects.requireNonNull(actor); Map claims = new HashMap<>(); - claims.put(TOKEN_VERSION_CLAIM_NAME, String.valueOf(TokenVersion.ONE.numericValue)); // Hardcode version 1 for now. - claims.put(TOKEN_TYPE_CLAIM_NAME, type.toString()); - claims.put(ACTOR_TYPE_CLAIM_NAME, actor.getType()); - claims.put(ACTOR_ID_CLAIM_NAME, actor.getId()); + claims.put(TokenClaims.TOKEN_VERSION_CLAIM_NAME, String.valueOf(TokenVersion.ONE.numericValue)); // Hardcode version 1 for now. 
+ claims.put(TokenClaims.TOKEN_TYPE_CLAIM_NAME, type.toString()); + claims.put(TokenClaims.ACTOR_TYPE_CLAIM_NAME, actor.getType()); + claims.put(TokenClaims.ACTOR_ID_CLAIM_NAME, actor.getId()); return generateAccessToken(actor.getId(), claims, expiresInMs); } @@ -135,10 +133,10 @@ public TokenClaims validateAccessToken(@Nonnull final String accessToken) throws .parseClaimsJws(accessToken); validateTokenAlgorithm(jws.getHeader().getAlgorithm()); final Claims claims = jws.getBody(); - final String tokenVersion = claims.get(TOKEN_VERSION_CLAIM_NAME, String.class); - final String tokenType = claims.get(TOKEN_TYPE_CLAIM_NAME, String.class); - final String actorId = claims.get(ACTOR_ID_CLAIM_NAME, String.class); - final String actorType = claims.get(ACTOR_TYPE_CLAIM_NAME, String.class); + final String tokenVersion = claims.get(TokenClaims.TOKEN_VERSION_CLAIM_NAME, String.class); + final String tokenType = claims.get(TokenClaims.TOKEN_TYPE_CLAIM_NAME, String.class); + final String actorId = claims.get(TokenClaims.ACTOR_ID_CLAIM_NAME, String.class); + final String actorType = claims.get(TokenClaims.ACTOR_TYPE_CLAIM_NAME, String.class); if (tokenType != null && actorId != null && actorType != null) { return new TokenClaims( TokenVersion.fromNumericStringValue(tokenVersion), diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/AuthorizerChain.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/AuthorizerChain.java index 59f1e7dbb707e..d62c37160f816 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/AuthorizerChain.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/AuthorizerChain.java @@ -1,5 +1,6 @@ package com.datahub.authorization; +import com.datahub.plugins.auth.authorization.Authorizer; import com.linkedin.common.urn.Urn; import java.util.ArrayList; import java.util.Collections; @@ -45,12 +46,25 @@ public void init(@Nonnull Map authorizerConfig, @Nonnull Authori @Nullable public AuthorizationResult authorize(@Nonnull final AuthorizationRequest request) { Objects.requireNonNull(request); + // Save contextClassLoader + ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader(); + for (final Authorizer authorizer : this.authorizers) { try { log.debug("Executing Authorizer with class name {}", authorizer.getClass().getCanonicalName()); + log.debug("Authorization Request: {}", request.toString()); + // The library came with plugin can use the contextClassLoader to load the classes. For example apache-ranger library does this. + // Here we need to set our IsolatedClassLoader as contextClassLoader to resolve such class loading request from plugin's home directory, + // otherwise plugin's internal library wouldn't be able to find their dependent classes + Thread.currentThread().setContextClassLoader(authorizer.getClass().getClassLoader()); AuthorizationResult result = authorizer.authorize(request); + // reset + Thread.currentThread().setContextClassLoader(contextClassLoader); + if (AuthorizationResult.Type.ALLOW.equals(result.type)) { // Authorization was successful - Short circuit + log.debug("Authorization is successful"); + return result; } else { log.debug("Received DENY result from Authorizer with class name {}. message: {}", @@ -59,6 +73,8 @@ public AuthorizationResult authorize(@Nonnull final AuthorizationRequest request } catch (Exception e) { log.error("Caught exception while attempting to authorize request using Authorizer {}. 
Skipping authorizer.", authorizer.getClass().getCanonicalName(), e); + } finally { + Thread.currentThread().setContextClassLoader(contextClassLoader); } } // Return failed Authorization result. diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java index 6c53b3711054f..690528059b555 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java @@ -1,6 +1,7 @@ package com.datahub.authorization; import com.datahub.authentication.Authentication; +import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.annotations.VisibleForTesting; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultResourceSpecResolver.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultResourceSpecResolver.java index 051ba1d8db6dc..cd4e0b0967829 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultResourceSpecResolver.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultResourceSpecResolver.java @@ -1,10 +1,10 @@ package com.datahub.authorization; +import com.datahub.authorization.fieldresolverprovider.EntityTypeFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.OwnerFieldResolverProvider; import com.datahub.authentication.Authentication; import com.datahub.authorization.fieldresolverprovider.DomainFieldResolverProvider; -import com.datahub.authorization.fieldresolverprovider.EntityTypeFieldResolverProvider; import com.datahub.authorization.fieldresolverprovider.EntityUrnFieldResolverProvider; -import com.datahub.authorization.fieldresolverprovider.OwnerFieldResolverProvider; import com.datahub.authorization.fieldresolverprovider.ResourceFieldResolverProvider; import com.google.common.collect.ImmutableList; import com.linkedin.entity.client.EntityClient; diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authentication/authenticator/AuthenticatorChainTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authentication/authenticator/AuthenticatorChainTest.java index 1c51c33ecb0ae..df97fc6aae8e5 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authentication/authenticator/AuthenticatorChainTest.java +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authentication/authenticator/AuthenticatorChainTest.java @@ -4,7 +4,7 @@ import com.datahub.authentication.AuthenticationException; import com.datahub.authentication.AuthenticationExpiredException; -import com.datahub.authentication.Authenticator; +import com.datahub.plugins.auth.authentication.Authenticator; import com.datahub.authentication.AuthenticationRequest; import org.mockito.Mockito; import org.testng.annotations.Test; diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authentication/token/StatefulTokenServiceTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authentication/token/StatefulTokenServiceTest.java index 2d2c16518b7fc..75a9114529d7a 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authentication/token/StatefulTokenServiceTest.java +++ 
b/metadata-service/auth-impl/src/test/java/com/datahub/authentication/token/StatefulTokenServiceTest.java @@ -117,7 +117,8 @@ public void testGenerateAccessTokenSessionToken() throws Exception { @Test public void testValidateAccessTokenFailsDueToExpiration() { - StatefulTokenService tokenService = new StatefulTokenService(TEST_SIGNING_KEY, "HS256", null, mockService, TEST_SALTING_KEY); + StatefulTokenService + tokenService = new StatefulTokenService(TEST_SIGNING_KEY, "HS256", null, mockService, TEST_SALTING_KEY); // Generate token that expires immediately. Date date = new Date(); //This method returns the time in millis diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java index 9989a8e14a822..1dcf49c6287c9 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/DataHubAuthorizerTest.java @@ -1,5 +1,4 @@ package com.datahub.authorization; - import com.datahub.authentication.Actor; import com.datahub.authentication.ActorType; import com.datahub.authentication.Authentication; @@ -285,6 +284,6 @@ private Ownership createOwnershipAspect(final List userOwners, final List getUserGroups(String userIdentifier) { - List groups = new ArrayList(); - - try { - - UserByName userByName = this.rangerRestClientWrapper.getUserByName(userIdentifier); - // userByName.id is (integer) apache ranger user identifier - groups = this.rangerRestClientWrapper.getUserById(userByName.getId()).getGroupNameList(); - - log.debug(StringUtils.format("User %s groups %s", userIdentifier, groups.toString())); - } catch (Exception e) { - throw new RuntimeException(e); - } - - return groups.stream().collect(Collectors.toSet()); - } - - public Set getUserRoles(String userIdentifier) { - List roles = new ArrayList(); - try { - roles = this.rangerRestClientWrapper.getUserRole(userIdentifier); - log.debug(StringUtils.format("User %s roles %s", userIdentifier, roles.toString())); - } catch (Exception e) { - throw new RuntimeException(e); - } - - return roles.stream().collect(Collectors.toSet()); - } - - public RangerAccessResult isAccessAllowed(RangerAccessRequest rangerAccessRequest) { - return rangerBasePlugin.isAccessAllowed(rangerAccessRequest); - } -} diff --git a/metadata-service/auth-ranger-impl/src/main/java/com/datahub/authorization/ranger/RangerAuthorizer.java b/metadata-service/auth-ranger-impl/src/main/java/com/datahub/authorization/ranger/RangerAuthorizer.java deleted file mode 100644 index 178e94610ae8b..0000000000000 --- a/metadata-service/auth-ranger-impl/src/main/java/com/datahub/authorization/ranger/RangerAuthorizer.java +++ /dev/null @@ -1,90 +0,0 @@ -package com.datahub.authorization.ranger; - -import com.datahub.authorization.AuthorizationRequest; -import com.datahub.authorization.AuthorizationResult; -import com.datahub.authorization.AuthorizedActors; -import com.datahub.authorization.Authorizer; -import com.datahub.authorization.AuthorizerContext; -import com.datahub.authorization.ResourceSpec; -import com.linkedin.common.urn.Urn; -import com.linkedin.common.urn.UrnUtils; -import java.util.ArrayList; -import java.util.Map; -import java.util.Optional; -import java.util.Set; -import javax.annotation.Nonnull; -import lombok.extern.slf4j.Slf4j; -import org.apache.ranger.plugin.policyengine.RangerAccessRequest; -import 
org.apache.ranger.plugin.policyengine.RangerAccessRequestImpl; -import org.apache.ranger.plugin.policyengine.RangerAccessResourceImpl; -import org.apache.ranger.plugin.policyengine.RangerAccessResult; - - -@Slf4j -public class RangerAuthorizer implements Authorizer { - - private AuthorizerConfig authorizerConfig; - private DataHubRangerClient dataHubRangerClient; - - public RangerAuthorizer() { - } - - @Override - public void init(@Nonnull Map authorizerConfigMap, @Nonnull final AuthorizerContext ctx) { - this.authorizerConfig = AuthorizerConfig.builder() - .username((String) authorizerConfigMap.get(AuthorizerConfig.CONFIG_USERNAME)) - .password((String) authorizerConfigMap.get(AuthorizerConfig.CONFIG_PASSWORD)) - .build(); - - this.dataHubRangerClient = this.newDataHubRangerClient(); - this.dataHubRangerClient.init(); - } - - public DataHubRangerClient newDataHubRangerClient() { - return new DataHubRangerClient(this.authorizerConfig); - } - - @Override - public AuthorizationResult authorize(AuthorizationRequest request) { - - String userIdentifier = UrnUtils.getUrn(request.getActorUrn()).getId(); - - Set roles = this.dataHubRangerClient.getUserRoles(userIdentifier); - // getUserGroups is internally calling two API to get group information of Actor - Set groups = this.dataHubRangerClient.getUserGroups(userIdentifier); - - // set ResourceSpec default to "platform" - ResourceSpec resourceSpec = request.getResourceSpec().orElse(new ResourceSpec("platform", "platform")); - - // user has requested access to specific resource - log.debug(String.format("User \"%s\" requested access", userIdentifier)); - log.debug(String.format("Access is requested for resource type: %s", resourceSpec.getType())); - log.debug(String.format("Access is requested for resource : %s", resourceSpec.getResource())); - log.debug(String.format("Requested privilege : %s", request.getPrivilege())); - - // Convert resource type to lowercase as ranger doesn't support capital letter in resource type - RangerAccessResourceImpl rangerAccessResource = new RangerAccessResourceImpl(); - rangerAccessResource.setValue(resourceSpec.getType().toLowerCase(), resourceSpec.getResource()); - RangerAccessRequest rangerAccessRequest = - new RangerAccessRequestImpl(rangerAccessResource, request.getPrivilege(), userIdentifier, groups, roles); - - // Check with Apache Ranger if access is allowed to the user - RangerAccessResult accessResult = this.dataHubRangerClient.isAccessAllowed(rangerAccessRequest); - AuthorizationResult.Type result = AuthorizationResult.Type.DENY; - - if (accessResult != null && accessResult.getIsAllowed()) { - result = AuthorizationResult.Type.ALLOW; - } - - String message = String.format("Access to resource \"%s\" for privilege \"%s\" is \"%s\" for user \"%s\"", - resourceSpec.getResource(), request.getPrivilege(), result, userIdentifier); - log.debug(message); - return new AuthorizationResult(request, result, message); - } - - @Override - public AuthorizedActors authorizedActors(String privilege, Optional resourceSpec) { - log.info("Apache Ranger authorizer authorizedActors"); - return new AuthorizedActors(privilege, new ArrayList(), new ArrayList(), true, true); - } -} diff --git a/metadata-service/auth-ranger-impl/src/main/java/com/datahub/authorization/ranger/RangerRestClientWrapper.java b/metadata-service/auth-ranger-impl/src/main/java/com/datahub/authorization/ranger/RangerRestClientWrapper.java deleted file mode 100644 index 03b4f234046e1..0000000000000 --- 
a/metadata-service/auth-ranger-impl/src/main/java/com/datahub/authorization/ranger/RangerRestClientWrapper.java +++ /dev/null @@ -1,52 +0,0 @@ -package com.datahub.authorization.ranger; - -import com.datahub.authorization.ranger.response.UserById; -import com.datahub.authorization.ranger.response.UserByName; -import com.sun.jersey.api.client.ClientResponse; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import org.apache.hadoop.util.StringUtils; -import org.apache.ranger.authorization.hadoop.config.RangerPluginConfig; -import org.apache.ranger.plugin.util.RangerRESTClient; - - -/** - * Class to wrap Apache Ranger Rest Client - */ - -public class RangerRestClientWrapper { - private static final String URI_BASE = "/service"; - private static final String USER_ROLES = URI_BASE + "/roles/roles/user/%s"; - private static final String USER_BY_NAME = URI_BASE + "/xusers/users/userName/%s"; - private static final String USER_BY_ID = URI_BASE + "/xusers/users/%d"; - - private final RangerRESTClient rangerRESTClient; - - public RangerRestClientWrapper(String rangerUrl, String rangerSslConfig, String userName, String password, - RangerPluginConfig pluginConfig) { - this.rangerRESTClient = new RangerRESTClient(rangerUrl, rangerSslConfig, pluginConfig); - this.rangerRESTClient.setBasicAuthInfo(userName, password); - } - - public UserByName getUserByName(String userName) throws Exception { - ClientResponse clientResponse = this.rangerRESTClient.get(StringUtils.format(USER_BY_NAME, userName), null); - Map userByNameMap = clientResponse.getEntity(new HashMap().getClass()); - UserByName userByNameResponse = new UserByName(userByNameMap); - return userByNameResponse; - } - - public UserById getUserById(Integer id) throws Exception { - ClientResponse clientResponse = this.rangerRESTClient.get(StringUtils.format(USER_BY_ID, id), null); - Map userByIdMap = clientResponse.getEntity(new HashMap().getClass()); - UserById userByIdResponse = new UserById(userByIdMap); - return userByIdResponse; - } - - public List getUserRole(String username) throws Exception { - ClientResponse clientResponse = - this.rangerRESTClient.get(StringUtils.format(USER_ROLES, username), null); - return clientResponse.getEntity((new ArrayList()).getClass()); - } -} \ No newline at end of file diff --git a/metadata-service/auth-ranger-impl/src/main/java/com/datahub/authorization/ranger/response/UserById.java b/metadata-service/auth-ranger-impl/src/main/java/com/datahub/authorization/ranger/response/UserById.java deleted file mode 100644 index cd4f91a0aa837..0000000000000 --- a/metadata-service/auth-ranger-impl/src/main/java/com/datahub/authorization/ranger/response/UserById.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.datahub.authorization.ranger.response; - -import java.util.List; -import java.util.Map; - - -public class UserById { - public static final String GROUP_NAME_LIST = "groupNameList"; - private final List groupNameList; - - public UserById(Map userPropertyMap) throws Exception { - if (!userPropertyMap.containsKey(GROUP_NAME_LIST)) { - throw new Exception(String.format("Property \"%s\" is not found", GROUP_NAME_LIST)); - } - - this.groupNameList = (List) userPropertyMap.get(GROUP_NAME_LIST); - } - - public List getGroupNameList() { - return this.groupNameList; - } -} - - diff --git a/metadata-service/auth-ranger-impl/src/main/java/com/datahub/authorization/ranger/response/UserByName.java 
b/metadata-service/auth-ranger-impl/src/main/java/com/datahub/authorization/ranger/response/UserByName.java deleted file mode 100644 index 3136d7a7d0ca8..0000000000000 --- a/metadata-service/auth-ranger-impl/src/main/java/com/datahub/authorization/ranger/response/UserByName.java +++ /dev/null @@ -1,22 +0,0 @@ -package com.datahub.authorization.ranger.response; - -import java.util.Map; -import org.apache.hadoop.util.StringUtils; - - -public class UserByName { - public static final String ID = "id"; - private final Integer id; - - public UserByName(Map userPropertyMap) throws Exception { - if (!userPropertyMap.containsKey(ID)) { - throw new Exception(StringUtils.format("Property \"%s\" is not found", ID)); - } - - this.id = (Integer) userPropertyMap.get(ID); - } - - public Integer getId() { - return this.id; - } -} diff --git a/metadata-service/auth-ranger-impl/src/test/java/com/datahub/authorization/ranger/RangerAuthorizerTest.java b/metadata-service/auth-ranger-impl/src/test/java/com/datahub/authorization/ranger/RangerAuthorizerTest.java deleted file mode 100644 index cefca552e7d13..0000000000000 --- a/metadata-service/auth-ranger-impl/src/test/java/com/datahub/authorization/ranger/RangerAuthorizerTest.java +++ /dev/null @@ -1,150 +0,0 @@ -package com.datahub.authorization.ranger; - -import com.datahub.authorization.AuthorizationRequest; -import com.datahub.authorization.AuthorizationResult; -import com.datahub.authorization.AuthorizerContext; -import com.datahub.authorization.ranger.response.UserById; -import com.datahub.authorization.ranger.response.UserByName; -import com.linkedin.metadata.authorization.PoliciesConfig; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.stream.Collectors; -import org.apache.ranger.plugin.policyengine.RangerAccessRequest; -import org.apache.ranger.plugin.policyengine.RangerAccessRequestImpl; -import org.apache.ranger.plugin.policyengine.RangerAccessResourceImpl; -import org.apache.ranger.plugin.policyengine.RangerAccessResult; -import org.apache.ranger.plugin.service.RangerBasePlugin; -import org.mockito.ArgumentMatcher; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; - -import static org.mockito.Mockito.*; - - -/*** - * RangerAccessRequest class doesn't have the equal method implementation and hence need to provide mock matcher - */ -class RangerAccessRequestMatcher implements ArgumentMatcher { - private final RangerAccessRequest expected; - - public RangerAccessRequestMatcher(RangerAccessRequest expected) { - this.expected = expected; - } - - @Override - public boolean matches(RangerAccessRequest argument) { - return argument.getUserGroups().equals(expected.getUserGroups()) && argument.getUserRoles() - .equals(expected.getUserRoles()) && argument.getUser().equals(expected.getUser()); - } -} - -public class RangerAuthorizerTest { - private RangerBasePlugin rangerBasePlugin; - private RangerRestClientWrapper rangerRestClientWrapper; - - private DataHubRangerClient _dataHubRangerClient; - - private RangerAuthorizer rangerAuthorizer; - private List roles; - private List groups; - - RangerAccessResourceImpl rangerAccessResource; - private Map authorizerConfigMap; - - @BeforeMethod - public void setupTest() throws Exception { - authorizerConfigMap = new HashMap<>(); - authorizerConfigMap.put(AuthorizerConfig.CONFIG_USERNAME, "foo"); - authorizerConfigMap.put(AuthorizerConfig.CONFIG_PASSWORD, "bar"); - - // Mock Apache Ranger 
library classes - rangerBasePlugin = mock(RangerBasePlugin.class); - rangerRestClientWrapper = mock(RangerRestClientWrapper.class); - - // Spy our class method to inject Mock objects of Apache Ranger library classes - rangerAuthorizer = spy(RangerAuthorizer.class); - - AuthorizerConfig authorizerConfig = AuthorizerConfig.builder() - .username((String) authorizerConfigMap.get(AuthorizerConfig.CONFIG_USERNAME)) - .password((String) authorizerConfigMap.get(AuthorizerConfig.CONFIG_PASSWORD)) - .build(); - - _dataHubRangerClient = spy(new DataHubRangerClient(authorizerConfig)); - - rangerAccessResource = new RangerAccessResourceImpl(); - rangerAccessResource.setValue("platform", "platform"); - - // Mock - doNothing().when(rangerBasePlugin).setResultProcessor(null); - doNothing().when(rangerBasePlugin).init(); - doReturn(rangerBasePlugin).when(_dataHubRangerClient).newRangerBasePlugin(); - doReturn(rangerRestClientWrapper).when(_dataHubRangerClient).newRangerRestClientWrapper(); - doReturn(_dataHubRangerClient).when(rangerAuthorizer).newDataHubRangerClient(); - - roles = new ArrayList<>(); - roles.add("admin"); - when(rangerRestClientWrapper.getUserRole("datahub")).thenReturn(roles); - - Map userByIdResponse = new HashMap<>(); - groups = new ArrayList<>(); - groups.add("public"); - userByIdResponse.put(UserById.GROUP_NAME_LIST, groups); - when(rangerRestClientWrapper.getUserById(1)).thenReturn(new UserById(userByIdResponse)); - - when(_dataHubRangerClient.newRangerBasePlugin()).thenReturn(rangerBasePlugin); - - rangerAuthorizer.init(authorizerConfigMap, new AuthorizerContext(null)); - } - - @Test - public void testAuthorizationAllow() throws Exception { - - RangerAccessRequest rangerAccessRequest = - new RangerAccessRequestImpl(rangerAccessResource, PoliciesConfig.VIEW_ANALYTICS_PRIVILEGE.getType(), "datahub", - this.groups.stream().collect(Collectors.toSet()), this.roles.stream().collect(Collectors.toSet())); - - RangerAccessResult rangerAccessResult = new RangerAccessResult(1, "datahub", null, null); - // For rangerAccessRequest the access should be allowed - rangerAccessResult.setIsAllowed(true); - - when(rangerBasePlugin.isAccessAllowed(argThat(new RangerAccessRequestMatcher(rangerAccessRequest)))).thenReturn( - rangerAccessResult); - // mock Apache Ranger API response as per username "github" - Map userByNameResponse = new HashMap<>(); - userByNameResponse.put(UserByName.ID, 1); - when(rangerRestClientWrapper.getUserByName("datahub")).thenReturn(new UserByName(userByNameResponse)); - - assert this.callAuthorizer("urn:li:corpuser:datahub").getType() == AuthorizationResult.Type.ALLOW; - } - - @Test - public void testAuthorizationDeny() throws Exception { - - RangerAccessRequest rangerAccessRequest = - new RangerAccessRequestImpl(rangerAccessResource, PoliciesConfig.VIEW_ANALYTICS_PRIVILEGE.getType(), "X", - this.groups.stream().collect(Collectors.toSet()), this.roles.stream().collect(Collectors.toSet())); - - RangerAccessResult rangerAccessResult = new RangerAccessResult(1, "datahub", null, null); - // For rangerAccessRequest the access should be denied - rangerAccessResult.setIsAllowed(false); - - // mock Apache Ranger API response as per username "X" - Map userByNameResponse = new HashMap<>(); - userByNameResponse.put(UserByName.ID, 1); - when(rangerRestClientWrapper.getUserByName("X")).thenReturn(new UserByName(userByNameResponse)); - - when(rangerBasePlugin.isAccessAllowed(argThat(new RangerAccessRequestMatcher(rangerAccessRequest)))).thenReturn( - rangerAccessResult); - - assert 
this.callAuthorizer("urn:li:corpuser:X").getType() == AuthorizationResult.Type.DENY; - } - - private AuthorizationResult callAuthorizer(String urn) { - AuthorizationRequest authorizationRequest = - new AuthorizationRequest(urn, PoliciesConfig.VIEW_ANALYTICS_PRIVILEGE.getType(), Optional.empty()); - return rangerAuthorizer.authorize(authorizationRequest); - } -} \ No newline at end of file diff --git a/metadata-service/auth-servlet-impl/build.gradle b/metadata-service/auth-servlet-impl/build.gradle index bb258a800b00e..3338f3a5c6b94 100644 --- a/metadata-service/auth-servlet-impl/build.gradle +++ b/metadata-service/auth-servlet-impl/build.gradle @@ -1,7 +1,7 @@ apply plugin: 'java' dependencies { - compile project(':metadata-service:auth-api') + compile project(':metadata-auth:auth-api') compile project(':metadata-service:factories') compile externalDependency.springCore diff --git a/metadata-service/auth-servlet-impl/src/main/java/com/datahub/authentication/AuthServiceController.java b/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java similarity index 98% rename from metadata-service/auth-servlet-impl/src/main/java/com/datahub/authentication/AuthServiceController.java rename to metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java index 816ce24a5ac47..cb52c7f57f604 100644 --- a/metadata-service/auth-servlet-impl/src/main/java/com/datahub/authentication/AuthServiceController.java +++ b/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java @@ -1,9 +1,13 @@ -package com.datahub.authentication; +package com.datahub.auth.authentication; import com.datahub.authentication.invite.InviteTokenService; import com.datahub.authentication.token.StatelessTokenService; import com.datahub.authentication.token.TokenType; import com.datahub.authentication.user.NativeUserService; +import com.datahub.authentication.Actor; +import com.datahub.authentication.ActorType; +import com.datahub.authentication.Authentication; +import com.datahub.authentication.AuthenticationContext; import com.datahub.telemetry.TrackingService; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; diff --git a/metadata-service/factories/build.gradle b/metadata-service/factories/build.gradle index bfb7a62ccbb61..2473f7c8ae4b3 100644 --- a/metadata-service/factories/build.gradle +++ b/metadata-service/factories/build.gradle @@ -5,6 +5,8 @@ dependencies { compile project(':metadata-io') compile project(':metadata-utils') compile project(':metadata-service:auth-impl') + compile project(':metadata-service:auth-api') + compile project(':metadata-service:plugin') compile project(':datahub-graphql-core') compile project(':metadata-service:restli-servlet-impl') compile project(':metadata-dao-impl:kafka-producer') @@ -24,7 +26,9 @@ dependencies { compile externalDependency.springCore compile externalDependency.springKafka compile externalDependency.springWeb - compile project(':metadata-service:auth-ranger-impl') + + implementation externalDependency.awsPostgresIamAuth + implementation externalDependency.awsRds annotationProcessor externalDependency.lombok diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java index e2d814b7e25f0..4e6d13d5c3af3 100644 --- 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/AuthorizerChainFactory.java @@ -1,20 +1,33 @@ package com.linkedin.gms.factory.auth; -import com.datahub.authentication.Authentication; -import com.datahub.authorization.AuthorizerConfiguration; -import com.datahub.authorization.AuthorizerContext; -import com.datahub.authorization.DataHubAuthorizer; -import com.datahub.authorization.Authorizer; import com.datahub.authorization.AuthorizerChain; +import com.datahub.authorization.DataHubAuthorizer; import com.datahub.authorization.DefaultResourceSpecResolver; +import com.datahub.plugins.PluginConstant; +import com.datahub.authentication.Authentication; +import com.datahub.plugins.auth.authorization.Authorizer; +import com.datahub.authorization.AuthorizerContext; import com.datahub.authorization.ResourceSpecResolver; -import com.linkedin.metadata.client.JavaEntityClient; +import com.datahub.plugins.common.PluginConfig; +import com.datahub.plugins.common.PluginPermissionManager; +import com.datahub.plugins.common.PluginType; +import com.datahub.plugins.common.SecurityMode; +import com.datahub.plugins.configuration.Config; +import com.datahub.plugins.configuration.ConfigProvider; +import com.datahub.plugins.factory.PluginConfigFactory; +import com.datahub.plugins.loader.IsolatedClassLoader; +import com.datahub.plugins.loader.PluginPermissionManagerImpl; +import com.google.common.collect.ImmutableMap; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.client.JavaEntityClient; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; @@ -50,13 +63,13 @@ public class AuthorizerChainFactory { @Scope("singleton") @Nonnull protected AuthorizerChain getInstance() { - // Init authorizer context - final AuthorizerContext ctx = initAuthorizerContext(); + final ResourceSpecResolver resolver = initResolver(); // Extract + initialize customer authorizers from application configs. - final List authorizers = new ArrayList<>(initCustomAuthorizers(ctx)); + final List authorizers = new ArrayList<>(initCustomAuthorizers(resolver)); if (configurationProvider.getAuthorization().getDefaultAuthorizer().isEnabled()) { + AuthorizerContext ctx = new AuthorizerContext(Collections.emptyMap(), resolver); this.dataHubAuthorizer.init(Collections.emptyMap(), ctx); log.info("Default DataHubAuthorizer is enabled. 
Appending it to the authorization chain."); authorizers.add(this.dataHubAuthorizer); @@ -65,53 +78,68 @@ protected AuthorizerChain getInstance() { return new AuthorizerChain(authorizers, dataHubAuthorizer); } - private AuthorizerContext initAuthorizerContext() { - final ResourceSpecResolver resolver = new DefaultResourceSpecResolver(systemAuthentication, entityClient); - return new AuthorizerContext(resolver); + private ResourceSpecResolver initResolver() { + return new DefaultResourceSpecResolver(systemAuthentication, entityClient); } - private List initCustomAuthorizers(AuthorizerContext ctx) { + private List initCustomAuthorizers(ResourceSpecResolver resolver) { final List customAuthorizers = new ArrayList<>(); - if (this.configurationProvider.getAuthorization().getAuthorizers() != null) { - - final List authorizerConfigurations = - this.configurationProvider.getAuthorization().getAuthorizers(); - - for (AuthorizerConfiguration authorizer : authorizerConfigurations) { - final String type = authorizer.getType(); - // continue if authorizer is not enabled - if (!authorizer.isEnabled()) { - log.info(String.format("Authorizer %s is not enabled", type)); - continue; - } - - final Map configs = - authorizer.getConfigs() != null ? authorizer.getConfigs() : Collections.emptyMap(); - - log.debug(String.format("Found configs for notification sink of type %s: %s ", type, configs)); - - // Instantiate the Authorizer - Class clazz = null; - try { - clazz = (Class) Class.forName(type); - } catch (ClassNotFoundException e) { - throw new RuntimeException( - String.format("Failed to find custom Authorizer class with name %s on the classpath.", type)); - } - - // Else construct an instance of the class, each class should have an empty constructor. - try { - final Authorizer authorizerInstance = clazz.newInstance(); - authorizerInstance.init(configs, ctx); - customAuthorizers.add(authorizerInstance); - log.info(String.format("Authorizer %s is initialized", type)); - } catch (Exception e) { - throw new RuntimeException( - String.format("Failed to instantiate custom Authorizer with class name %s", clazz.getCanonicalName()), e); - } - } - } + Path pluginBaseDirectory = Paths.get(configurationProvider.getDatahub().getPlugin().getAuth().getPath()); + ConfigProvider configProvider = + new ConfigProvider(pluginBaseDirectory); + + Optional optionalConfig = configProvider.load(); + // Register authorizer plugins if present + optionalConfig.ifPresent((config) -> { + registerAuthorizer(customAuthorizers, resolver, config); + }); + return customAuthorizers; } + + private void registerAuthorizer(List customAuthorizers, ResourceSpecResolver resolver, Config config) { + PluginConfigFactory authorizerPluginPluginConfigFactory = new PluginConfigFactory(config); + // Load only Authorizer configuration from plugin config factory + List authorizers = + authorizerPluginPluginConfigFactory.loadPluginConfigs(PluginType.AUTHORIZER); + + // Select only enabled authorizer for instantiation + List enabledAuthorizers = authorizers.stream().filter(pluginConfig -> { + if (!pluginConfig.getEnabled()) { + log.info(String.format("Authorizer %s is not enabled", pluginConfig.getName())); + } + return pluginConfig.getEnabled(); + }).collect(Collectors.toList()); + + // Get security mode set by user + SecurityMode securityMode = + SecurityMode.valueOf(this.configurationProvider.getDatahub().getPlugin().getPluginSecurityMode()); + // Create permission manager with security mode + PluginPermissionManager permissionManager = new 
PluginPermissionManagerImpl(securityMode); + + // Save ContextClassLoader. As some plugins are directly using context classloader from current thread to load libraries + // This will break plugin as their dependencies are inside plugin directory only + ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader(); + // Instantiate Authorizer plugins + enabledAuthorizers.forEach((pluginConfig) -> { + // Create context + AuthorizerContext context = new AuthorizerContext( + ImmutableMap.of(PluginConstant.PLUGIN_HOME, pluginConfig.getPluginHomeDirectory().toString()), resolver); + IsolatedClassLoader isolatedClassLoader = new IsolatedClassLoader(permissionManager, pluginConfig); + try { + Thread.currentThread().setContextClassLoader((ClassLoader) isolatedClassLoader); + Authorizer authorizer = (Authorizer) isolatedClassLoader.instantiatePlugin(Authorizer.class); + log.info("Initializing plugin {}", pluginConfig.getName()); + authorizer.init(pluginConfig.getConfigs().orElse(Collections.emptyMap()), context); + customAuthorizers.add(authorizer); + log.info("Plugin {} is initialized", pluginConfig.getName()); + } catch (ClassNotFoundException e) { + log.debug(String.format("Failed to init the plugin", pluginConfig.getName())); + throw new RuntimeException(e); + } finally { + Thread.currentThread().setContextClassLoader(contextClassLoader); + } + }); + } } \ No newline at end of file diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/LocalEbeanServerConfigFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/LocalEbeanServerConfigFactory.java index fa2e420e7e16a..66d917b444e01 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/LocalEbeanServerConfigFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/LocalEbeanServerConfigFactory.java @@ -6,6 +6,8 @@ import io.ebean.datasource.DataSourceConfig; import io.ebean.datasource.DataSourcePoolListener; import java.sql.Connection; +import java.util.HashMap; +import java.util.Map; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; @@ -51,6 +53,9 @@ public class LocalEbeanServerConfigFactory { @Value("${ebean.autoCreateDdl:false}") private Boolean ebeanAutoCreate; + @Value("${ebean.postgresUseIamAuth:false}") + private Boolean postgresUseIamAuth; + private DataSourcePoolListener getListenerToTrackCounts(String metricName) { final String counterName = "ebeans_connection_pool_size_" + metricName; return new DataSourcePoolListener() { @@ -79,6 +84,12 @@ private DataSourceConfig buildDataSourceConfig(String dataSourceUrl, String data dataSourceConfig.setLeakTimeMinutes(ebeanLeakTimeMinutes); dataSourceConfig.setWaitTimeoutMillis(ebeanWaitTimeoutMillis); dataSourceConfig.setListener(getListenerToTrackCounts(dataSourceType)); + // Adding IAM auth access for AWS Postgres + if (postgresUseIamAuth) { + Map custom = new HashMap<>(); + custom.put("wrapperPlugins", "iam"); + dataSourceConfig.setCustomProperties(custom); + } return dataSourceConfig; } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java index 987043b4b8003..77b0ffa65cee8 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java +++ 
b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java @@ -3,7 +3,7 @@ import com.datahub.authentication.AuthenticationConfiguration; import com.datahub.authorization.AuthorizationConfiguration; import com.linkedin.datahub.graphql.featureflags.FeatureFlags; -import com.linkedin.metadata.config.DatahubConfiguration; +import com.linkedin.metadata.config.DataHubConfiguration; import com.linkedin.metadata.config.IngestionConfiguration; import com.linkedin.metadata.config.TestsConfiguration; import com.linkedin.metadata.telemetry.TelemetryConfiguration; @@ -47,7 +47,7 @@ public class ConfigurationProvider { /** * DataHub top-level server configurations */ - private DatahubConfiguration datahub; + private DataHubConfiguration datahub; /** * Feature flags indicating what is turned on vs turned off diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java index 768f09c784def..d7613f276e138 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java @@ -2,10 +2,10 @@ import com.datahub.authentication.group.GroupService; import com.datahub.authentication.invite.InviteTokenService; -import com.datahub.authentication.post.PostService; import com.datahub.authentication.token.StatefulTokenService; import com.datahub.authentication.user.NativeUserService; import com.datahub.authorization.role.RoleService; +import com.datahub.authentication.post.PostService; import com.linkedin.datahub.graphql.GmsGraphQLEngine; import com.linkedin.datahub.graphql.GraphQLEngine; import com.linkedin.datahub.graphql.analytics.service.AnalyticsService; diff --git a/metadata-service/factories/src/main/resources/application.yml b/metadata-service/factories/src/main/resources/application.yml index 85a1735d221e3..01aa12fca0478 100644 --- a/metadata-service/factories/src/main/resources/application.yml +++ b/metadata-service/factories/src/main/resources/application.yml @@ -34,14 +34,6 @@ authorization: enabled: ${AUTH_POLICIES_ENABLED:true} cacheRefreshIntervalSecs: ${POLICY_CACHE_REFRESH_INTERVAL_SECONDS:120} - # Optional: A set of custom authorizers, serving in addition to the default DataHub policies-based authorizer. 
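The LocalEbeanServerConfigFactory change above is what the new EBEAN_POSTGRES_USE_AWS_IAM_AUTH flag (added to application.yml further down in this diff) toggles: when postgresUseIamAuth is true, the factory registers the AWS JDBC wrapper's "iam" plugin as a custom datasource property, and the new awsPostgresIamAuth and awsRds dependencies in metadata-service/factories/build.gradle supply that wrapper. A minimal standalone sketch of the same configuration follows; the JDBC URL and username are purely illustrative, and the aws-wrapper URL scheme is an assumption about the AWS Advanced JDBC Wrapper rather than something this diff pins down.

import io.ebean.datasource.DataSourceConfig;
import java.util.HashMap;
import java.util.Map;

public class PostgresIamAuthSketch {
  public static void main(String[] args) {
    DataSourceConfig dataSourceConfig = new DataSourceConfig();
    // Illustrative connection details; in GMS these come from application.yml / environment variables.
    dataSourceConfig.setUrl("jdbc:aws-wrapper:postgresql://example-cluster.us-east-1.rds.amazonaws.com:5432/datahub");
    dataSourceConfig.setUsername("datahub");
    // Same custom property the factory sets when postgresUseIamAuth is true: the AWS JDBC
    // wrapper's "iam" plugin obtains short-lived IAM credentials instead of a static password.
    Map<String, String> custom = new HashMap<>();
    custom.put("wrapperPlugins", "iam");
    dataSourceConfig.setCustomProperties(custom);
  }
}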
- authorizers: - - type: com.datahub.authorization.ranger.RangerAuthorizer - enabled: ${RANGER_AUTHORIZER_ENABLED:false} - configs: - username: ${RANGER_USERNAME} - password: ${RANGER_PASSWORD} - ingestion: enabled: ${UI_INGESTION_ENABLED:true} defaultCliVersion: '${UI_INGESTION_DEFAULT_CLI_VERSION:@cliMajorVersion@}' @@ -65,10 +57,13 @@ datahub: protocol: ${DATAHUB_GMS_SSL_PROTOCOL:${GMS_SSL_PROTOCOL:#{null}}} plugin: + pluginSecurityMode: ${PLUGIN_SECURITY_MODE:RESTRICTED} # Possible value RESTRICTED or LENIENT, default to RESTRICTED entityRegistry: - path: ${ENTITY_REGISTRY_PLUGIN_PATH:/etc/datahub/plugins/models} + path: ${ENTITY_REGISTRY_PLUGIN_PATH:/etc/datahub/plugins/models} retention: path: ${RETENTION_PLUGIN_PATH:/etc/datahub/plugins/retention} + auth: + path: ${AUTH_PLUGIN_PATH:/etc/datahub/plugins/auth} entityService: impl: ${ENTITY_SERVICE_IMPL:ebean} @@ -110,6 +105,7 @@ ebean: leakTimeMinutes: ${EBEAN_LEAK_TIME_MINUTES:15} waitTimeoutMillis: ${EBEAN_WAIT_TIMEOUT_MILLIS:1000} autoCreateDdl: ${EBEAN_AUTOCREATE:false} + postgresUseIamAuth: ${EBEAN_POSTGRES_USE_AWS_IAM_AUTH:false} # Only required if entityService.impl is cassandra cassandra: diff --git a/metadata-service/graphql-servlet-impl/build.gradle b/metadata-service/graphql-servlet-impl/build.gradle index 7c365bcdac26e..dd38224f529eb 100644 --- a/metadata-service/graphql-servlet-impl/build.gradle +++ b/metadata-service/graphql-servlet-impl/build.gradle @@ -2,7 +2,7 @@ apply plugin: 'java' dependencies { compile project(':datahub-graphql-core') - compile project(':metadata-service:auth-api') + compile project(':metadata-auth:auth-api') compile project(':metadata-service:factories') compile externalDependency.springCore diff --git a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/SpringQueryContext.java b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/SpringQueryContext.java index f1fc7038a4909..a1ddc5a013f7d 100644 --- a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/SpringQueryContext.java +++ b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/SpringQueryContext.java @@ -1,7 +1,7 @@ package com.datahub.graphql; import com.datahub.authentication.Authentication; -import com.datahub.authorization.Authorizer; +import com.datahub.plugins.auth.authorization.Authorizer; import com.linkedin.datahub.graphql.QueryContext; diff --git a/metadata-service/openapi-servlet/build.gradle b/metadata-service/openapi-servlet/build.gradle index 3b947fee08272..eaae46e21b520 100644 --- a/metadata-service/openapi-servlet/build.gradle +++ b/metadata-service/openapi-servlet/build.gradle @@ -2,7 +2,7 @@ apply plugin: 'java' dependencies { - compile project(':metadata-service:auth-api') + compile project(':metadata-auth:auth-api') compile project(':metadata-service:factories') compile externalDependency.reflections diff --git a/metadata-service/plugin/build.gradle b/metadata-service/plugin/build.gradle new file mode 100644 index 0000000000000..00a6384b923a0 --- /dev/null +++ b/metadata-service/plugin/build.gradle @@ -0,0 +1,33 @@ +apply plugin: 'java' + +dependencies { + + implementation 'org.apache.commons:commons-lang3:3.12.0' + implementation project(path: ':metadata-auth:auth-api') + implementation externalDependency.guava + implementation externalDependency.jacksonDataBind + implementation externalDependency.jacksonDataFormatYaml + implementation externalDependency.jacksonJDK8 + implementation externalDependency.jacksonDataPropertyFormat + 
implementation externalDependency.logbackClassic; + implementation externalDependency.slf4jApi + + compileOnly externalDependency.lombok + + testImplementation externalDependency.mockito + testImplementation externalDependency.testng + annotationProcessor externalDependency.lombok +} + +test { + // Build sample plugin to unit test IsolatedClassLoader + dependsOn ':metadata-service:plugin:src:test:sample-test-plugins:build' + // Set system properties for permission management + systemProperty 'datahub.gms.home', file("$projectDir").absolutePath + systemProperty 'java.security.policy', file("../war/src/main/resources/security.policy").absolutePath + systemProperty 'datahub.project.root.dir', "$rootDir" // used in security.policy +} + +clean { + dependsOn ':metadata-service:plugin:src:test:sample-test-plugins:clean' +} diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/auth/configuration/AuthParam.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/auth/configuration/AuthParam.java new file mode 100644 index 0000000000000..e632aa7eadff0 --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/auth/configuration/AuthParam.java @@ -0,0 +1,35 @@ +package com.datahub.plugins.auth.configuration; + +import java.util.Map; +import java.util.Optional; +import lombok.Data; + + +/** + * POJO for the YAML section present in config.yml at location plugins[].params. + * + * These parameters are the same for Authenticator and Authorizer plugins. + * + * {@link com.datahub.plugins.auth.provider.AuthPluginConfigProvider} uses this AuthParam to create an instance of + * either {@link AuthenticatorPluginConfig} or {@link AuthorizerPluginConfig} + */ +@Data +public class AuthParam { + /** + * Fully-qualified class name of the plugin + */ + private String className; + + /** + * Default jarFileName is "<plugin-name>.jar". If the plugin's jar file name is different from the default value then set + * this property. + */ + private Optional<String> jarFileName = Optional.empty(); + + /** + * These configs are specific to the plugin.
GMS pass this map as is to plugin + * {@link com.datahub.plugins.auth.authentication.Authenticator} or + * {@link com.datahub.plugins.auth.authorization.Authorizer} init method + */ + private Optional> configs = Optional.empty(); +} diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/auth/configuration/AuthPluginConfig.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/auth/configuration/AuthPluginConfig.java new file mode 100644 index 0000000000000..b4546d9f5af16 --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/auth/configuration/AuthPluginConfig.java @@ -0,0 +1,24 @@ +package com.datahub.plugins.auth.configuration; + +import com.datahub.plugins.common.PluginConfig; +import com.datahub.plugins.common.PluginType; +import java.nio.file.Path; +import java.util.Map; +import java.util.Optional; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; + + +/** + * Superclass for {@link AuthenticatorPluginConfig} and {@link AuthorizerPluginConfig} + */ +@Data +@NoArgsConstructor +@EqualsAndHashCode(callSuper = false) +public class AuthPluginConfig extends PluginConfig { + public AuthPluginConfig(PluginType type, String name, Boolean enabled, String className, Path pluginHomeDirectory, + Path pluginJarPath, Optional> configs) { + super(type, name, enabled, className, pluginHomeDirectory, pluginJarPath, configs); + } +} diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/auth/configuration/AuthenticatorPluginConfig.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/auth/configuration/AuthenticatorPluginConfig.java new file mode 100644 index 0000000000000..276faed56f7ab --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/auth/configuration/AuthenticatorPluginConfig.java @@ -0,0 +1,24 @@ +package com.datahub.plugins.auth.configuration; + +import com.datahub.plugins.common.PluginType; +import java.nio.file.Path; +import java.util.Map; +import java.util.Optional; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; + + +/** + * Authenticator plugin configuration provided by user. + * {@link com.datahub.plugins.auth.provider.AuthenticatorPluginConfigProvider} instantiate this class + */ +@Data +@NoArgsConstructor +@EqualsAndHashCode(callSuper = false) +public class AuthenticatorPluginConfig extends AuthPluginConfig { + public AuthenticatorPluginConfig(String name, Boolean enabled, String className, Path pluginDirectory, Path pluginJar, + Optional> configs) { + super(PluginType.AUTHENTICATOR, name, enabled, className, pluginDirectory, pluginJar, configs); + } +} // currently this class doesn't have any special attributes diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/auth/configuration/AuthorizerPluginConfig.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/auth/configuration/AuthorizerPluginConfig.java new file mode 100644 index 0000000000000..1a4bd1ea07906 --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/auth/configuration/AuthorizerPluginConfig.java @@ -0,0 +1,24 @@ +package com.datahub.plugins.auth.configuration; + +import com.datahub.plugins.common.PluginType; +import java.nio.file.Path; +import java.util.Map; +import java.util.Optional; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; + + +/** + * Authorizer plugin configuration provided by user. 
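To make the plugins[].params-to-AuthParam mapping above concrete, here is a small illustration. The class name, jar file name, and config values are invented for the example; YamlMapper is the Jackson-based helper added later in this diff, and the fromMap call mirrors exactly what the Authenticator and Authorizer config providers do.

import com.datahub.plugins.auth.configuration.AuthParam;
import com.datahub.plugins.common.YamlMapper;
import com.google.common.collect.ImmutableMap;
import java.util.Map;

public class AuthParamMappingSketch {
  public static void main(String[] args) {
    // Hypothetical plugins[].params section of config.yml, already parsed into a Map.
    Map<String, Object> params = ImmutableMap.of(
        "className", "com.example.plugins.SampleAuthorizer",      // required: plugin implementation class
        "jarFileName", "sample-authorizer.jar",                    // optional: defaults to "<plugin-name>.jar"
        "configs", ImmutableMap.of("username", "foo", "password", "bar")); // handed as-is to the plugin's init

    AuthParam authParam = new YamlMapper().fromMap(params, AuthParam.class);
    System.out.println(authParam.getClassName());           // com.example.plugins.SampleAuthorizer
    System.out.println(authParam.getJarFileName().get());   // sample-authorizer.jar
  }
}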
+ * {@link com.datahub.plugins.auth.provider.AuthorizerPluginConfigProvider} instantiate this class + */ +@Data +@NoArgsConstructor +@EqualsAndHashCode(callSuper = false) +public class AuthorizerPluginConfig extends AuthPluginConfig { + public AuthorizerPluginConfig(String name, Boolean enabled, String className, Path pluginDirectory, Path pluginJar, + Optional> configs) { + super(PluginType.AUTHORIZER, name, enabled, className, pluginDirectory, pluginJar, configs); + } +} // currently this class doesn't have any special attributes diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/auth/provider/AuthPluginConfigProvider.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/auth/provider/AuthPluginConfigProvider.java new file mode 100644 index 0000000000000..b970258aa3ea0 --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/auth/provider/AuthPluginConfigProvider.java @@ -0,0 +1,44 @@ +package com.datahub.plugins.auth.provider; + +import com.datahub.plugins.auth.configuration.AuthParam; +import com.datahub.plugins.auth.configuration.AuthPluginConfig; +import com.datahub.plugins.common.PluginConfigProvider; +import com.datahub.plugins.common.PluginType; +import com.datahub.plugins.configuration.PluginConfig; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + + +/** + * Base class for {@link AuthenticatorPluginConfigProvider} and {@link AuthorizerPluginConfigProvider}. + */ +public abstract class AuthPluginConfigProvider implements PluginConfigProvider { + public abstract PluginType getType(); + + public abstract AuthPluginConfig createAuthPluginConfig(PluginConfig pluginConfig); + + @Override + public List processConfig( + List pluginConfigConfigs) { + // Filter out AuthPlugin + Stream authPluginHolder = + pluginConfigConfigs.stream().filter(pluginHolder -> pluginHolder.getType() == getType()); + // Create AuthPlugin type instances + List authPlugins = + authPluginHolder.map(this::createAuthPluginConfig).collect(Collectors.toList()); + return authPlugins; + } + + public Path formPluginJar(PluginConfig pluginConfig, AuthParam authConfig) { + // User is either going to explicitly set the jarFileName or we will infer it from plugin name + String jarName = authConfig.getJarFileName().orElse(pluginConfig.getName() + ".jar"); + Path jarPath = Paths.get(pluginConfig.getPluginHomeDirectory().toString(), jarName); + if (!jarPath.toFile().exists()) { + throw new IllegalArgumentException(String.format("Plugin Jar %s not found", jarPath)); + } + return jarPath; + } +} diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/auth/provider/AuthenticatorPluginConfigProvider.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/auth/provider/AuthenticatorPluginConfigProvider.java new file mode 100644 index 0000000000000..546cee04d05a0 --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/auth/provider/AuthenticatorPluginConfigProvider.java @@ -0,0 +1,36 @@ +package com.datahub.plugins.auth.provider; + +import com.datahub.plugins.auth.configuration.AuthParam; +import com.datahub.plugins.auth.configuration.AuthPluginConfig; +import com.datahub.plugins.auth.configuration.AuthenticatorPluginConfig; +import com.datahub.plugins.common.PluginType; +import com.datahub.plugins.common.YamlMapper; +import com.datahub.plugins.configuration.PluginConfig; +import java.nio.file.Path; +import javax.annotation.Nonnull; + 
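The formPluginJar helper above is where the on-disk layout matters: a plugin named X must ship its jar inside <plugin-base-directory>/X, and unless params.jarFileName says otherwise the jar must be called X.jar. A tiny sketch of that resolution, using the default base directory from application.yml and an invented plugin name:

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Optional;

public class PluginJarResolutionSketch {
  public static void main(String[] args) {
    // Default AUTH_PLUGIN_PATH from application.yml; the plugin name is illustrative.
    Path pluginBaseDir = Paths.get("/etc/datahub/plugins/auth");
    String pluginName = "sample-authorizer";
    Path pluginHome = Paths.get(pluginBaseDir.toString(), pluginName);

    // No jarFileName in params: fall back to "<plugin-name>.jar", mirroring formPluginJar.
    Optional<String> jarFileName = Optional.empty();
    Path jarPath = Paths.get(pluginHome.toString(), jarFileName.orElse(pluginName + ".jar"));
    System.out.println(jarPath); // /etc/datahub/plugins/auth/sample-authorizer/sample-authorizer.jar
  }
}

Note that formPluginJar also fails fast with an IllegalArgumentException when the resolved jar does not exist, so a typo in either the plugin name or jarFileName surfaces at startup rather than at authorization time.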
+ +/** + * Responsible for creating {@link AuthenticatorPluginConfig} instance. + * This provider is register in {@link com.datahub.plugins.factory.PluginConfigFactory} as provider of Authenticator + * configuration + */ +public class AuthenticatorPluginConfigProvider extends AuthPluginConfigProvider { + @Override + public PluginType getType() { + return PluginType.AUTHENTICATOR; + } + + @Override + public AuthPluginConfig createAuthPluginConfig(@Nonnull PluginConfig pluginConfig) { + // Map Yaml section present in config.yml at plugins[].params to AuthParam + AuthParam authParam = (new YamlMapper()).fromMap(pluginConfig.getParams(), AuthParam.class); + // Make the pluginJar file path either from name of plugin or explicitly from plugins[].params.jarFileName + // This logic is common for authenticator and authorizer plugin and hence define in superclass + Path pluginJar = formPluginJar(pluginConfig, authParam); + + return new AuthenticatorPluginConfig(pluginConfig.getName(), pluginConfig.getEnabled(), authParam.getClassName(), + pluginConfig.getPluginHomeDirectory(), pluginJar, authParam.getConfigs()); + } +} + diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/auth/provider/AuthorizerPluginConfigProvider.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/auth/provider/AuthorizerPluginConfigProvider.java new file mode 100644 index 0000000000000..397dc3fd93b36 --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/auth/provider/AuthorizerPluginConfigProvider.java @@ -0,0 +1,36 @@ +package com.datahub.plugins.auth.provider; + +import com.datahub.plugins.auth.configuration.AuthParam; +import com.datahub.plugins.auth.configuration.AuthPluginConfig; +import com.datahub.plugins.auth.configuration.AuthorizerPluginConfig; +import com.datahub.plugins.common.PluginType; +import com.datahub.plugins.common.YamlMapper; +import com.datahub.plugins.configuration.PluginConfig; +import java.nio.file.Path; +import javax.annotation.Nonnull; + + +/** + * Responsible for creating {@link AuthorizerPluginConfig} instance. 
+ * This provider is register in {@link com.datahub.plugins.factory.PluginConfigFactory} as provider of Authorizer + * configuration + */ +public class AuthorizerPluginConfigProvider extends AuthPluginConfigProvider { + @Override + public PluginType getType() { + return PluginType.AUTHORIZER; + } + + @Override + public AuthPluginConfig createAuthPluginConfig(@Nonnull PluginConfig pluginConfig) { + // Map Yaml section present in config.yml at plugins[].params to AuthParam + AuthParam authParam = (new YamlMapper()).fromMap(pluginConfig.getParams(), AuthParam.class); + + // Make the pluginJar file path either from name of plugin or explicitly from plugins[].params.jarFileName + // This logic is common for authenticator and authorizer plugin and hence define in superclass + Path pluginJar = formPluginJar(pluginConfig, authParam); + + return new AuthorizerPluginConfig(pluginConfig.getName(), pluginConfig.getEnabled(), authParam.getClassName(), + pluginConfig.getPluginHomeDirectory(), pluginJar, authParam.getConfigs()); + } +} diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/common/ConfigValidationUtils.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/common/ConfigValidationUtils.java new file mode 100644 index 0000000000000..c4dc94b7c73d5 --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/common/ConfigValidationUtils.java @@ -0,0 +1,53 @@ +package com.datahub.plugins.common; + +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import javax.annotation.Nonnull; +import org.apache.commons.lang3.StringUtils; + + +/** + * Common validations. + * Used in {@link com.datahub.plugins.configuration.PluginConfig} + */ +public class ConfigValidationUtils { + + private ConfigValidationUtils() { + } + + public static void whiteSpacesValidation(@Nonnull String fieldName, @Nonnull String value) + throws IllegalArgumentException { + if (StringUtils.isEmpty(value) || StringUtils.containsWhitespace(value)) { + throw new IllegalArgumentException( + String.format("%s should not be empty and should not contains whitespaces", fieldName)); + } + } + + public static void mapShouldNotBeEmpty(@Nonnull String fieldName, @Nonnull Map attributeMap) + throws IllegalArgumentException { + if (attributeMap.isEmpty()) { + throw new IllegalArgumentException(String.format("%s should not be empty", fieldName)); + } + } + + public static void listShouldNotBeEmpty(@Nonnull String fieldName, @Nonnull List list) + throws IllegalArgumentException { + if (list.isEmpty()) { + throw new IllegalArgumentException(String.format("%s should not be empty", fieldName)); + } + } + + public static void listShouldNotHaveDuplicate(@Nonnull String fieldName, @Nonnull List list) { + Set set = new HashSet<>(); + list.forEach((input) -> { + if (set.contains(input)) { + throw new IllegalArgumentException( + String.format("Duplicate entry of %s is found in %s. 
%s should not contain duplicate", input, fieldName, + fieldName)); + } + set.add(input); + }); + } +} diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/common/PluginConfig.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/common/PluginConfig.java new file mode 100644 index 0000000000000..02b3b4566d705 --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/common/PluginConfig.java @@ -0,0 +1,58 @@ +package com.datahub.plugins.common; + +import java.nio.file.Path; +import java.util.Map; +import java.util.Optional; +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; + + +/** + * Flat form of plugin configuration configured in config.yaml at plugins[] and plugins[].params + */ +@Data +@NoArgsConstructor +@AllArgsConstructor +public class PluginConfig { + /** + * Type of plugin. Supported types are {@link PluginType} + */ + private PluginType type; + + /** + * name of the plugin. It should be unique in plugins[] list + */ + private String name; + + /** + * Whether to load the plugin in GMS. If set to true plugin will be loaded in GMS take authentication/authorization + * decisions. + */ + private Boolean enabled; + + /** + * Fully-qualified class-name of plugin + */ + private String className; + + /** + * It is always set to /. + * For example if plugin-name is ranger-authorizer and plugin-base-directory is /etc/datahub/plugins/auth then + * pluginDirectory would be /etc/datahub/plugins/auth/ranger-authorizer + */ + private Path pluginHomeDirectory; + + /** + * Default jarFileName is ".jar". If plugin's jar file name is different from default value then set + * this property. + */ + private Path pluginJarPath; + + /** + * These configs are specific to plugin. GMS pass this map as is to plugin + * {@link com.datahub.plugins.auth.authentication.Authenticator} or + * {@link com.datahub.plugins.auth.authorization.Authorizer} init method + */ + private Optional> configs; +} diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/common/PluginConfigProvider.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/common/PluginConfigProvider.java new file mode 100644 index 0000000000000..b1b0844f428b7 --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/common/PluginConfigProvider.java @@ -0,0 +1,8 @@ +package com.datahub.plugins.common; + +import java.util.List; + + +public interface PluginConfigProvider { + List processConfig(List pluginConfigConfigs); +} diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/common/PluginPermissionManager.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/common/PluginPermissionManager.java new file mode 100644 index 0000000000000..0a46be21155b6 --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/common/PluginPermissionManager.java @@ -0,0 +1,17 @@ +package com.datahub.plugins.common; + +import java.nio.file.Path; +import java.security.ProtectionDomain; + + +/** + * Implement this interface to create Java SecurityManager's ProtectionDomain for the plugin. 
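A short illustration of how the ConfigValidationUtils checks above behave; the builders shown later in this diff (Config.CustomBuilder and PluginConfig.CustomBuilder) call them, so malformed config.yml entries fail at load time. The field and plugin names below are made up.

import com.datahub.plugins.common.ConfigValidationUtils;
import java.util.Arrays;

public class ConfigValidationSketch {
  public static void main(String[] args) {
    // Passes: a non-empty, whitespace-free plugin name.
    ConfigValidationUtils.whiteSpacesValidation("name", "sample-authorizer");

    try {
      // Throws: duplicate plugin names are rejected, mirroring the check Config.CustomBuilder runs on plugins[].
      ConfigValidationUtils.listShouldNotHaveDuplicate("plugins",
          Arrays.asList("sample-authorizer", "sample-authorizer"));
    } catch (IllegalArgumentException e) {
      System.out.println(e.getMessage());
    }
  }
}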
+ */ +public interface PluginPermissionManager { + /** + * Create codeSource instance for the location of pluginHome to apply SecurityMode restriction to the plugin code + * @param pluginHome + * @return ProtectionDomain + */ + ProtectionDomain createProtectionDomain(Path pluginHome); +} diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/common/PluginType.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/common/PluginType.java new file mode 100644 index 0000000000000..ed3bf0a4f4473 --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/common/PluginType.java @@ -0,0 +1,21 @@ +package com.datahub.plugins.common; + +/** + * Supported plugin types + */ +public enum PluginType { + /** + * PluginType for Authenticator plugin + */ + AUTHENTICATOR, + + /** + * PluginType for Authorizer plugin + */ + AUTHORIZER; + + @Override + public String toString() { + return this.name().toLowerCase(); + } +} diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/common/SecurityMode.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/common/SecurityMode.java new file mode 100644 index 0000000000000..7ab0032b86497 --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/common/SecurityMode.java @@ -0,0 +1,62 @@ +package com.datahub.plugins.common; + +import java.io.FilePermission; +import java.net.SocketPermission; +import java.nio.file.Path; +import java.security.AllPermission; +import java.security.Permissions; +import java.util.function.Function; + + +/** + * Supported security modes + */ +public enum SecurityMode { + /** + * In this mode plugins has limited access. + * + * Plugins are allowed to connect on below ports only + * 1) port greater than 1024 + * 2) port 80 + * 3) port 443 + * All other ports connection are disallowed. + * + * Plugins are allowed to read and write files on PLUGIN_HOME directory only and all other read/write access are + * denied. + */ + RESTRICTED(SecurityMode::restrictModePermissionSupplier), + + /** + * Plugins has full access. + * In this mode plugin can read/write to any directory, can connect to any port and can read environment variables. 
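The PluginPermissionManager interface above is what AuthorizerChainFactory hands to the IsolatedClassLoader (via PluginPermissionManagerImpl, whose source is not part of this excerpt). As a rough illustration of the contract, an implementation could combine SecurityMode's permission supplier with a CodeSource rooted at the plugin home; treat the sketch below as an assumption about the shape of that class, not its actual code.

import com.datahub.plugins.common.PluginPermissionManager;
import com.datahub.plugins.common.SecurityMode;
import java.net.MalformedURLException;
import java.nio.file.Path;
import java.security.CodeSource;
import java.security.Permissions;
import java.security.ProtectionDomain;
import java.security.cert.Certificate;

public class SketchPermissionManager implements PluginPermissionManager {
  private final SecurityMode securityMode;

  public SketchPermissionManager(SecurityMode securityMode) {
    this.securityMode = securityMode;
  }

  @Override
  public ProtectionDomain createProtectionDomain(Path pluginHome) {
    // Permissions depend on the configured mode: RESTRICTED limits file and socket access,
    // LENIENT grants AllPermission (see SecurityMode above).
    Permissions permissions = securityMode.permissionsSupplier().apply(pluginHome);
    try {
      CodeSource codeSource = new CodeSource(pluginHome.toUri().toURL(), (Certificate[]) null);
      return new ProtectionDomain(codeSource, permissions);
    } catch (MalformedURLException e) {
      throw new RuntimeException("Unable to build CodeSource for " + pluginHome, e);
    }
  }
}

With SecurityMode.RESTRICTED the resulting domain only grants read/write/delete under the plugin home plus outbound sockets on ports 80, 443 and above 1024; LENIENT grants AllPermission.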
+ */ + LENIENT(SecurityMode::lenientModePermissionSupplier); + + private final Function _permissionsSupplier; + + SecurityMode(Function permissionsSupplier) { + this._permissionsSupplier = permissionsSupplier; + } + + private static Permissions restrictModePermissionSupplier(Path sourceCodeDirectory) { + Permissions permissions = new Permissions(); + + permissions.add(new FilePermission(sourceCodeDirectory.toString() + "/*", "read,write,delete")); + permissions.add( + new SocketPermission("*:1024-", "connect,resolve")); // Allow to connect access to all socket above 1024 + permissions.add(new SocketPermission("*:80", "connect,resolve")); // Allow to connect access to HTTP port + permissions.add(new SocketPermission("*:443", "connect,resolve")); // Allow to connect access to HTTPS port + + return permissions; + } + + private static Permissions lenientModePermissionSupplier(Path sourceCodeDirectory) { + Permissions permissions = new Permissions(); + permissions.add(new AllPermission()); + return permissions; + } + + public Function permissionsSupplier() { + return this._permissionsSupplier; + } +} diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/common/YamlMapper.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/common/YamlMapper.java new file mode 100644 index 0000000000000..c4a79e9434923 --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/common/YamlMapper.java @@ -0,0 +1,39 @@ +package com.datahub.plugins.common; + +import com.fasterxml.jackson.databind.MapperFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; +import com.fasterxml.jackson.datatype.jdk8.Jdk8Module; +import java.io.IOException; +import java.nio.file.Path; +import java.util.Map; +import javax.annotation.Nonnull; + + +/** + * + * A mapper to map plugin configuration to java Pojo classes + */ +public class YamlMapper { + private final ObjectMapper objectMapper; + + public YamlMapper() { + this.objectMapper = YAMLMapper.builder().enable(MapperFeature.ACCEPT_CASE_INSENSITIVE_ENUMS).build(); + objectMapper.registerModule(new Jdk8Module()); + } + + public T fromMap(@Nonnull Map params, Class clazz) { + return objectMapper.convertValue(params, clazz); + } + + public T fromFile(@Nonnull Path file, @Nonnull Class clazz) { + T pojo = null; + try { + pojo = (T) objectMapper.readValue(file.toFile(), clazz); + } catch (IOException e) { + // Won't occur as we're already checking file existence in ConfigProvider's load method + throw new RuntimeException(e); + } + return pojo; + } +} diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/configuration/Config.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/configuration/Config.java new file mode 100644 index 0000000000000..6cf1966787875 --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/configuration/Config.java @@ -0,0 +1,44 @@ +package com.datahub.plugins.configuration; + +import com.datahub.plugins.common.ConfigValidationUtils; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import javax.annotation.Nonnull; +import lombok.Builder; +import lombok.Getter; + + +/** + * {@link Config} is getting loaded from /etc/datahub/plugins/auth/config.yaml + */ +@Getter +@Builder +@JsonDeserialize(builder = Config.CustomBuilder.class) +public 
class Config { + public static final String FIELD_PLUGINS = "plugins"; // for validation error messages + @Nonnull + private List plugins; + + public static CustomBuilder builder() { + return new CustomBuilder(); + } + + @JsonPOJOBuilder(withPrefix = "") + public static class CustomBuilder extends ConfigBuilder { + public Config build() { + ConfigValidationUtils.listShouldNotBeEmpty(FIELD_PLUGINS, Collections.singletonList(super.plugins)); + + List list = new ArrayList<>(super.plugins.size()); + super.plugins.forEach((pluginConfig) -> { + list.add(pluginConfig.getName()); + }); + + ConfigValidationUtils.listShouldNotHaveDuplicate(FIELD_PLUGINS, list); + + return super.build(); + } + } +} diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/configuration/ConfigProvider.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/configuration/ConfigProvider.java new file mode 100644 index 0000000000000..ac2590209f4db --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/configuration/ConfigProvider.java @@ -0,0 +1,48 @@ +package com.datahub.plugins.configuration; + +import com.datahub.plugins.common.YamlMapper; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Optional; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; + + +@Slf4j +public class ConfigProvider { + public static final String CONFIG_FILE_NAME = "config.yml"; + + /** + * Yaml file path of plugin configuration file. Content of this file should match with {@link Config} + */ + private final Path configFilePath; + + /** + * Directory where all plugins are mounted in DataHub GMS. + * Default pluginBaseDir is /etc/datahub/plugins/auth. + */ + private final Path pluginBaseDir; + + public ConfigProvider(@Nonnull Path pluginBaseDirectory) { + this.pluginBaseDir = pluginBaseDirectory.toAbsolutePath(); + this.configFilePath = Paths.get(this.pluginBaseDir.toString(), CONFIG_FILE_NAME); + } + + private void setPluginDir(@Nonnull PluginConfig pluginConfig) { + Path pluginDir = Paths.get(this.pluginBaseDir.toString(), pluginConfig.getName()); + pluginConfig.setPluginHomeDirectory(pluginDir); + } + + public Optional load() { + // Check config file should exist + if (!this.configFilePath.toFile().exists()) { + log.warn("Configuration {} file not found at location {}", CONFIG_FILE_NAME, this.pluginBaseDir); + return Optional.empty(); + } + + Config config = new YamlMapper().fromFile(this.configFilePath, Config.class); + // set derived attributes + config.getPlugins().forEach(this::setPluginDir); + return Optional.of(config); + } +} \ No newline at end of file diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/configuration/PluginConfig.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/configuration/PluginConfig.java new file mode 100644 index 0000000000000..faeeabbf955eb --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/configuration/PluginConfig.java @@ -0,0 +1,70 @@ +package com.datahub.plugins.configuration; + +import com.datahub.plugins.common.PluginType; +import com.datahub.plugins.common.ConfigValidationUtils; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder; +import java.nio.file.Path; +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; 
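Tying the Config and ConfigProvider pieces above together: GMS points ConfigProvider at the plugin base directory (AUTH_PLUGIN_PATH, /etc/datahub/plugins/auth by default) and expects a config.yml shaped like the plugins[] POJOs. The sketch below writes an illustrative file into a temp directory and loads it; the plugin name, class name and configs keys are invented for the example.

import com.datahub.plugins.configuration.Config;
import com.datahub.plugins.configuration.ConfigProvider;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Optional;

public class ConfigProviderSketch {
  public static void main(String[] args) throws Exception {
    // Stand-in for /etc/datahub/plugins/auth; contents are illustrative only.
    Path pluginBaseDir = Files.createTempDirectory("auth-plugins");
    String yaml = String.join("\n",
        "plugins:",
        "  - name: sample-authorizer",
        "    type: authorizer",
        "    enabled: true",
        "    params:",
        "      className: com.example.plugins.SampleAuthorizer",
        "      configs:",
        "        username: foo",
        "        password: bar");
    Files.write(pluginBaseDir.resolve(ConfigProvider.CONFIG_FILE_NAME), yaml.getBytes());

    // load() returns Optional.empty() when config.yml is missing; plugin home is derived per plugin name.
    Optional<Config> config = new ConfigProvider(pluginBaseDir).load();
    config.ifPresent(c -> c.getPlugins()
        .forEach(p -> System.out.println(p.getName() + " -> " + p.getPluginHomeDirectory())));
  }
}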
+ + +/** + * POJO to map YAML section present in config.yml at plugins[] + */ +@Data +@NoArgsConstructor +@AllArgsConstructor +@Builder +@JsonDeserialize(builder = PluginConfig.CustomBuilder.class) +@EqualsAndHashCode(onlyExplicitlyIncluded = true) +public class PluginConfig { + /** + * name of the plugin. It should be unique in plugins[] list + */ + @EqualsAndHashCode.Include + private String name; // In list of plugin, the name should be unique + + /** + * Type of plugin. Supported types are {@link PluginType} + */ + private PluginType type; + + /** + * Whether to load the plugin in GMS. If set to true plugin will be loaded in GMS take authentication/authorization + * decisions. + */ + private Boolean enabled; + + /** + * Attributes in params should be as per POJO {@link com.datahub.plugins.auth.configuration.AuthParam} + */ + private Map params; + + /** + * It is always set to /. + * For example if plugin-name is ranger-authorizer and plugin-base-directory is /etc/datahub/plugins/auth then + * pluginDirectory would be /etc/datahub/plugins/auth/ranger-authorizer + */ + @JsonIgnore + private Path pluginHomeDirectory; + + public static CustomBuilder builder() { + return new CustomBuilder(); + } + + @JsonPOJOBuilder(withPrefix = "") + public static class CustomBuilder extends PluginConfigBuilder { + public PluginConfig build() { + ConfigValidationUtils.whiteSpacesValidation("name", super.name); + ConfigValidationUtils.mapShouldNotBeEmpty("params", super.params); + + return super.build(); + } + } +} diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/factory/PluginConfigFactory.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/factory/PluginConfigFactory.java new file mode 100644 index 0000000000000..b0a59a1656c8d --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/factory/PluginConfigFactory.java @@ -0,0 +1,37 @@ +package com.datahub.plugins.factory; + +import com.datahub.plugins.auth.provider.AuthenticatorPluginConfigProvider; +import com.datahub.plugins.auth.provider.AuthorizerPluginConfigProvider; +import com.datahub.plugins.common.PluginConfig; +import com.datahub.plugins.common.PluginConfigProvider; +import com.datahub.plugins.common.PluginType; +import com.datahub.plugins.configuration.Config; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import javax.annotation.Nonnull; + + +/** + * Create instance of config provider as per type mentioned in {@link Config} + */ +public class PluginConfigFactory { + private final static Map CONFIG_PROVIDER_REGISTRY; + + static { + CONFIG_PROVIDER_REGISTRY = new HashMap<>(2); + CONFIG_PROVIDER_REGISTRY.put(PluginType.AUTHENTICATOR, new AuthenticatorPluginConfigProvider()); + CONFIG_PROVIDER_REGISTRY.put(PluginType.AUTHORIZER, new AuthorizerPluginConfigProvider()); + } + + private final Config _config; + + public PluginConfigFactory(@Nonnull Config config) { + this._config = config; + } + + @Nonnull + public List loadPluginConfigs(@Nonnull PluginType pluginType) { + return CONFIG_PROVIDER_REGISTRY.get(pluginType).processConfig(this._config.getPlugins()); + } +} diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/loader/IsolatedClassLoader.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/loader/IsolatedClassLoader.java new file mode 100644 index 0000000000000..92a7cae0647c5 --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/loader/IsolatedClassLoader.java @@ -0,0 +1,318 @@ +package 
com.datahub.plugins.loader; + +import com.datahub.plugins.Plugin; +import com.datahub.plugins.common.PluginConfig; +import com.datahub.plugins.common.PluginPermissionManager; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.security.ProtectionDomain; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.jar.JarEntry; +import java.util.jar.JarFile; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import java.util.zip.ZipEntry; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; + + +/** + * IsolatedClassLoader to load custom implementation of DataHub Plugins. + * Override methods behave as per Java ClassLoader documentation. + */ +@Slf4j +public class IsolatedClassLoader extends ClassLoader { + public static final String EXECUTION_DIR = "__run__"; + private final PluginPermissionManager _pluginPermissionManager; + + private final PluginConfig _pluginConfig; + + private final List _classLoaders = new ArrayList<>(2); + + private Map _classPathVsZipEntry; + + private JarFile _pluginJarRef; + + private final Path _executionDirectory; + + public IsolatedClassLoader(@Nonnull PluginPermissionManager pluginPermissionManager, + @Nonnull PluginConfig pluginToLoad, @Nonnull ClassLoader... applicationClassLoaders) { + this._pluginPermissionManager = pluginPermissionManager; + this._pluginConfig = pluginToLoad; + this._classLoaders.add(this.getClass().getClassLoader()); // then application class-loader + this._classLoaders.addAll(Arrays.asList(applicationClassLoaders)); // if any extra class loaders + this._executionDirectory = + Paths.get("/tmp", pluginToLoad.getPluginHomeDirectory().toString(), EXECUTION_DIR); // to store .so files i.e. libraries + try { + this.createJarEntryMap(); + } catch (IOException e) { + // This would occur if we don't have permission on directory and chances of this is close to zero, hence catching + // this checked exception and throwing runtime exception + // to make caller code more readable + String message = String.format("Unable to load jar file %s for plugin %s", pluginToLoad.getPluginJarPath(), + pluginToLoad.getName()); + throw new RuntimeException(message, e); + } + } + + private void createJarEntryMap() throws IOException { + log.debug("Plugin jar file path {}", this._pluginConfig.getPluginJarPath()); + this._pluginJarRef = new JarFile(this._pluginConfig.getPluginJarPath().toFile()); + this._classPathVsZipEntry = new HashMap<>(); + for (Enumeration enums = this._pluginJarRef.entries(); enums.hasMoreElements(); ) { + JarEntry entry = enums.nextElement(); + if (entry.getName().endsWith("/")) { + // we don't want to keep directories in map + continue; + } + this._classPathVsZipEntry.put(entry.getName(), entry); + } + } + + /** + * Load plugin class from jar given in pluginToLoad parameter and return instance of class which implements Plugin + * interface. 
+ * This method verifies whether loaded plugin is assignable to expectedInstanceOf class + * @param expectedInstanceOf class instance of interface caller is expecting + * @return Instance of Plugin + * @throws ClassNotFoundException className parameter available in Plugin configuration is not found + */ + @Nonnull + public Plugin instantiatePlugin(@Nonnull Class expectedInstanceOf) throws ClassNotFoundException { + Class clazz = this.loadClass(this._pluginConfig.getClassName(), true); + + try { + log.debug("Creating instance of plugin {}", this._pluginConfig.getClassName()); + Plugin plugin = (Plugin) clazz.newInstance(); + // Check loaded plugin has implemented the proper implementation of child interface + if (!expectedInstanceOf.isAssignableFrom(clazz)) { + throw new InstantiationException( + String.format("In plugin %s, the class %s has not implemented the interface %s", + this._pluginConfig.getName(), plugin.getClass().getCanonicalName(), + expectedInstanceOf.getCanonicalName())); + } + log.debug("Successfully created instance of plugin {}", this._pluginConfig.getClassName()); + return plugin; + } catch (InstantiationException | IllegalAccessException e) { + throw new RuntimeException(String.format("Failed to instantiate the plugin %s", this._pluginConfig.getName()), e); + } + } + + private String classNameToPath(@Nonnull String resourceName) { + // in-case of java class , we need to append the .class to last element + return resourceName.replaceAll("\\.", "/") + ".class"; + } + + private byte[] getClassData(ZipEntry zipEntry) throws ClassNotFoundException { + try (InputStream ins = this._pluginJarRef.getInputStream(zipEntry); + ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + byte[] buffer = new byte[4096]; + int bytesNumRead; + while ((bytesNumRead = ins.read(buffer)) != -1) { + baos.write(buffer, 0, bytesNumRead); + } + return baos.toByteArray(); + } catch (IOException e) { + log.debug("Failed to load the zipEntry {}", zipEntry.getName()); + throw new ClassNotFoundException(); + } + } + + @Override + protected Class loadClass(String s, boolean b) throws ClassNotFoundException { + log.debug("Load class {}", s); + String path = this.classNameToPath(s); + log.debug("File path {}", path); + // Check if requested class is available in plugin jar entries + if (!this._classPathVsZipEntry.containsKey(path)) { + // Try to load using Application class loader + log.debug("Class {} not found in plugin jar, trying application class loader chain", s); + for (ClassLoader classLoader : this._classLoaders) { + try { + log.debug("Looking in ClassLoader {}", classLoader.getClass().getName()); + return classLoader.loadClass(s); + } catch (ClassNotFoundException classNotFoundException) { + // Pass it and let search in next ClassLoader + } + } + log.debug("Class {} not found in application class-loader chain", s); + throw new ClassNotFoundException(); + } + + byte[] classBytes = getClassData(this._classPathVsZipEntry.get(path)); + + ProtectionDomain protectionDomain = + this._pluginPermissionManager.createProtectionDomain(this._pluginConfig.getPluginHomeDirectory()); + return defineClass(s, classBytes, 0, classBytes.length, protectionDomain); + } + + @Override + public URL getResource(String s) { + log.debug("Get resource {}", s); + return this.findResource(s); + } + + @Override + public Enumeration getResources(String s) throws IOException { + URL url = this.getResource(s); + if (url == null) { + log.debug("Returning empty enumeration"); + return Collections.emptyEnumeration(); + } + List 
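Taken together, the loadClass override above gives the plugin jar first claim on every class and only then falls back to the application class-loader chain, defining plugin classes under the plugin's own ProtectionDomain. A minimal usage sketch follows; the plugin name, home directory, jar path and class name are placeholders, and it assumes they point at a jar that really contains an Authenticator implementation.

import com.datahub.plugins.Plugin;
import com.datahub.plugins.auth.authentication.Authenticator;
import com.datahub.plugins.auth.configuration.AuthenticatorPluginConfig;
import com.datahub.plugins.common.PluginPermissionManager;
import com.datahub.plugins.common.SecurityMode;
import com.datahub.plugins.loader.IsolatedClassLoader;
import com.datahub.plugins.loader.PluginPermissionManagerImpl;
import java.nio.file.Paths;

public class IsolatedClassLoaderExample {
  public static void main(String[] args) throws ClassNotFoundException {
    // Placeholder layout: /etc/datahub/plugins/auth/my-authenticator/my-authenticator.jar
    AuthenticatorPluginConfig config = new AuthenticatorPluginConfig();
    config.setName("my-authenticator");
    config.setClassName("com.example.MyAuthenticator");
    config.setPluginHomeDirectory(Paths.get("/etc/datahub/plugins/auth/my-authenticator"));
    config.setPluginJarPath(Paths.get("/etc/datahub/plugins/auth/my-authenticator/my-authenticator.jar"));

    PluginPermissionManager permissionManager = new PluginPermissionManagerImpl(SecurityMode.RESTRICTED);
    IsolatedClassLoader loader = new IsolatedClassLoader(permissionManager, config);

    // The instance comes back only if the loaded class actually implements Authenticator;
    // otherwise instantiatePlugin fails as described above.
    Plugin plugin = loader.instantiatePlugin(Authenticator.class);
    System.out.println(plugin.getClass().getName());
  }
}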
urls = new ArrayList<>(1); + urls.add(url); + return Collections.enumeration(urls); + } + + @Override + public InputStream getResourceAsStream(String s) { + log.debug("Resource as stream = {}", s); + try { + URL url = this.findResource(s); + if (url == null) { + return null; + } + return url.openStream(); + } catch (IOException e) { + throw new RuntimeException(String.format("Resource %s not found", s), e); + } + } + + private Optional findResourceInPluginJar(String resource) { + if (this._classPathVsZipEntry.containsKey(resource)) { + StringBuilder builder = new StringBuilder(); + builder.append("jar:file:").append(this._pluginConfig.getPluginJarPath()).append("!/"); + builder.append(resource); + try { + log.debug("Resource {} is found in plugin jar at location {}", resource, builder); + return Optional.of(new URL(builder.toString())); + } catch (MalformedURLException e) { + throw new RuntimeException(String.format("Resource %s not found", resource), e); + } + } + return Optional.empty(); + } + + private Optional findResourceInPluginHome(String resource) { + try { + try (Stream stream = Files.find(this._pluginConfig.getPluginHomeDirectory(), 1, + ((path, basicFileAttributes) -> path.toFile().getName().equals(resource)))) { + List resources = stream.collect(Collectors.toList()); + if (resources.size() > 0) { + log.debug("Number of resources found {}", resources.size()); + log.debug("Resource {} is found in plugin directory", resource); + return Optional.of(resources.get(0).toUri().toURL()); + } + } + } catch (IOException e) { + throw new RuntimeException(String.format("Resource %s not found", resource), e); + } + + return Optional.empty(); + } + + /** + * Look for resource in below order + * - First search in plugin jar if not found + * - then search in plugin directory if not found then return null + * @param resource Resource to find + * @return URL of the resource + */ + @Override + protected URL findResource(String resource) { + Optional optionalURL = Optional.empty(); + String trimResource = StringUtils.strip(resource.trim(), "/"); + + log.debug("Finding resource = {}", trimResource); + + // Look for resource in jar entries + optionalURL = this.findResourceInPluginJar(trimResource); + if (optionalURL.isPresent()) { + return optionalURL.get(); + } + + // Look for resource in PLUGIN_HOME directory + optionalURL = this.findResourceInPluginHome(trimResource); + if (optionalURL.isPresent()) { + return optionalURL.get(); + } + + // Look for resource in application class loader chain + log.debug("Resource not found in plugin = {}", trimResource); + log.debug("Trying application class loader chain"); + for (ClassLoader classLoader : this._classLoaders) { + optionalURL = Optional.ofNullable(classLoader.getResource(trimResource)); + if (optionalURL.isPresent()) { + log.debug("Resource found in ClassLoader = {}", classLoader.getClass().getName()); + break; + } + } + + return optionalURL.orElse( + null); // As per java class-loader, this method should return null if resource is not found + } + + @Override + protected String findLibrary(String s) { + log.debug("Looking for library {}", s); + Path destinationPath = Paths.get(this._executionDirectory.toString(), s); + File file = destinationPath.toFile(); + + // Check if already present + if (file.exists()) { + log.debug("Library found in execution directory"); + return destinationPath.toString(); + } + + // create execution directory if not already created + if (!this._executionDirectory.toFile().exists()) { + if 
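From the plugin author's side the lookup order above is invisible: code inside the plugin simply asks its own class loader for a resource, and the IsolatedClassLoader answers from the plugin jar first, then from files in the plugin home directory, then from the application class-loader chain. A short sketch of that consumer view; the resource name is a placeholder.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

public class PluginResourceExample {
  // Reads the first line of a resource shipped with the plugin, or returns null when the
  // resource exists neither in the jar, nor in the plugin home, nor on the application class path.
  public String readBundledResource() throws IOException {
    try (InputStream in = getClass().getClassLoader().getResourceAsStream("plugin-settings.json")) {
      if (in == null) {
        return null;
      }
      try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
        return reader.readLine();
      }
    }
  }
}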
(!this._executionDirectory.toFile().mkdirs()) { + log.warn("Failed to create directory {}", this._executionDirectory); + return null; + } + } + // Look in plugin jar, plugin directory and chain of class loader + URL url = this.findResource(s); + if (url == null) { + log.debug("Library not found"); + return null; + } + + try { + // Write jar entry to destinationPath for class-loading + JarExtractor.write(url, destinationPath); + } catch (IOException e) { + throw new RuntimeException(e); + } + log.debug("Library found in ClassLoader"); + return destinationPath.toString(); + } + + @Override + protected Enumeration findResources(String s) throws IOException { + log.debug("Find resources = {}", s); + URL url = this.findResource(s); + if (url == null) { + log.debug("Returning empty enumeration"); + return Collections.emptyEnumeration(); + } + List urls = new ArrayList<>(1); + urls.add(url); + return Collections.enumeration(urls); + } +} diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/loader/JarExtractor.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/loader/JarExtractor.java new file mode 100644 index 0000000000000..a20e9d0760968 --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/loader/JarExtractor.java @@ -0,0 +1,33 @@ +package com.datahub.plugins.loader; + +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.nio.file.Path; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; + + +@Slf4j +class JarExtractor { + + private JarExtractor() { + } + + /** + * Write url content to destinationFilePath + * @param url + * @param destinationFilePath + * @throws IOException + */ + public static void write(@Nonnull URL url, @Nonnull Path destinationFilePath) throws IOException { + try (InputStream input = url.openStream()) { + try (FileOutputStream output = new FileOutputStream(destinationFilePath.toFile())) { + while (input.available() > 0) { + output.write(input.read()); + } + } + } + } +} \ No newline at end of file diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/loader/PluginPermissionManagerImpl.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/loader/PluginPermissionManagerImpl.java new file mode 100644 index 0000000000000..0596f8abcea74 --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/loader/PluginPermissionManagerImpl.java @@ -0,0 +1,42 @@ +package com.datahub.plugins.loader; + +import com.datahub.plugins.common.PluginPermissionManager; +import com.datahub.plugins.common.SecurityMode; +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.file.Path; +import java.security.CodeSource; +import java.security.Permissions; +import java.security.ProtectionDomain; +import java.security.cert.Certificate; +import javax.annotation.Nonnull; + + +public class PluginPermissionManagerImpl implements PluginPermissionManager { + + private final SecurityMode _securityMode; + + public PluginPermissionManagerImpl(@Nonnull SecurityMode securityMode) { + this._securityMode = securityMode; + } + + /** + * Create codeSource instance for the location of pluginHome to apply SecurityMode restriction to the plugin code + * @param pluginHome + * @return ProtectionDomain + */ + @Override + public ProtectionDomain createProtectionDomain(@Nonnull Path pluginHome) { + { + URL url = null; + try { + url = pluginHome.toUri().toURL(); + } catch (MalformedURLException e) { + throw new 
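One caution about JarExtractor above: its copy loop advances one byte at a time while input.available() > 0, and InputStream.available() is only an estimate that may legitimately report 0 before the stream is exhausted. The following is a sketch of an equivalent buffered copy that reads until end-of-stream instead; it is an alternative illustration, not the code shipped in this patch.

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;

final class BufferedJarExtractor {
  private BufferedJarExtractor() {
  }

  // Copies the URL content in 4 KB chunks until read() reports end-of-stream.
  static void write(URL url, Path destinationFilePath) throws IOException {
    try (InputStream input = url.openStream();
        OutputStream output = Files.newOutputStream(destinationFilePath)) {
      byte[] buffer = new byte[4096];
      int bytesRead;
      while ((bytesRead = input.read(buffer)) != -1) {
        output.write(buffer, 0, bytesRead);
      }
    }
  }
}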
RuntimeException(e);
+      }
+      Permissions permissions = this._securityMode.permissionsSupplier().apply(pluginHome);
+      CodeSource codeSource = new CodeSource(url, (Certificate[]) null);
+      return new ProtectionDomain(codeSource, permissions);
+    }
+  }
+}
diff --git a/metadata-service/plugin/src/test/java/com/datahub/plugins/auth/TestConfig.java b/metadata-service/plugin/src/test/java/com/datahub/plugins/auth/TestConfig.java
new file mode 100644
index 0000000000000..64c53f1cb6db3
--- /dev/null
+++ b/metadata-service/plugin/src/test/java/com/datahub/plugins/auth/TestConfig.java
@@ -0,0 +1,31 @@
+package com.datahub.plugins.auth;
+
+import com.datahub.plugins.common.PluginType;
+import com.datahub.plugins.configuration.Config;
+import com.datahub.plugins.configuration.PluginConfig;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import org.testng.annotations.Test;
+
+
+@Test
+public class TestConfig {
+  @Test
+  public void testConfig() {
+    PluginConfig authorizerConfig = new PluginConfig();
+    authorizerConfig.setName("apache-ranger-authorizer");
+    authorizerConfig.setType(PluginType.AUTHORIZER);
+    authorizerConfig.setParams(Map.of("className", "com.datahub.authorization.ranger.RangerAuthorizer", "configs",
+        Map.of("username", "foo", "password", "root123")));
+
+    PluginConfig authenticatorConfig = new PluginConfig();
+    authenticatorConfig.setName("sample-authenticator");
+    authenticatorConfig.setType(PluginType.AUTHENTICATOR);
+    authenticatorConfig.setParams(Map.of("className", "com.datahub.plugins.test.TestAuthenticator"));
+
+    List<PluginConfig> plugins = Arrays.asList(authorizerConfig, authenticatorConfig);
+
+    assert Config.builder().plugins(plugins).build() != null;
+  }
+}
diff --git a/metadata-service/plugin/src/test/java/com/datahub/plugins/auth/TestConfigProvider.java b/metadata-service/plugin/src/test/java/com/datahub/plugins/auth/TestConfigProvider.java
new file mode 100644
index 0000000000000..e311aae258109
--- /dev/null
+++ b/metadata-service/plugin/src/test/java/com/datahub/plugins/auth/TestConfigProvider.java
@@ -0,0 +1,72 @@
+package com.datahub.plugins.auth;
+
+import com.datahub.plugins.common.PluginConfig;
+import com.datahub.plugins.common.PluginType;
+import com.datahub.plugins.configuration.Config;
+import com.datahub.plugins.configuration.ConfigProvider;
+import com.datahub.plugins.factory.PluginConfigFactory;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+import java.util.function.Consumer;
+import org.testng.annotations.Test;
+
+
+@Test
+public class TestConfigProvider {
+  @Test
+  public void testConfigurationLoading() throws Exception {
+    Path pluginBaseDirectory = Paths.get("src", "test", "resources", "valid-base-plugin-dir1");
+    ConfigProvider configProvider = new ConfigProvider(pluginBaseDirectory);
+    Config config = configProvider.load().orElseThrow(() -> new Exception("Should not be empty"));
+
+    assert config != null;
+
+    PluginConfigFactory authenticatorPluginPluginConfigFactory = new PluginConfigFactory(config);
+    List<PluginConfig> authenticators =
+        authenticatorPluginPluginConfigFactory.loadPluginConfigs(PluginType.AUTHENTICATOR);
+
+    List<PluginConfig> authorizers = authenticatorPluginPluginConfigFactory.loadPluginConfigs(PluginType.AUTHORIZER);
+
+    assert authenticators.size() != 0;
+    assert authorizers.size() != 0;
+
+    Consumer<PluginConfig> validateAuthenticationPlugin = (plugin) -> {
+      assert plugin.getName().equals("apache-ranger-authenticator");
+
+      assert "com.datahub.ranger.Authenticator".equals(plugin.getClassName());
+
+      assert plugin.getEnabled();
+
+
String pluginJarPath = + Paths.get(pluginBaseDirectory.toString(), "apache-ranger-authenticator", "apache-ranger-authenticator.jar") + .toAbsolutePath() + .toString(); + assert pluginJarPath.equals(plugin.getPluginJarPath().toString()); + + String pluginDirectory = Paths.get(pluginBaseDirectory.toString(), plugin.getName()).toAbsolutePath().toString(); + assert pluginDirectory.equals(plugin.getPluginHomeDirectory().toString()); + }; + + Consumer validateAuthorizationPlugin = (plugin) -> { + assert plugin.getName().equals("apache-ranger-authorizer"); + + assert "com.datahub.ranger.Authorizer".equals(plugin.getClassName()); + + assert plugin.getEnabled(); + + assert Paths.get(pluginBaseDirectory.toString(), "apache-ranger-authorizer", "apache-ranger-authorizer.jar") + .toAbsolutePath() + .toString() + .equals(plugin.getPluginJarPath().toString()); + + assert Paths.get(pluginBaseDirectory.toString(), plugin.getName()) + .toAbsolutePath() + .toString() + .equals(plugin.getPluginHomeDirectory().toString()); + }; + + authenticators.forEach(validateAuthenticationPlugin); + authorizers.forEach(validateAuthorizationPlugin); + } +} diff --git a/metadata-service/plugin/src/test/java/com/datahub/plugins/auth/TestConfigValidationUtils.java b/metadata-service/plugin/src/test/java/com/datahub/plugins/auth/TestConfigValidationUtils.java new file mode 100644 index 0000000000000..d85bfc0379d17 --- /dev/null +++ b/metadata-service/plugin/src/test/java/com/datahub/plugins/auth/TestConfigValidationUtils.java @@ -0,0 +1,35 @@ +package com.datahub.plugins.auth; + +import com.datahub.plugins.common.ConfigValidationUtils; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import org.testng.annotations.Test; + + +@Test +public class TestConfigValidationUtils { + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testWhiteSpacesValidation() { + ConfigValidationUtils.whiteSpacesValidation("name", "plugin name with spaces"); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testMapShouldNotBeEmpty() { + ConfigValidationUtils.mapShouldNotBeEmpty("configs", Collections.emptyMap()); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testListShouldNotBeEmpty() { + ConfigValidationUtils.listShouldNotBeEmpty("plugins", Collections.emptyList()); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testListShouldNotHaveDuplicate() { + List list = new ArrayList<>(); + list.add("ranger-authorizer"); + list.add("ranger-authorizer"); + ConfigValidationUtils.listShouldNotHaveDuplicate("plugins", list); + } +} diff --git a/metadata-service/plugin/src/test/java/com/datahub/plugins/auth/TestIsolatedClassLoader.java b/metadata-service/plugin/src/test/java/com/datahub/plugins/auth/TestIsolatedClassLoader.java new file mode 100644 index 0000000000000..314849e8ebea5 --- /dev/null +++ b/metadata-service/plugin/src/test/java/com/datahub/plugins/auth/TestIsolatedClassLoader.java @@ -0,0 +1,208 @@ +package com.datahub.plugins.auth; + +import com.datahub.authentication.Authentication; +import com.datahub.authentication.AuthenticationException; +import com.datahub.authentication.AuthenticationRequest; +import com.datahub.authentication.AuthenticatorContext; +import com.datahub.authorization.AuthorizationRequest; +import com.datahub.authorization.AuthorizerContext; +import com.datahub.plugins.PluginConstant; +import com.datahub.plugins.auth.authentication.Authenticator; +import 
com.datahub.plugins.auth.authorization.Authorizer; +import com.datahub.plugins.auth.configuration.AuthenticatorPluginConfig; +import com.datahub.plugins.auth.configuration.AuthorizerPluginConfig; +import com.datahub.plugins.common.PluginConfig; +import com.datahub.plugins.common.PluginPermissionManager; +import com.datahub.plugins.common.PluginType; +import com.datahub.plugins.common.SecurityMode; +import com.datahub.plugins.configuration.Config; +import com.datahub.plugins.configuration.ConfigProvider; +import com.datahub.plugins.factory.PluginConfigFactory; +import com.datahub.plugins.loader.IsolatedClassLoader; +import com.datahub.plugins.loader.PluginPermissionManagerImpl; +import com.google.common.collect.ImmutableMap; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + + +/** + * This test case covers below scenarios + * 1. Loading plugin configuration and validating the loaded configuration against the expected configuration. + * This scenario is covered in @{link com.datahub.plugins.auth.TestIsolatedClassLoader#testConfigurationLoading()} + * test + * + * 2. Plugin name should be unique in config.yaml. The plugin framework should raise error if more than one plugin + * has the same name. + * This scenario is covered in @{link com.datahub.plugins.auth.TestIsolatedClassLoader#testDuplicatePluginName()} + * test + * + * 3. Developer can provide plugin jar file name in config.yaml. + * This scenario is covered in @{link com.datahub.plugins.auth.TestIsolatedClassLoader#testJarFileName()} test + * + * 4. Test @{link com.datahub.plugins.auth.TestIsolatedClassLoader#testAuthenticatorPlugin()} covers the valid + * authenticator plugin execution. + * Plugin used in this test-case is metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub + * /plugins/test/TestAuthenticator.java + * + * 5. Test @{link com.datahub.plugins.auth.TestIsolatedClassLoader#testAuthorizerPlugin()} covers the valid + * authorizer plugin execution + * Plugin used in this test-case is metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub + * /plugins/test/TestAuthorizer.java + * + * 6. The plugin framework should raise error if authenticator plugin is configured as authorizer plugin or vice-versa. + * This scenario is covered in @{link com.datahub.plugins.auth.TestIsolatedClassLoader#testIncorrectImplementation + * ()}. + * The test case tries to load authorizer plugin as authenticator plugin + */ +class TestIsolatedClassLoader { + + @BeforeClass + public void setSecurityManager() { + System.setSecurityManager(new SecurityManager()); + } + + @Test + public void testDuplicatePluginName() { + Path yamlConfig = Paths.get("src", "test", "resources", "duplicate-plugin-name"); + try { + (new ConfigProvider(yamlConfig)).load(); + } catch (RuntimeException e) { + assert e.getCause() + .getMessage() + .contains( + "Duplicate entry of apache-ranger-authorizer is found in plugins. 
plugins should not contain duplicate"); + } + } + + @Test + public void testJarFileName() throws Exception { + Path configPath = Paths.get("src", "test", "resources", "plugin-jar-from-jarFileName"); + + Path authenticatorPluginJarPath = Paths.get(configPath.toAbsolutePath().toString(), "apache-ranger-authenticator", + "apache-ranger-authenticator-v1.0.1.jar"); + Config config = (new ConfigProvider(configPath)).load().orElseThrow(() -> new Exception("Should not be empty")); + List pluginConfig = (new PluginConfigFactory(config)).loadPluginConfigs(PluginType.AUTHENTICATOR); + pluginConfig.forEach((pluginConfigWithJar) -> { + assert pluginConfigWithJar.getPluginJarPath().equals(authenticatorPluginJarPath); + }); + + Path authorizerPluginJarPath = Paths.get(configPath.toAbsolutePath().toString(), "apache-ranger-authorizer", + "apache-ranger-authorizer-v2.0.1.jar"); + List authorizerPluginConfigs = + (new PluginConfigFactory(config)).loadPluginConfigs(PluginType.AUTHORIZER); + + authorizerPluginConfigs.forEach((pluginConfigWithJar) -> { + assert pluginConfigWithJar.getPluginJarPath().equals(authorizerPluginJarPath); + }); + } + + public static Path getSamplePluginDirectory() { + // plugin directory + return Paths.get("src", "test", "resources", "sample-plugins").toAbsolutePath(); + } + + public static Path getSamplePluginJar() { + // plugin jar path + return Paths.get(getSamplePluginDirectory().toString(), "sample-plugins.jar"); + } + + public static Optional> getConfigs() { + // plugin configs + return Optional.of(ImmutableMap.of("key1", "value1", "key2", "value2", "key3", "value3")); + } + + public static AuthorizerPluginConfig getAuthorizerPluginConfig() { + AuthorizerPluginConfig authorizerPluginConfig = new AuthorizerPluginConfig(); + authorizerPluginConfig.setClassName("com.datahub.plugins.test.TestAuthorizer"); + authorizerPluginConfig.setConfigs(getConfigs()); + authorizerPluginConfig.setPluginHomeDirectory(getSamplePluginDirectory()); + authorizerPluginConfig.setPluginJarPath(getSamplePluginJar()); + // plugin name + authorizerPluginConfig.setName("sample-plugin-authorizer"); + + return authorizerPluginConfig; + } + + public static AuthenticatorPluginConfig getAuthenticatorPluginConfig() { + AuthenticatorPluginConfig authenticatorPluginConfig = new AuthenticatorPluginConfig(); + authenticatorPluginConfig.setClassName("com.datahub.plugins.test.TestAuthenticator"); + authenticatorPluginConfig.setConfigs(getConfigs()); + authenticatorPluginConfig.setPluginHomeDirectory(getSamplePluginDirectory()); + authenticatorPluginConfig.setPluginJarPath(getSamplePluginJar()); + // plugin name + authenticatorPluginConfig.setName("sample-plugin-authenticator"); + return authenticatorPluginConfig; + } + + @Test + public void testAuthenticatorPlugin() throws ClassNotFoundException, AuthenticationException { + // authenticator plugin config instance + AuthenticatorPluginConfig authenticatorPluginConfig = getAuthenticatorPluginConfig(); + // create IsolatedClassLoader + PluginPermissionManager permissionManager = new PluginPermissionManagerImpl(SecurityMode.RESTRICTED); + IsolatedClassLoader isolatedClassLoader = new IsolatedClassLoader(permissionManager, authenticatorPluginConfig); + // initiate and invoke the init and authenticate methods + Authenticator authenticator = (Authenticator) isolatedClassLoader.instantiatePlugin(Authenticator.class); + AuthenticatorContext authenticatorContext = new AuthenticatorContext( + ImmutableMap.of(PluginConstant.PLUGIN_HOME, 
authenticatorPluginConfig.getPluginHomeDirectory().toString())); + AuthenticationRequest request = new AuthenticationRequest(ImmutableMap.of("foo", "bar")); + authenticator.init(authenticatorPluginConfig.getConfigs().orElse(new HashMap<>()), authenticatorContext); + + Authentication authentication = authenticator.authenticate(request); + assert authentication.getActor().getId().equals("fake"); + } + + @Test + public void testAuthorizerPlugin() throws ClassNotFoundException, AuthenticationException { + // authenticator plugin config instance + AuthorizerPluginConfig authorizerPluginConfig = getAuthorizerPluginConfig(); + // create IsolatedClassLoader + PluginPermissionManager permissionManager = new PluginPermissionManagerImpl(SecurityMode.RESTRICTED); + IsolatedClassLoader isolatedClassLoader = new IsolatedClassLoader(permissionManager, authorizerPluginConfig); + // initiate and invoke the init and authenticate methods + Authorizer authorizer = (Authorizer) isolatedClassLoader.instantiatePlugin(Authorizer.class); + AuthorizerContext authorizerContext = new AuthorizerContext( + ImmutableMap.of(PluginConstant.PLUGIN_HOME, authorizerPluginConfig.getPluginHomeDirectory().toString()), null); + AuthorizationRequest authorizationRequest = new AuthorizationRequest("urn:li:user:fake", "test", Optional.empty()); + authorizer.init(authorizerPluginConfig.getConfigs().orElse(new HashMap<>()), authorizerContext); + assert authorizer.authorize(authorizationRequest).getMessage().equals("fake message"); + } + + @Test + public void testIncorrectImplementation() { + AuthorizerPluginConfig authorizerPluginConfig = getAuthorizerPluginConfig(); + // create IsolatedClassLoader + PluginPermissionManager permissionManager = new PluginPermissionManagerImpl(SecurityMode.RESTRICTED); + IsolatedClassLoader isolatedClassLoader = new IsolatedClassLoader(permissionManager, authorizerPluginConfig); + // initiate and invoke the init and authenticate methods + try { + // Authorizer configuration is provided, however here we were expecting that plugin should be of type + // Authenticator.class + Authorizer authorizer = (Authorizer) isolatedClassLoader.instantiatePlugin(Authenticator.class); + assert authorizer != null; + } catch (RuntimeException | ClassNotFoundException e) { + assert e.getCause() instanceof java.lang.InstantiationException; + } + } + + @Test + public void testLenientMode() throws ClassNotFoundException, AuthenticationException { + // authenticator plugin config instance + AuthenticatorPluginConfig authenticatorPluginConfig = getAuthenticatorPluginConfig(); + authenticatorPluginConfig.setClassName("com.datahub.plugins.test.TestLenientModeAuthenticator"); + // create IsolatedClassLoader + PluginPermissionManager permissionManager = new PluginPermissionManagerImpl(SecurityMode.LENIENT); + IsolatedClassLoader isolatedClassLoader = new IsolatedClassLoader(permissionManager, authenticatorPluginConfig); + // initiate and invoke the init and authenticate methods + Authenticator authenticator = (Authenticator) isolatedClassLoader.instantiatePlugin(Authenticator.class); + authenticator.init(authenticatorPluginConfig.getConfigs().orElse(new HashMap<>()), null); + AuthenticationRequest request = new AuthenticationRequest(ImmutableMap.of("foo", "bar")); + assert authenticator.authenticate(request) != null; + } +} diff --git a/metadata-service/plugin/src/test/java/com/datahub/plugins/auth/TestPluginConfigFactory.java b/metadata-service/plugin/src/test/java/com/datahub/plugins/auth/TestPluginConfigFactory.java new file 
mode 100644 index 0000000000000..1d182f5fa8ea7 --- /dev/null +++ b/metadata-service/plugin/src/test/java/com/datahub/plugins/auth/TestPluginConfigFactory.java @@ -0,0 +1,39 @@ +package com.datahub.plugins.auth; + +import com.datahub.plugins.common.PluginConfig; +import com.datahub.plugins.common.PluginType; +import com.datahub.plugins.configuration.Config; +import com.datahub.plugins.configuration.ConfigProvider; +import com.datahub.plugins.factory.PluginConfigFactory; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; +import org.testng.annotations.Test; + + +public class TestPluginConfigFactory { + + @Test + public void authConfig() throws Exception { + Path pluginBaseDirectory = Paths.get("src", "test", "resources", "valid-base-plugin-dir1"); + ConfigProvider configProvider = new ConfigProvider(pluginBaseDirectory); + Config config = configProvider.load().orElseThrow(() -> new Exception("Should not be empty")); + + assert config != null; + + PluginConfigFactory authenticatorPluginConfigFactory = new PluginConfigFactory(config); + + // Load authenticator plugin configuration + List authenticatorConfigs = + authenticatorPluginConfigFactory.loadPluginConfigs(PluginType.AUTHENTICATOR); + authenticatorConfigs.forEach(c -> { + assert c.getClassName().equals("com.datahub.ranger.Authenticator"); // className should match to Authenticator + }); + + // Load authorizer plugin configuration + List authorizerConfigs = authenticatorPluginConfigFactory.loadPluginConfigs(PluginType.AUTHORIZER); + authorizerConfigs.forEach(c -> { + assert c.getClassName().equals("com.datahub.ranger.Authorizer"); // className should match to Authorizer + }); + } +} diff --git a/metadata-service/plugin/src/test/java/com/datahub/plugins/auth/TestPluginPermissionManager.java b/metadata-service/plugin/src/test/java/com/datahub/plugins/auth/TestPluginPermissionManager.java new file mode 100644 index 0000000000000..e6882e7de3120 --- /dev/null +++ b/metadata-service/plugin/src/test/java/com/datahub/plugins/auth/TestPluginPermissionManager.java @@ -0,0 +1,75 @@ +package com.datahub.plugins.auth; + +import com.datahub.plugins.common.SecurityMode; +import com.datahub.plugins.loader.PluginPermissionManagerImpl; +import java.net.MalformedURLException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.security.Permission; +import java.security.PermissionCollection; +import java.security.ProtectionDomain; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.testng.annotations.Test; + + +@Test +public class TestPluginPermissionManager { + @Test + public void testRestrictedMode() throws MalformedURLException { + PluginPermissionManagerImpl pluginPermissionManager = new PluginPermissionManagerImpl(SecurityMode.RESTRICTED); + + Path pluginHome = Paths.get("src", "test", "resources", "valid-base-plugin-dir1", "apache-ranger-authenticator"); + + ProtectionDomain protectionDomain = pluginPermissionManager.createProtectionDomain(pluginHome.toAbsolutePath()); + + // provided pluginHome and codeSource in protection domain should be equal + assert pluginHome.toUri() + .toURL() + .toExternalForm() + .equals(protectionDomain.getCodeSource().getLocation().toExternalForm()); + + PermissionCollection permissionCollection = protectionDomain.getPermissions(); + List permissions = Collections.list(permissionCollection.elements()); + // It should have 4 permissions + assert permissions.size() == 4; + + Map map = new HashMap<>(); // expected 
permissions + map.put("*:1024-", "connect,resolve"); + map.put("*:80", "connect,resolve"); + map.put("*:443", "connect,resolve"); + map.put(pluginHome.toAbsolutePath() + "/*", "read,write,delete"); + + // Compare actual with expected + permissions.forEach(permission -> { + assert map.keySet().contains(permission.getName()); + assert map.values().contains(permission.getActions()); + }); + } + + public void testLenientMode() throws MalformedURLException { + PluginPermissionManagerImpl pluginPermissionManager = new PluginPermissionManagerImpl(SecurityMode.LENIENT); + + Path pluginHome = Paths.get("src", "test", "resources", "valid-base-plugin-dir1", "apache-ranger-authenticator"); + + ProtectionDomain protectionDomain = pluginPermissionManager.createProtectionDomain(pluginHome.toAbsolutePath()); + + // provided pluginHome and codeSource in protection domain should be equal + assert pluginHome.toUri() + .toURL() + .toExternalForm() + .equals(protectionDomain.getCodeSource().getLocation().toExternalForm()); + + PermissionCollection permissionCollection = protectionDomain.getPermissions(); + List permissions = Collections.list(permissionCollection.elements()); + + // It should have 1 permission + assert permissions.size() == 1; + + permissions.forEach(permission -> { + assert permission.getName().equals(""); + }); + } +} diff --git a/metadata-service/plugin/src/test/resources/duplicate-plugin-name/apache-ranger-authenticator/apache-ranger-authenticator.jar b/metadata-service/plugin/src/test/resources/duplicate-plugin-name/apache-ranger-authenticator/apache-ranger-authenticator.jar new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-service/plugin/src/test/resources/duplicate-plugin-name/apache-ranger-authorizer/apache-ranger-authorizer.jar b/metadata-service/plugin/src/test/resources/duplicate-plugin-name/apache-ranger-authorizer/apache-ranger-authorizer.jar new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-service/plugin/src/test/resources/duplicate-plugin-name/config.yml b/metadata-service/plugin/src/test/resources/duplicate-plugin-name/config.yml new file mode 100644 index 0000000000000..8c47a0c17e2e2 --- /dev/null +++ b/metadata-service/plugin/src/test/resources/duplicate-plugin-name/config.yml @@ -0,0 +1,18 @@ +plugins: + - name: "apache-ranger-authorizer" + type: "authorizer" + enabled: "true" + params: + className: "com.datahub.authorization.ranger.RangerAuthorizer" + configs: + username: "foo" + password: "bar" + + - name: "apache-ranger-authorizer" # duplicate name + type: "authorizer" + enabled: "true" + params: + className: "com.datahub.authorization.ranger.RangerAuthorizer" + configs: + username: "foo" + password: "bar" \ No newline at end of file diff --git a/metadata-service/plugin/src/test/resources/plugin-jar-from-jarFileName/apache-ranger-authenticator/apache-ranger-authenticator-v1.0.1.jar b/metadata-service/plugin/src/test/resources/plugin-jar-from-jarFileName/apache-ranger-authenticator/apache-ranger-authenticator-v1.0.1.jar new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-service/plugin/src/test/resources/plugin-jar-from-jarFileName/apache-ranger-authorizer/apache-ranger-authorizer-v2.0.1.jar b/metadata-service/plugin/src/test/resources/plugin-jar-from-jarFileName/apache-ranger-authorizer/apache-ranger-authorizer-v2.0.1.jar new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-service/plugin/src/test/resources/plugin-jar-from-jarFileName/config.yml 
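For reference, the four grants that the RESTRICTED-mode assertions above enumerate map onto standard java.security permission types. This sketch simply rebuilds that expected set; the real SecurityMode.permissionsSupplier() is outside this excerpt, and the plugin-home path below is a placeholder.

import java.io.FilePermission;
import java.net.SocketPermission;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.security.Permissions;

public class RestrictedModePermissionsExample {
  // Mirrors the expectations of testRestrictedMode: outbound sockets on 80, 443 and the
  // unprivileged port range, plus read/write/delete inside the plugin's own directory.
  public static Permissions restrictedPermissions(Path pluginHome) {
    Permissions permissions = new Permissions();
    permissions.add(new SocketPermission("*:1024-", "connect,resolve"));
    permissions.add(new SocketPermission("*:80", "connect,resolve"));
    permissions.add(new SocketPermission("*:443", "connect,resolve"));
    permissions.add(new FilePermission(pluginHome.toAbsolutePath() + "/*", "read,write,delete"));
    return permissions;
  }

  public static void main(String[] args) {
    System.out.println(restrictedPermissions(Paths.get("/etc/datahub/plugins/auth/my-plugin")));
  }
}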
b/metadata-service/plugin/src/test/resources/plugin-jar-from-jarFileName/config.yml new file mode 100644 index 0000000000000..a1e995b7747c0 --- /dev/null +++ b/metadata-service/plugin/src/test/resources/plugin-jar-from-jarFileName/config.yml @@ -0,0 +1,20 @@ +plugins: + - name: "apache-ranger-authorizer" + type: "authorizer" + enabled: "true" + params: + className: "com.datahub.authorization.ranger.RangerAuthorizer" + jarFileName: "apache-ranger-authorizer-v2.0.1.jar" + configs: + username: "foo" + password: "bar" + + - name: "apache-ranger-authenticator" + type: "authenticator" + enabled: "true" + params: + className: "com.datahub.authorization.ranger.RangerAuthorizer" + jarFileName: "apache-ranger-authenticator-v1.0.1.jar" + configs: + username: "foo" + password: "bar" \ No newline at end of file diff --git a/metadata-service/plugin/src/test/resources/valid-base-plugin-dir1/apache-ranger-authenticator/apache-ranger-authenticator.jar b/metadata-service/plugin/src/test/resources/valid-base-plugin-dir1/apache-ranger-authenticator/apache-ranger-authenticator.jar new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-service/plugin/src/test/resources/valid-base-plugin-dir1/apache-ranger-authorizer/apache-ranger-authorizer.jar b/metadata-service/plugin/src/test/resources/valid-base-plugin-dir1/apache-ranger-authorizer/apache-ranger-authorizer.jar new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-service/plugin/src/test/resources/valid-base-plugin-dir1/config.yml b/metadata-service/plugin/src/test/resources/valid-base-plugin-dir1/config.yml new file mode 100644 index 0000000000000..389b349f1a487 --- /dev/null +++ b/metadata-service/plugin/src/test/resources/valid-base-plugin-dir1/config.yml @@ -0,0 +1,19 @@ +plugins: + - name: "apache-ranger-authorizer" + type: "authorizer" + enabled: "true" + params: + className: "com.datahub.ranger.Authorizer" + configs: + key1: "value1" + key2: "value2" + key3: "value3" + - name: "apache-ranger-authenticator" + type: "authenticator" + enabled: "true" + params: + className: "com.datahub.ranger.Authenticator" + configs: + key1: "value1" + key2: "value2" + key3: "value3" diff --git a/metadata-service/plugin/src/test/sample-test-plugins/build.gradle b/metadata-service/plugin/src/test/sample-test-plugins/build.gradle new file mode 100644 index 0000000000000..7d4b43402a586 --- /dev/null +++ b/metadata-service/plugin/src/test/sample-test-plugins/build.gradle @@ -0,0 +1,25 @@ +apply plugin: 'java' + +jar { + archiveFileName = "sample-plugins.jar" +} +dependencies { + implementation project(path: ':metadata-auth:auth-api') + implementation externalDependency.lombok + implementation externalDependency.logbackClassic; + + testImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.1' + testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine:5.8.1' + + annotationProcessor externalDependency.lombok +} + +task copyJar(type: Copy) { + from jar // here it automatically reads jar file produced from jar task + into '../resources/sample-plugins/' +} + +clean { + delete 'buildDir', '../resources/sample-plugins' +} +build.dependsOn copyJar \ No newline at end of file diff --git a/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestAuthenticator.java b/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestAuthenticator.java new file mode 100644 index 0000000000000..4fb958de2edd6 --- /dev/null +++ 
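The two test resource trees above differ only in how the plugin jar is located: with an explicit jarFileName the tests earlier in this patch expect <plugin-base-directory>/<plugin-name>/<jarFileName>, and without it (as in valid-base-plugin-dir1) they expect <plugin-name>.jar inside the plugin's directory. A sketch of that path arithmetic, assuming the resolution really is this mechanical; the config providers that implement it are outside this excerpt.

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Optional;

public class PluginJarPathExample {
  // <plugin-base-directory>/<plugin-name>/<jarFileName, or plugin-name + ".jar" when absent>
  static Path resolveJarPath(Path pluginBaseDirectory, String pluginName, Optional<String> jarFileName) {
    String jar = jarFileName.orElse(pluginName + ".jar");
    return pluginBaseDirectory.resolve(pluginName).resolve(jar).toAbsolutePath();
  }

  public static void main(String[] args) {
    Path withExplicitName = resolveJarPath(Paths.get("src", "test", "resources", "plugin-jar-from-jarFileName"),
        "apache-ranger-authenticator", Optional.of("apache-ranger-authenticator-v1.0.1.jar"));
    Path withDefaultName = resolveJarPath(Paths.get("src", "test", "resources", "valid-base-plugin-dir1"),
        "apache-ranger-authenticator", Optional.empty());
    System.out.println(withExplicitName);
    System.out.println(withDefaultName);
  }
}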
b/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestAuthenticator.java @@ -0,0 +1,132 @@ +package com.datahub.plugins.test; + +import com.datahub.authentication.Actor; +import com.datahub.authentication.ActorType; +import com.datahub.authentication.Authentication; +import com.datahub.authentication.AuthenticationException; +import com.datahub.authentication.AuthenticationRequest; +import com.datahub.authentication.AuthenticatorContext; +import com.datahub.plugins.PluginConstant; +import com.datahub.plugins.auth.authentication.Authenticator; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.Socket; +import java.net.URL; +import java.net.UnknownHostException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.security.AccessControlException; +import java.util.Map; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.extern.slf4j.Slf4j; + + +@Slf4j +public class TestAuthenticator implements Authenticator { + private AuthenticatorContext _authenticatorContext; + + @Override + public void init(@Nonnull Map authenticatorConfig, @Nullable AuthenticatorContext context) { + /* + * authenticatorConfig contains key, value pairs set in plugins[].params.configs of config.yml + */ + this._authenticatorContext = context; + assert authenticatorConfig.containsKey("key1"); + assert authenticatorConfig.containsKey("key2"); + assert authenticatorConfig.containsKey("key3"); + assert authenticatorConfig.get("key1").equals("value1"); + assert authenticatorConfig.get("key2").equals("value2"); + assert authenticatorConfig.get("key3").equals("value3"); + + log.info("Init succeed"); + } + + private void readInputStream() { + // Test resource as stream is working + try (InputStream inputStream = this.getClass().getClassLoader().getResourceAsStream("foo_bar.json")) { + assert inputStream != null; + BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); + assert reader.readLine() != null; + log.info("authenticate succeed"); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private void accessFile() { + // Try to create a file on PLUGIN_DIRECTORY to test plugin should have permission to read/write on plugin directory + Path pluginDirectory = + Paths.get((String) this._authenticatorContext.data().get(PluginConstant.PLUGIN_HOME), "tmp_file1.txt"); + try { + + try (BufferedWriter writer = new BufferedWriter(new FileWriter(pluginDirectory.toString()))) { + writer.write("Happy writing"); + } + + if (!pluginDirectory.toFile().delete()) { + throw new IOException("Not able to delete file"); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public void accessSystemProperty() { + try { + System.getProperty("user.home"); + throw new RuntimeException("Plugin is able to access system properties"); // we should not reach here + } catch (AccessControlException accessControlException) { + log.info("Expected: Don't have permission to read system properties"); + } + } + + public void accessSocket() { + try { + URL url = new URL("https://github.com"); + try (InputStream input = url.openStream()) { + assert input != null; + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public void accessLowerSocket() { + try { + new Socket("localhost", 50); + throw new RuntimeException("Plugin is able to access 
lower port"); + } catch (AccessControlException e) { + log.info("Expected: Don't have permission to open socket on lower port"); + } catch (UnknownHostException e) { + throw new RuntimeException(e); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Nullable + @Override + public Authentication authenticate(@Nonnull AuthenticationRequest authenticationRequest) + throws AuthenticationException { + // Call some resource related API to test IsolatedClassLoader + URL url = this.getClass().getClassLoader().getResource("foo_bar.json"); + assert url != null; + // Test IsolatedClassLoader stream access + this.readInputStream(); + // We should have permission to write and delete file from plugin directory + this.accessFile(); + // We should not have access to System properties + this.accessSystemProperty(); + // We should be able to open socket + this.accessSocket(); + // We should not be able to access lower socket + this.accessLowerSocket(); + + return new Authentication(new Actor(ActorType.USER, "fake"), "foo:bar"); + } +} diff --git a/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestAuthorizer.java b/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestAuthorizer.java new file mode 100644 index 0000000000000..b6bc282f10b65 --- /dev/null +++ b/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestAuthorizer.java @@ -0,0 +1,81 @@ +package com.datahub.plugins.test; + +import com.datahub.authorization.AuthorizationRequest; +import com.datahub.authorization.AuthorizationResult; +import com.datahub.authorization.AuthorizedActors; +import com.datahub.authorization.AuthorizerContext; +import com.datahub.authorization.ResourceSpec; +import com.datahub.plugins.PluginConstant; +import com.datahub.plugins.auth.authorization.Authorizer; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.URL; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Map; +import java.util.Optional; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; + + +@Slf4j +public class TestAuthorizer implements Authorizer { + private AuthorizerContext _authorizerContext; + + @Override + public void init(@Nonnull Map authorizerConfig, @Nonnull AuthorizerContext ctx) { + this._authorizerContext = ctx; + assert authorizerConfig.containsKey("key1"); + assert authorizerConfig.containsKey("key2"); + assert authorizerConfig.containsKey("key3"); + assert authorizerConfig.get("key1").equals("value1"); + assert authorizerConfig.get("key2").equals("value2"); + assert authorizerConfig.get("key3").equals("value3"); + + log.info("Init succeed"); + } + + @Override + public AuthorizationResult authorize(@Nonnull AuthorizationRequest request) { + // Call some resource related API to test IsolatedClassLoader + URL url = this.getClass().getClassLoader().getResource("foo_bar.json"); + assert url != null; + + // Try to create a file on PLUGIN_DIRECTORY to test plugin should have permission to read/write on plugin directory + Path pluginDirectory = + Paths.get((String) this._authorizerContext.data().get(PluginConstant.PLUGIN_HOME), "tmp_file1.txt"); + try { + + try (BufferedWriter writer = new BufferedWriter(new FileWriter(pluginDirectory.toString()))) { + writer.write("Happy writing"); + } + + if 
(!pluginDirectory.toFile().delete()) { + throw new IOException("Not able to delete file"); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + + // Test resource as stream is working + try (InputStream inputStream = this.getClass().getClassLoader().getResourceAsStream("foo_bar.json")) { + assert inputStream != null; + BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); + assert reader.readLine() != null; + log.info("authorizer succeed"); + } catch (IOException e) { + throw new RuntimeException(e); + } + return new AuthorizationResult(request, AuthorizationResult.Type.ALLOW, "fake message"); + } + + @Override + public AuthorizedActors authorizedActors(String privilege, Optional resourceSpec) { + return new AuthorizedActors("ALL", null, null, true, true); + } +} + diff --git a/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestLenientModeAuthenticator.java b/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestLenientModeAuthenticator.java new file mode 100644 index 0000000000000..2cc27f11a6254 --- /dev/null +++ b/metadata-service/plugin/src/test/sample-test-plugins/src/main/java/com/datahub/plugins/test/TestLenientModeAuthenticator.java @@ -0,0 +1,30 @@ +package com.datahub.plugins.test; + +import com.datahub.authentication.Actor; +import com.datahub.authentication.ActorType; +import com.datahub.authentication.Authentication; +import com.datahub.authentication.AuthenticationException; +import com.datahub.authentication.AuthenticationRequest; +import com.datahub.authentication.AuthenticatorContext; +import com.datahub.plugins.auth.authentication.Authenticator; +import java.util.Map; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + + +public class TestLenientModeAuthenticator implements Authenticator { + @Override + public void init(@Nonnull Map authenticatorConfig, @Nullable AuthenticatorContext context) { + + } + + @Nullable + @Override + public Authentication authenticate(@Nonnull AuthenticationRequest authenticationRequest) + throws AuthenticationException { + // We should be able to access user directory as we are going to be loaded with Lenient mode IsolatedClassLoader + String userHome = System.getProperty("user.home"); + assert userHome != null; + return new Authentication(new Actor(ActorType.USER, "fake"), "foo:bar"); + } +} diff --git a/metadata-service/plugin/src/test/sample-test-plugins/src/main/resources/foo_bar.json b/metadata-service/plugin/src/test/sample-test-plugins/src/main/resources/foo_bar.json new file mode 100644 index 0000000000000..5240a10dc4bee --- /dev/null +++ b/metadata-service/plugin/src/test/sample-test-plugins/src/main/resources/foo_bar.json @@ -0,0 +1,3 @@ +{ + "message": "Happy Reading" +} \ No newline at end of file diff --git a/metadata-service/restli-client/build.gradle b/metadata-service/restli-client/build.gradle index 7cc643a626c06..f1f33542466dd 100644 --- a/metadata-service/restli-client/build.gradle +++ b/metadata-service/restli-client/build.gradle @@ -2,7 +2,7 @@ apply plugin: 'pegasus' dependencies { compile project(':metadata-service:restli-api') - compile project(':metadata-service:auth-api') + compile project(':metadata-auth:auth-api') compile project(path: ':metadata-service:restli-api', configuration: 'restClient') compile project(':metadata-events:mxe-schemas') compile project(':metadata-utils') diff --git 
a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java index b767a380ed063..f1e5352c9da98 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java @@ -157,7 +157,7 @@ public Map batchGet(@Nonnull final Set urns, @Nonnull final Au } /** - * Batch get a set of aspects for a single entity. + * Batch get a set of aspects for multiple entities. * * @param entityName the entity type to fetch * @param urns the urns of the entities to batch get diff --git a/metadata-service/restli-servlet-impl/build.gradle b/metadata-service/restli-servlet-impl/build.gradle index 08d3a1c2e3092..e1768c18d0d48 100644 --- a/metadata-service/restli-servlet-impl/build.gradle +++ b/metadata-service/restli-servlet-impl/build.gradle @@ -33,7 +33,7 @@ dependencies { } compile project(':metadata-service:restli-api') - compile project(':metadata-service:auth-api') + compile project(':metadata-auth:auth-api') compile project(path: ':metadata-service:restli-api', configuration: 'dataTemplate') compile project(':li-utils') compile project(':metadata-models') diff --git a/metadata-service/war/src/main/resources/security.policy b/metadata-service/war/src/main/resources/security.policy new file mode 100644 index 0000000000000..ad439d2acaf81 --- /dev/null +++ b/metadata-service/war/src/main/resources/security.policy @@ -0,0 +1,21 @@ +// Allow DataHub GMS runtime all access +grant codeBase "file:${datahub.gms.home}/-" { + permission java.security.AllPermission; + permission java.io.FilePermission "/tmp/jetty-0_0_0_0-8080-war_war-_-any-11061940556061128854/webapp/WEB-INF/lib/auth-impl.jar", "read"; +}; + +// Allow all access for Jetty as Jetty at runtime extract and loads the jar from tmp directory +grant codeBase "file:/tmp/-" { + permission java.security.AllPermission; + permission java.io.FilePermission "/tmp/jetty-0_0_0_0-8080-war_war-_-any-11061940556061128854/webapp/WEB-INF/lib/auth-impl.jar", "read"; +}; + +// Allow gradle build to work as normal +grant codeBase "file:${datahub.project.root.dir}/-" { + permission java.security.AllPermission; +}; + +// Allow gradle to work as normal +grant codeBase "file:${user.home}/.gradle/-" { + permission java.security.AllPermission; +}; diff --git a/metadata-service/war/src/main/webapp/WEB-INF/authServlet-servlet.xml b/metadata-service/war/src/main/webapp/WEB-INF/authServlet-servlet.xml index 18dadce9a4814..320cba06826a4 100644 --- a/metadata-service/war/src/main/webapp/WEB-INF/authServlet-servlet.xml +++ b/metadata-service/war/src/main/webapp/WEB-INF/authServlet-servlet.xml @@ -2,5 +2,5 @@ - + \ No newline at end of file diff --git a/metadata-service/war/src/main/webapp/WEB-INF/beans.xml b/metadata-service/war/src/main/webapp/WEB-INF/beans.xml index f4d7adc4fc11c..bb2a833253d73 100644 --- a/metadata-service/war/src/main/webapp/WEB-INF/beans.xml +++ b/metadata-service/war/src/main/webapp/WEB-INF/beans.xml @@ -10,7 +10,7 @@ http://www.springframework.org/schema/task/spring-task-3.0.xsd" > - + diff --git a/metadata-service/war/src/main/webapp/WEB-INF/web.xml b/metadata-service/war/src/main/webapp/WEB-INF/web.xml index 9275ca87f22f3..66a579d5be0f0 100644 --- a/metadata-service/war/src/main/webapp/WEB-INF/web.xml +++ b/metadata-service/war/src/main/webapp/WEB-INF/web.xml @@ -97,7 +97,7 @@ 
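The security.policy added above only takes effect when GMS runs under a Java security manager, and its ${datahub.gms.home}, ${datahub.project.root.dir} and ${user.home} placeholders are resolved from system properties available to the JVM at startup (for example via the standard -Djava.security.manager and -Djava.security.policy options; the exact GMS start-up wiring is not shown in this patch). A trivial sketch for checking whether such a policy is in force:

public class SecurityPolicyCheck {
  public static void main(String[] args) {
    // When no security manager is installed, the grants in security.policy are simply ignored.
    System.out.println("security manager installed: " + (System.getSecurityManager() != null));
    System.out.println("datahub.gms.home = " + System.getProperty("datahub.gms.home"));
  }
}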
         AuthenticationFilter
-        com.datahub.authentication.filter.AuthenticationFilter
+        com.datahub.auth.authentication.filter.AuthenticationFilter
         true
diff --git a/settings.gradle b/settings.gradle
index 3237b255bd2bc..70737b1e08c8b 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -13,7 +13,8 @@ include 'metadata-service:restli-client'
 include 'metadata-service:restli-servlet-impl'
 include 'metadata-service:graphql-servlet-impl'
 include 'metadata-service:openapi-servlet'
-include 'metadata-service:auth-ranger-impl'
+include 'metadata-service:plugin'
+include 'metadata-service:plugin:src:test:sample-test-plugins'
 include 'metadata-dao-impl:kafka-producer'
 include 'metadata-events:mxe-avro-1.7'
 include 'metadata-events:mxe-registration'
@@ -43,6 +44,6 @@ include 'metadata-integration:java:spark-lineage'
 include 'metadata-integration:java:datahub-client'
 include 'metadata-integration:java:datahub-protobuf'
 include 'ingestion-scheduler'
-include 'datahub-ranger-plugin'
 include 'metadata-ingestion-modules:airflow-plugin'
 include 'smoke-test'
+include 'metadata-auth:auth-api'
diff --git a/smoke-test/tests/cypress/cypress/integration/mutations/domains.js b/smoke-test/tests/cypress/cypress/integration/mutations/domains.js
index 2a91875e1aa9a..cb80d83898b55 100644
--- a/smoke-test/tests/cypress/cypress/integration/mutations/domains.js
+++ b/smoke-test/tests/cypress/cypress/integration/mutations/domains.js
@@ -1,4 +1,5 @@
 const test_domain = "CypressDomainTest";
+let domain_created_urn = ""
 
 describe("add remove domain", () => {
     it("create domain", () => {
@@ -14,12 +15,55 @@ describe("add remove domain", () => {
             .parents("[data-testid^='urn:li:domain:']")
             .invoke('attr', 'data-testid')
             .then((data_test_id) => {
-                cy.log(data_test_id)
+                domain_created_urn = data_test_id;
             })
     })
 
-    // add asset to domain
-    // Search filter by domain
-    // Remove entity from domain
-    // Delete a domain - ensure that the dangling reference is deleted on the asset
+    it("add entities to domain", () => {
+        cy.login();
+        cy.goToDomainList();
+        cy.clickOptionWithText(test_domain);
+        cy.waitTextVisible("Add assets")
+        cy.clickOptionWithText("Add assets")
+        cy.get(".ant-modal-content").within(() => {
+            cy.get('[data-testid="search-input"]').click().type("jaffle")
+            cy.waitTextVisible("jaffle_shop")
+            cy.get(".ant-checkbox-input").first().click()
+            cy.get("#continueButton").click()
+        })
+        cy.waitTextVisible("Added assets to Domain!")
+    })
+
+    it("search filter by domain", () => {
+        cy.login();
+        cy.goToStarSearchList()
+        cy.waitTextVisible(test_domain)
+        cy.get('[data-testid="facet-domains-' + domain_created_urn + '"]').click()
+        cy.waitTextVisible("jaffle_shop")
+    })
+
+    it("remove entity from domain", () => {
+        cy.login();
+        cy.goToDomainList();
+        cy.removeDomainFromDataset(
+            "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers,PROD)",
+            "customers",
+            domain_created_urn
+        )
+    })
+
+    it("delete a domain and ensure dangling reference is deleted on entities", () => {
+        cy.login();
+        cy.goToDomainList();
+        cy.get('[data-testid="' + domain_created_urn + '"]').within(() => {
+            cy.get(".ant-dropdown-trigger").click();
+        });
+        cy.clickOptionWithText("Delete");
+        cy.clickOptionWithText("Yes");
+        cy.ensureTextNotPresent(test_domain)
+
+        cy.goToContainer("urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb")
+        cy.waitTextVisible("jaffle_shop")
+        cy.ensureTextNotPresent(test_domain)
+    })
 });
\ No newline at end of file
diff --git a/smoke-test/tests/cypress/cypress/integration/mutations/mutations.js b/smoke-test/tests/cypress/cypress/integration/mutations/mutations.js
index 37c1db3edcf8d..58d99741a287d 100644
--- a/smoke-test/tests/cypress/cypress/integration/mutations/mutations.js
+++ b/smoke-test/tests/cypress/cypress/integration/mutations/mutations.js
@@ -2,7 +2,7 @@ describe("mutations", () => {
     before(() => {
         // warm up elastic by issuing a `*` search
         cy.login();
-        cy.visit("/search?query=%2A");
+        cy.goToStarSearchList();
         cy.wait(5000);
     });
 
diff --git a/smoke-test/tests/cypress/cypress/support/commands.js b/smoke-test/tests/cypress/cypress/support/commands.js
index 38f7a5231f908..c9174825bff84 100644
--- a/smoke-test/tests/cypress/cypress/support/commands.js
+++ b/smoke-test/tests/cypress/cypress/support/commands.js
@@ -83,6 +83,12 @@ Cypress.Commands.add("goToUserList", () => {
     cy.waitTextVisible("Manage Users & Groups");
 })
 
+Cypress.Commands.add("goToStarSearchList", () => {
+    cy.visit("/search?query=%2A")
+    cy.waitTextVisible("Showing")
+    cy.waitTextVisible("results")
+})
+
 Cypress.Commands.add("openThreeDotDropdown", () => {
     cy.get('[data-testid="entity-header-dropdown"]').click();
 });
@@ -143,6 +149,12 @@ Cypress.Commands.add('addTermToDataset', (urn, dataset_name, term) => {
     cy.contains(term);
 });
 
+Cypress.Commands.add("removeDomainFromDataset", (urn, dataset_name, domain_urn) => {
+    cy.goToDataset(urn, dataset_name);
+    cy.get('.sidebar-domain-section [href="/domain/' + domain_urn + '"] .anticon-close').click();
+    cy.clickOptionWithText("Yes");
+})
+
 Cypress.Commands.add("openEntityTab", (tab) => {
     const selector = 'div[id$="' + tab + '"]:nth-child(1)'
     cy.highlighElement(selector);
diff --git a/smoke-test/tests/cypress/integration_test.py b/smoke-test/tests/cypress/integration_test.py
index 898e2a1baba4f..c1f229f7580ed 100644
--- a/smoke-test/tests/cypress/integration_test.py
+++ b/smoke-test/tests/cypress/integration_test.py
@@ -1,16 +1,117 @@
+from typing import List
+
 import pytest
 import subprocess
 import os
 
-from tests.utils import ingest_file_via_rest
-from tests.utils import delete_urns_from_file
+from tests.utils import (
+    create_datahub_step_state_aspects,
+    get_admin_username,
+    ingest_file_via_rest,
+    delete_urns_from_file,
+)
+
+CYPRESS_TEST_DATA_DIR = "tests/cypress"
+
+TEST_DATA_FILENAME = "data.json"
+TEST_DBT_DATA_FILENAME = "cypress_dbt_data.json"
+TEST_SCHEMA_BLAME_DATA_FILENAME = "schema-blame-data.json"
+TEST_ONBOARDING_DATA_FILENAME: str = "onboarding.json"
+
+HOME_PAGE_ONBOARDING_IDS: List[str] = [
+    "global-welcome-to-datahub",
+    "home-page-ingestion",
+    "home-page-domains",
+    "home-page-platforms",
+    "home-page-most-popular",
+    "home-page-search-bar",
+]
+
+SEARCH_ONBOARDING_IDS: List[str] = [
+    "search-results-filters",
+    "search-results-advanced-search",
+]
+
+ENTITY_PROFILE_ONBOARDING_IDS: List[str] = [
+    "entity-profile-entities",
+    "entity-profile-properties",
+    "entity-profile-documentation",
+    "entity-profile-lineage",
+    "entity-profile-schema",
+    "entity-profile-owners",
+    "entity-profile-tags",
+    "entity-profile-glossary-terms",
+    "entity-profile-domains",
+]
+
+INGESTION_ONBOARDING_IDS: List[str] = [
+    "ingestion-create-source",
+    "ingestion-refresh-sources",
+]
+
+BUSINESS_GLOSSARY_ONBOARDING_IDS: List[str] = [
+    "business-glossary-intro",
+    "business-glossary-create-term",
+    "business-glossary-create-term-group",
+]
+
+DOMAINS_ONBOARDING_IDS: List[str] = [
+    "domains-intro",
+    "domains-create-domain",
+]
+
+USERS_ONBOARDING_IDS: List[str] = [
+    "users-intro",
+    "users-sso",
+    "users-invite-link",
+    "users-assign-role",
+]
+
+GROUPS_ONBOARDING_IDS: List[str] = [
+    "groups-intro",
+    "groups-create-group",
+]
+
+ROLES_ONBOARDING_IDS: List[str] = [
+    "roles-intro",
+]
+
+POLICIES_ONBOARDING_IDS: List[str] = [
+    "policies-intro",
+    "policies-create-policy",
+]
+
+ONBOARDING_ID_LISTS: List[List[str]] = [
+    HOME_PAGE_ONBOARDING_IDS,
+    SEARCH_ONBOARDING_IDS,
+    ENTITY_PROFILE_ONBOARDING_IDS,
+    INGESTION_ONBOARDING_IDS,
+    BUSINESS_GLOSSARY_ONBOARDING_IDS,
+    DOMAINS_ONBOARDING_IDS,
+    USERS_ONBOARDING_IDS,
+    GROUPS_ONBOARDING_IDS,
+    ROLES_ONBOARDING_IDS,
+    POLICIES_ONBOARDING_IDS,
+]
+
+ONBOARDING_IDS: List[str] = []
+for id_list in ONBOARDING_ID_LISTS:
+    ONBOARDING_IDS.extend(id_list)
 
 
 def ingest_data():
+    print("creating onboarding data file")
+    create_datahub_step_state_aspects(
+        get_admin_username(),
+        ONBOARDING_IDS,
+        f"{CYPRESS_TEST_DATA_DIR}/{TEST_ONBOARDING_DATA_FILENAME}",
+    )
+
     print("ingesting test data")
-    ingest_file_via_rest("tests/cypress/data.json")
-    ingest_file_via_rest("tests/cypress/cypress_dbt_data.json")
-    ingest_file_via_rest("tests/cypress/schema-blame-data.json")
+    ingest_file_via_rest(f"{CYPRESS_TEST_DATA_DIR}/{TEST_DATA_FILENAME}")
+    ingest_file_via_rest(f"{CYPRESS_TEST_DATA_DIR}/{TEST_DBT_DATA_FILENAME}")
+    ingest_file_via_rest(f"{CYPRESS_TEST_DATA_DIR}/{TEST_SCHEMA_BLAME_DATA_FILENAME}")
+    ingest_file_via_rest(f"{CYPRESS_TEST_DATA_DIR}/{TEST_ONBOARDING_DATA_FILENAME}")
 
 
 @pytest.fixture(scope="module", autouse=True)
@@ -18,29 +119,40 @@ def ingest_cleanup_data():
     ingest_data()
     yield
    print("removing test data")
-    delete_urns_from_file("tests/cypress/data.json")
-    delete_urns_from_file("tests/cypress/cypress_dbt_data.json")
-    delete_urns_from_file("tests/cypress/schema-blame-data.json")
+    delete_urns_from_file(f"{CYPRESS_TEST_DATA_DIR}/{TEST_DATA_FILENAME}")
+    delete_urns_from_file(f"{CYPRESS_TEST_DATA_DIR}/{TEST_DBT_DATA_FILENAME}")
+    delete_urns_from_file(f"{CYPRESS_TEST_DATA_DIR}/{TEST_SCHEMA_BLAME_DATA_FILENAME}")
+    delete_urns_from_file(f"{CYPRESS_TEST_DATA_DIR}/{TEST_ONBOARDING_DATA_FILENAME}")
+
+    print("deleting onboarding data file")
+    if os.path.exists(f"{CYPRESS_TEST_DATA_DIR}/{TEST_ONBOARDING_DATA_FILENAME}"):
+        os.remove(f"{CYPRESS_TEST_DATA_DIR}/{TEST_ONBOARDING_DATA_FILENAME}")
 
 
 def test_run_cypress(frontend_session, wait_for_healthchecks):
     # Run with --record option only if CYPRESS_RECORD_KEY is non-empty
     record_key = os.getenv("CYPRESS_RECORD_KEY")
     if record_key:
-        print('Running Cypress tests with recording')
+        print("Running Cypress tests with recording")
         command = "NO_COLOR=1 npx cypress run --record"
     else:
-        print('Running Cypress tests without recording')
+        print("Running Cypress tests without recording")
         # command = "NO_COLOR=1 npx cypress --version"
         command = "NO_COLOR=1 npx cypress run"
     # Add --headed --spec '**/mutations/mutations.js' (change spec name)
     # in case you want to see the browser for debugging
-    proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd="tests/cypress")
+    proc = subprocess.Popen(
+        command,
+        shell=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        cwd=f"{CYPRESS_TEST_DATA_DIR}",
+    )
     stdout = proc.stdout.read()
     stderr = proc.stderr.read()
     return_code = proc.wait()
     print(stdout.decode("utf-8"))
-    print('stderr output:')
+    print("stderr output:")
     print(stderr.decode("utf-8"))
-    print('return code', return_code)
-    assert(return_code == 0)
+    print("return code", return_code)
+    assert return_code == 0
diff --git a/smoke-test/tests/utils.py b/smoke-test/tests/utils.py
index 52884c30b72aa..1715756799a78 100644
--- a/smoke-test/tests/utils.py
+++ b/smoke-test/tests/utils.py
@@ -1,7 +1,7 @@
 import json
 import os
 from datetime import datetime, timedelta
-from typing import Tuple
+from typing import Any, Dict, List, Tuple
 from time import sleep
 
 import requests_wrapper as requests
@@ -10,6 +10,8 @@ from datahub.cli.docker_cli import check_local_docker_containers
 from datahub.ingestion.run.pipeline import Pipeline
 
+TIME: int = 1581407189000
+
 
 def get_frontend_session():
     session = requests.Session()
@@ -25,6 +27,10 @@ def get_frontend_session():
     return session
 
 
+def get_admin_username() -> str:
+    return os.getenv("ADMIN_USERNAME", "datahub")
+
+
 def get_admin_credentials():
     return (
         os.getenv("ADMIN_USERNAME", "datahub"),
@@ -163,3 +169,36 @@ def get_timestampmillis_at_start_of_day(relative_day_num: int) -> int:
 
 def get_strftime_from_timestamp_millis(ts_millis: int) -> str:
     return datetime.fromtimestamp(ts_millis / 1000).strftime("%Y-%m-%d %H:%M:%S")
+
+
+def create_datahub_step_state_aspect(
+    username: str, onboarding_id: str
+) -> Dict[str, Any]:
+    entity_urn = f"urn:li:dataHubStepState:urn:li:corpuser:{username}-{onboarding_id}"
+    print(f"Creating dataHubStepState aspect for {entity_urn}")
+    return {
+        "auditHeader": None,
+        "entityType": "dataHubStepState",
+        "entityUrn": entity_urn,
+        "changeType": "UPSERT",
+        "aspectName": "dataHubStepStateProperties",
+        "aspect": {
+            "value": f'{{"properties":{{}},"lastModified":{{"actor":"urn:li:corpuser:{username}","time":{TIME}}}}}',
+            "contentType": "application/json",
+        },
+        "systemMetadata": None,
+    }
+
+
+def create_datahub_step_state_aspects(
+    username: str, onboarding_ids: List[str], onboarding_filename: str
+) -> None:
+    """
+    For a specific user, creates dataHubStepState aspects for each onboarding id in the list
+    """
+    aspects_dict: List[Dict[str, Any]] = [
+        create_datahub_step_state_aspect(username, onboarding_id)
+        for onboarding_id in onboarding_ids
+    ]
+    with open(onboarding_filename, "w") as f:
+        json.dump(aspects_dict, f, indent=2)
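Reviewer note, not part of the patch: a minimal sketch of how the new smoke-test helpers above are expected to compose, assuming a running DataHub instance reachable from the smoke-test environment. The scratch path and the two onboarding ids below are illustrative only; the imported functions all come from the diff above.

# Hypothetical usage sketch mirroring ingest_data() in integration_test.py.
from tests.utils import (
    create_datahub_step_state_aspects,
    get_admin_username,
    ingest_file_via_rest,
)

onboarding_file = "tests/cypress/onboarding.json"  # illustrative scratch path
create_datahub_step_state_aspects(
    get_admin_username(),  # "datahub" unless ADMIN_USERNAME is set
    ["global-welcome-to-datahub", "home-page-ingestion"],  # any subset of the onboarding ids
    onboarding_file,  # writes a JSON list of dataHubStepState aspect proposals to this path
)
ingest_file_via_rest(onboarding_file)  # ingesting the aspects marks those onboarding steps as
                                       # completed, presumably so Cypress runs without onboarding pop-ups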