airbytehq · tuliren · Sep 14, 2021 · Aug 25, 2021 · Aug 25, 2021 · Aug 31, 2021
@@ -172,6 +172,7 @@ jobs:
           SOURCE_ZUORA_TEST_CREDS: ${{ secrets.SOURCE_ZUORA_TEST_CREDS }}
           SOURCE_BAMBOO_HR_CREDS: ${{ secrets.SOURCE_BAMBOO_HR_CREDS }}
           SOURCE_BIGCOMMERCE_CREDS: ${{ secrets.SOURCE_BIGCOMMERCE_CREDS }}
+          DESTINATION_DATABRICKS_CREDS: ${{ secrets.DESTINATION_DATABRICKS_CREDS }}
       - run: |
           docker login -u airbytebot -p ${DOCKER_PASSWORD}
           ./tools/integrations/manage.sh publish airbyte-integrations/${{ github.event.inputs.connector }} ${{ github.event.inputs.run-tests }}

@@ -172,6 +172,7 @@ jobs:
           SOURCE_ZUORA_TEST_CREDS: ${{ secrets.SOURCE_ZUORA_TEST_CREDS }}
           SOURCE_BAMBOO_HR_CREDS: ${{ secrets.SOURCE_BAMBOO_HR_CREDS }}
           SOURCE_BIGCOMMERCE_CREDS: ${{ secrets.SOURCE_BIGCOMMERCE_CREDS }}
+          DESTINATION_DATABRICKS_CREDS: ${{ secrets.DESTINATION_DATABRICKS_CREDS }}
       - run: |
           ./tools/bin/ci_integration_test.sh ${{ github.event.inputs.connector }}
         name: test ${{ github.event.inputs.connector }}

diff --git a/airbyte-integrations/builds.md b/airbyte-integrations/builds.md
@@ -89,6 +89,7 @@
 | :--- | :--- |
 | Azure Blob Storage | [![destination-azure-blob-storage](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-azure-blob-storage%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-azure-blob-storage) |
 | BigQuery   | [![destination-bigquery](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-bigquery%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-bigquery) |
+| Databricks | [![destination-databricks](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-databricks%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-databricks) |
 | Google Cloud Storage (GCS) | [![destination-gcs](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-s3%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-gcs) |
 | Google PubSub   | [![destination-pubsub](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-pubsub%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-pubsub) |
 | Kafka   | [![destination-kafka](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-kafka%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-kafka) |

diff --git a/airbyte-integrations/connectors/destination-databricks/.dockerignore b/airbyte-integrations/connectors/destination-databricks/.dockerignore
@@ -0,0 +1,3 @@
+*
+!Dockerfile
+!build
diff --git a/airbyte-integrations/connectors/destination-databricks/.gitignore b/airbyte-integrations/connectors/destination-databricks/.gitignore
@@ -0,0 +1,6 @@
+# The driver is not checked into the source code due to legal reasons.
+# You can download the driver here:
+# https://databricks.com/spark/jdbc-drivers-download
+# By downloading this driver, you agree to the terms & conditions:
+# https://databricks.com/jdbc-odbc-driver-license
+lib/SparkJDBC42.jar
diff --git a/airbyte-integrations/connectors/destination-databricks/BOOTSTRAP.md b/airbyte-integrations/connectors/destination-databricks/BOOTSTRAP.md
@@ -0,0 +1,6 @@
+# Databricks Destination Connector Bootstrap
+
+The Databricks Connector enables a developer to sync data into a Databricks cluster. It does so in two steps:
+
+1. Persist source data in S3 staging files in the Parquet format.
+2. Create a Delta table from the Parquet staging files.
diff --git a/airbyte-integrations/connectors/destination-databricks/Dockerfile b/airbyte-integrations/connectors/destination-databricks/Dockerfile
@@ -0,0 +1,11 @@
+# Image for the Databricks destination connector, layered on the Airbyte Java integration base image.
+FROM airbyte/integration-base-java:dev
+
+WORKDIR /airbyte
+# Connector name; reused below to locate and name the distribution tarball.
+ENV APPLICATION destination-databricks
+
+# Copy the connector's distribution tarball from build/distributions into the working directory.
+COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar
+
+# Unpack the tarball, stripping its leading path component so the contents sit directly in /airbyte.
+RUN tar xf ${APPLICATION}.tar --strip-components=1
+
+# Per the connector README, the Gradle build takes the image tag and name from these two labels.
+LABEL io.airbyte.version=0.1.0
+LABEL io.airbyte.name=airbyte/destination-databricks
diff --git a/airbyte-integrations/connectors/destination-databricks/README.md b/airbyte-integrations/connectors/destination-databricks/README.md
@@ -0,0 +1,82 @@
+# Destination Databricks
+
+This is the repository for the Databricks destination connector in Java.
+For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/databricks).
+
+## Databricks JDBC Driver
+This connector requires a JDBC driver to connect to a Databricks cluster. The driver is developed by Simba. Before downloading and using this driver, you must agree to the [JDBC ODBC driver license](https://databricks.com/jdbc-odbc-driver-license). This means that you can only use this driver to connect third-party applications to Apache Spark SQL within a Databricks offering using the ODBC and/or JDBC protocols. The driver can be downloaded from [here](https://databricks.com/spark/jdbc-drivers-download).
+
+This is currently a private connector that is only available in Airbyte Cloud. To build and publish this connector, first download the driver and put it under the `lib` directory. Please do not publish this connector publicly.  We are working on a solution to publicize it.
+
+## Local development
+
+#### Building via Gradle
+From the Airbyte repository root, run:
+```
+./gradlew :airbyte-integrations:connectors:destination-databricks:build
+```
+
+#### Create credentials
+**If you are a community contributor**, you will need access to AWS S3 and a Databricks cluster to run the integration tests:
+
+- Create a Databricks cluster. See [documentation](https://docs.databricks.com/clusters/create.html).
+- Create an S3 bucket. See [documentation](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys).
+- Grant the Databricks cluster full access to the S3 bucket. Or mount it as Databricks File System (DBFS). See [documentation](https://docs.databricks.com/data/data-sources/aws/amazon-s3.html).
+- Place both Databricks and S3 credentials in `sample_secrets/config.json`, which conforms to the spec file in `src/main/resources/spec.json`.
+- Rename the directory from `sample_secrets` to `secrets`.
+- Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information.
+
+**If you are an Airbyte core member**:
+
+- Get the `destination databricks creds` secret from LastPass, and put it in `sample_secrets/config.json`.
+- Rename the directory from `sample_secrets` to `secrets`.
+
+### Locally running the connector docker image
+
+#### Build
+Build the connector image via Gradle:
+```
+./gradlew :airbyte-integrations:connectors:destination-databricks:airbyteDocker
+```
+When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in
+the Dockerfile.
+
+#### Run
+Then run any of the connector commands as follows:
+```
+docker run --rm airbyte/destination-databricks:dev spec
+docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-databricks:dev check --config /secrets/config.json
+docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-databricks:dev discover --config /secrets/config.json
+docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-databricks:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json
+```
+
+## Testing
+We use `JUnit` for Java tests.
+
+### Unit and Integration Tests
+Place unit tests under `src/test/java/io/airbyte/integrations/destination/databricks`.
+
+#### Acceptance Tests
+Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in
+`src/test-integration/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationAcceptanceTest.java`.
+
+### Using gradle to run tests
+All commands should be run from airbyte project root.
+To run unit tests:
+```
+./gradlew :airbyte-integrations:connectors:destination-databricks:unitTest
+```
+To run acceptance and custom integration tests:
+```
+./gradlew :airbyte-integrations:connectors:destination-databricks:integrationTest
+```
+
+## Dependency Management
+
+### Publishing a new version of the connector
+You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what?
+1. Make sure your changes are passing unit and integration tests.
+1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)).
+1. Create a Pull Request.
+1. Pat yourself on the back for being an awesome contributor.
+1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master.
@@ -0,0 +1,31 @@
+// Gradle build script for the Databricks destination connector.
+plugins {
+    id 'application'
+    id 'airbyte-docker'
+    id 'airbyte-integration-test-java'
+}
+
+application {
+    // Entry point of the connector: the class that declares main().
+    mainClass = 'io.airbyte.integrations.destination.databricks.DatabricksDestination'
+}
+
+dependencies {
+    implementation project(':airbyte-db:lib')
+    implementation project(':airbyte-config:models')
+    implementation project(':airbyte-protocol:models')
+    implementation project(':airbyte-integrations:bases:base-java')
+    implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs)
+    implementation project(':airbyte-integrations:connectors:destination-jdbc')
+    implementation project(':airbyte-integrations:connectors:destination-s3')
+    // The Spark JDBC driver is not checked into the repo for legal reasons (the jar is git-ignored).
+    // Download it into the lib directory before building; see the connector README.
+    implementation files("lib/SparkJDBC42.jar")
+
+    // Parquet, plus the Hadoop and JSON-to-Avro libraries declared alongside it.
+    implementation group: 'org.apache.hadoop', name: 'hadoop-common', version: '3.3.0'
+    implementation group: 'org.apache.hadoop', name: 'hadoop-aws', version: '3.3.0'
+    implementation group: 'org.apache.hadoop', name: 'hadoop-mapreduce-client-core', version: '3.3.0'
+    implementation group: 'org.apache.parquet', name: 'parquet-avro', version: '1.12.0'
+    implementation group: 'tech.allegro.schema.json2avro', name: 'converter', version: '0.2.10'
+
+    // Integration tests use the standard destination test suite and this connector itself.
+    integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-destination-test')
+    integrationTestJavaImplementation project(':airbyte-integrations:connectors:destination-databricks')
+}
diff --git a/airbyte-integrations/connectors/destination-databricks/lib/.keep b/airbyte-integrations/connectors/destination-databricks/lib/.keep
diff --git a/airbyte-integrations/connectors/destination-databricks/sample_secrets/config.json b/airbyte-integrations/connectors/destination-databricks/sample_secrets/config.json
@@ -0,0 +1,15 @@
+{
+  "databricks_server_hostname": "required",
+  "databricks_http_path": "required",
+  "databricks_port": "443",
+  "databricks_personal_access_token": "required",
+  "database_schema": "public",
+  "data_source": {
+    "data_source_type": "S3",
+    "s3_bucket_name": "required",
+    "s3_bucket_path":"required",
+    "s3_bucket_region": "required",
+    "s3_access_key_id": "required",
+    "s3_secret_access_key": "required"
+  }
+}
@@ -0,0 +1,14 @@
+package io.airbyte.integrations.destination.databricks;
+
+import java.util.Set;
+
+/**
+ * Constants shared by the Databricks destination connector.
+ */
+public class DatabricksConstants {
+
+  /**
+   * User name handed to the JDBC connection. {@code DatabricksDestination} passes it together with
+   * the personal access token from the connector config when creating the database.
+   */
+  public static final String DATABRICKS_USERNAME = "token";
+  /** Fully qualified class name of the Spark JDBC driver (the jar is supplied separately under lib). */
+  public static final String DATABRICKS_DRIVER_CLASS = "com.simba.spark.jdbc.Driver";
+
+  /**
+   * Default {@code key = value} table property assignments; both switch on a
+   * {@code delta.autoOptimize} setting. The code that consumes this set is outside this file --
+   * presumably it is applied when the destination tables are created (TODO confirm).
+   */
+  public static final Set<String> DEFAULT_TBL_PROPERTIES = Set.of(
+      "delta.autoOptimize.optimizeWrite = true",
+      "delta.autoOptimize.autoCompact = true");
+
+}
@@ -0,0 +1,101 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2020 Airbyte
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package io.airbyte.integrations.destination.databricks;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import io.airbyte.db.Databases;
+import io.airbyte.db.jdbc.JdbcDatabase;
+import io.airbyte.integrations.base.AirbyteMessageConsumer;
+import io.airbyte.integrations.base.IntegrationRunner;
+import io.airbyte.integrations.destination.ExtendedNameTransformer;
+import io.airbyte.integrations.destination.jdbc.SqlOperations;
+import io.airbyte.integrations.destination.jdbc.copy.CopyConsumerFactory;
+import io.airbyte.integrations.destination.jdbc.copy.CopyDestination;
+import io.airbyte.integrations.destination.jdbc.copy.s3.S3StreamCopier;
+import io.airbyte.protocol.models.AirbyteMessage;
+import io.airbyte.protocol.models.ConfiguredAirbyteCatalog;
+import java.util.function.Consumer;
+
+/**
+ * Airbyte destination connector for Databricks, built on {@link CopyDestination}.
+ *
+ * <p>Supplies the Databricks-specific pieces to the copy flow: the parsed
+ * {@code DatabricksDestinationConfig}, a JDBC connection to the cluster, the SQL operations, the
+ * name transformer and the stream copier factory.
+ */
+public class DatabricksDestination extends CopyDestination {
+
+  /**
+   * Passes {@code "database_schema"} to the parent constructor. The string matches the schema field
+   * of the connector config; how {@link CopyDestination} uses it is not visible in this file.
+   */
+  public DatabricksDestination() {
+    super("database_schema");
+  }
+
+  /**
+   * Command-line entry point: runs a new {@link DatabricksDestination} through
+   * {@link IntegrationRunner} with the given arguments.
+   *
+   * @param args command-line arguments forwarded unchanged to the runner
+   * @throws Exception whatever the runner propagates
+   */
+  public static void main(String[] args) throws Exception {
+    new IntegrationRunner(new DatabricksDestination()).run(args);
+  }
+
+  /**
+   * Builds the message consumer for a sync via {@link CopyConsumerFactory#create}.
+   *
+   * @param config raw connector configuration
+   * @param catalog configured catalog describing the streams to write
+   * @param outputRecordCollector callback handed through to the consumer factory
+   * @return the consumer created by the factory
+   */
+  @Override
+  public AirbyteMessageConsumer getConsumer(JsonNode config, ConfiguredAirbyteCatalog catalog, Consumer<AirbyteMessage> outputRecordCollector) {
+    DatabricksDestinationConfig databricksConfig = DatabricksDestinationConfig.get(config);
+    // NOTE(review): getDatabase(config) parses the JSON config a second time; the static
+    // getDatabase(databricksConfig) overload could reuse the instance parsed above.
+    return CopyConsumerFactory.create(
+        outputRecordCollector,
+        getDatabase(config),
+        getSqlOperations(),
+        getNameTransformer(),
+        databricksConfig,
+        catalog,
+        new DatabricksStreamCopierFactory(),
+        databricksConfig.getDatabaseSchema());
+  }
+
+  /**
+   * Checks the S3 staging location by delegating to
+   * {@link S3StreamCopier#attemptS3WriteAndDelete} with the S3 settings taken from the config.
+   *
+   * @param config raw connector configuration
+   */
+  @Override
+  public void checkPersistence(JsonNode config) {
+    DatabricksDestinationConfig databricksConfig = DatabricksDestinationConfig.get(config);
+    S3StreamCopier.attemptS3WriteAndDelete(databricksConfig.getS3DestinationConfig().getS3Config());
+  }
+
+  /**
+   * @return a new {@code DatabricksNameTransformer}
+   */
+  @Override
+  public ExtendedNameTransformer getNameTransformer() {
+    return new DatabricksNameTransformer();
+  }
+
+  /**
+   * Parses the raw config and opens a JDBC database handle for it.
+   *
+   * @param jsonConfig raw connector configuration
+   * @return the database created by {@link #getDatabase(DatabricksDestinationConfig)}
+   */
+  @Override
+  public JdbcDatabase getDatabase(JsonNode jsonConfig) {
+    return getDatabase(DatabricksDestinationConfig.get(jsonConfig));
+  }
+
+  /**
+   * @return a new {@code DatabricksSqlOperations}
+   */
+  @Override
+  public SqlOperations getSqlOperations() {
+    return new DatabricksSqlOperations();
+  }
+
+  /**
+   * Formats the JDBC URL for the cluster. Hostname, port and HTTP path come from the config; the
+   * {@code default} database, HTTP transport, {@code ssl=1} and the {@code Airbyte} user agent
+   * entry are fixed.
+   *
+   * @param databricksConfig parsed connector configuration
+   * @return the {@code jdbc:spark://} connection string
+   */
+  static String getDatabricksConnectionString(DatabricksDestinationConfig databricksConfig) {
+    return String.format("jdbc:spark://%s:%s/default;transportMode=http;ssl=1;httpPath=%s;UserAgentEntry=Airbyte",
+        databricksConfig.getDatabricksServerHostname(),
+        databricksConfig.getDatabricksPort(),
+        databricksConfig.getDatabricksHttpPath());
+  }
+
+  /**
+   * Creates the JDBC database from the constant user name, the personal access token from the
+   * config, the connection string built above and the driver class constant.
+   *
+   * @param databricksConfig parsed connector configuration
+   * @return the database returned by {@link Databases#createJdbcDatabase}
+   */
+  static JdbcDatabase getDatabase(DatabricksDestinationConfig databricksConfig) {
+    return Databases.createJdbcDatabase(
+        DatabricksConstants.DATABRICKS_USERNAME,
+        databricksConfig.getDatabricksPersonalAccessToken(),
+        getDatabricksConnectionString(databricksConfig),
+        DatabricksConstants.DATABRICKS_DRIVER_CLASS);
+  }
+
+}