Skip to content

Commit

Permalink
🎉 New destination: Kinesis (#7799)
Browse files Browse the repository at this point in the history
* add kinesis destination connector

* format code

* add javadoc and general documentation

* fix buffer bug and refactor code

* rename vars to common airbyte usage

* run format

* run format + seed file

* add eof

Co-authored-by: Marcos Marx <[email protected]>
  • Loading branch information
itaseskii and marcosmarxm authored Nov 22, 2021
1 parent 1f7a42b commit 0eba52e
Show file tree
Hide file tree
Showing 31 changed files with 1,576 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"destinationDefinitionId": "6d1d66d4-26ab-4602-8d32-f85894b04955",
"name": "Kinesis",
"dockerRepository": "airbyte/destination-kinesis",
"dockerImageTag": "0.1.0",
"documentationUrl": "https://docs.airbyte.io/integrations/destinations/kinesis"
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@
dockerRepository: airbyte/destination-kafka
dockerImageTag: 0.1.2
documentationUrl: https://docs.airbyte.io/integrations/destinations/kafka
- name: Kinesis
destinationDefinitionId: 6d1d66d4-26ab-4602-8d32-f85894b04955
dockerRepository: airbyte/destination-kinesis
dockerImageTag: 0.1.0
documentationUrl: https://docs.airbyte.io/integrations/destinations/kinesis
- name: Local CSV
destinationDefinitionId: 8be1cf83-fde1-477f-a4ad-318d23c9f3c6
dockerRepository: airbyte/destination-csv
Expand Down
56 changes: 56 additions & 0 deletions airbyte-config/init/src/main/resources/seed/destination_specs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1257,6 +1257,62 @@
supportsDBT: false
supported_destination_sync_modes:
- "append"
- dockerImage: "airbyte/destination-kinesis:0.1.0"
spec:
documentationUrl: "https://docs.airbyte.io/integrations/destinations/kinesis"
connectionSpecification:
$schema: "http://json-schema.org/draft-07/schema#"
title: "Kinesis Destination Spec"
type: "object"
required:
- "shardCount"
- "accessKey"
- "privateKey"
- "bufferSize"
additionalProperties: true
properties:
endpoint:
title: "Endpoint"
description: "Aws Kinesis endpoint."
type: "string"
order: 0
region:
title: "Region"
description: "Aws region."
type: "string"
order: 1
shardCount:
title: "shardCount"
description: "Number of shards to which the data should be streamed."
type: "integer"
default: 5
order: 2
accessKey:
title: "accessKey"
description: "Aws access key."
airbyte_secret: true
type: "string"
order: 3
privateKey:
title: "privateKey"
description: "Aws private key."
airbyte_secret: true
type: "string"
order: 4
bufferSize:
title: "bufferSize"
description: "Buffer size for storing kinesis records before being batch\
\ streamed."
type: "integer"
minimum: 1
maximum: 500
default: 100
order: 5
supportsIncremental: true
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes:
- "append"
- dockerImage: "airbyte/destination-csv:0.2.8"
spec:
documentationUrl: "https://docs.airbyte.io/integrations/destinations/local-csv"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
*
!Dockerfile
!build
11 changes: 11 additions & 0 deletions airbyte-integrations/connectors/destination-kinesis/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
FROM airbyte/integration-base-java:dev

WORKDIR /airbyte
ENV APPLICATION destination-kinesis

COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar

RUN tar xf ${APPLICATION}.tar --strip-components=1

LABEL io.airbyte.version=0.1.0
LABEL io.airbyte.name=airbyte/destination-kinesis
68 changes: 68 additions & 0 deletions airbyte-integrations/connectors/destination-kinesis/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Destination Kinesis

This is the repository for the Kinesis destination connector in Java.
For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/kinesis).

## Local development

#### Building via Gradle
From the Airbyte repository root, run:
```
./gradlew :airbyte-integrations:connectors:destination-kinesis:build
```

#### Create credentials
**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`.
Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information.

**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials.

### Locally running the connector docker image

#### Build
Build the connector image via Gradle:
```
./gradlew :airbyte-integrations:connectors:destination-kinesis:airbyteDocker
```
When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in
the Dockerfile.

#### Run
Then run any of the connector commands as follows:
```
docker run --rm airbyte/destination-kinesis:dev spec
docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-kinesis:dev check --config /secrets/config.json
docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-kinesis:dev discover --config /secrets/config.json
docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-kinesis:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json
```

## Testing
We use `JUnit` for Java tests.

### Unit and Integration Tests
Place unit tests under `src/test/java/io/airbyte/integrations/destination/kinesis`.

#### Acceptance Tests
Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in
`src/test-integration/java/io/airbyte/integrations/destination/kinesis/KinesisDestinationAcceptanceTest.java`.

### Using gradle to run tests
All commands should be run from airbyte project root.
To run unit tests:
```
./gradlew :airbyte-integrations:connectors:destination-kinesis:unitTest
```
To run acceptance and custom integration tests:
```
./gradlew :airbyte-integrations:connectors:destination-kinesis:integrationTest
```

## Dependency Management

### Publishing a new version of the connector
You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what?
1. Make sure your changes are passing unit and integration tests.
1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)).
1. Create a Pull Request.
1. Pat yourself on the back for being an awesome contributor.
1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master.
22 changes: 22 additions & 0 deletions airbyte-integrations/connectors/destination-kinesis/bootstrap.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Kinesis Destination

Amazon Kinesis makes it easy to collect, process, and analyze real-time, streaming data so you can get timely insights and react quickly to new information. Amazon Kinesis offers key capabilities to cost-effectively process streaming data at any scale, along with the flexibility to choose the tools that best suit the requirements of your application.
You can use Kinesis Data Streams for rapid and continuous data intake and aggregation. The type of data used can include IT infrastructure log data, application logs, social media, market data feeds, and web clickstream data. Because the response time for the data intake and processing is in real time, the processing is typically lightweight.
[Read more about Amazon Kinesis](https://aws.amazon.com/kinesis/)

This connector maps each incoming Airbyte namespace and stream to its own Kinesis stream, which is created and configured with the provided shard count. The connector
supports the `append` sync mode which enables records to be directly streamed to an existing Kinesis stream.

The implementation uses the [Kinesis](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/examples-kinesis.html) client from the AWS SDK for Java v2 to access the Kinesis service.
[KinesisStream](./src/main/java/io/airbyte/integrations/destination/kinesis/KinesisStream.java) is the main entrypoint for communicating with Kinesis and provides the needed functionality. Internally it uses a KinesisClient retrieved from the
[KinesisClientPool](./src/main/java/io/airbyte/integrations/destination/kinesis/KinesisClientPool.java). Records retrieved from the Kinesis stream are mapped to
[KinesisRecord](./src/main/java/io/airbyte/integrations/destination/kinesis/KinesisRecord.java). Buffering of records is also supported, which should increase performance and throughput by sending multiple records in a single HTTP request.

The [KinesisMessageConsumer](./src/main/java/io/airbyte/integrations/destination/kinesis/KinesisMessageConsumer.java)
class contains the logic for handling airbyte messages, creating the needed Kinesis streams and streaming the received data.

## Development

See the [KinesisStream](./src/main/java/io/airbyte/integrations/destination/kinesis/KinesisStream.java) class on how to use the Kinesis client for accessing the Kinesis service.

If you want to learn more, read the [AWS docs](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/examples-kinesis.html).
30 changes: 30 additions & 0 deletions airbyte-integrations/connectors/destination-kinesis/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
plugins {
id 'application'
id 'airbyte-docker'
id 'airbyte-integration-test-java'
}

application {
mainClass = 'io.airbyte.integrations.destination.kinesis.KinesisDestination'
}

def kinesisVersion = '2.17.75'
def testContainersVersion = '1.16.2'
def assertVersion = '3.21.0'

dependencies {
implementation project(':airbyte-config:models')
implementation project(':airbyte-protocol:models')
implementation project(':airbyte-integrations:bases:base-java')
implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs)

// https://mvnrepository.com/artifact/software.amazon.awssdk/kinesis
implementation "software.amazon.awssdk:kinesis:${kinesisVersion}"

testImplementation "org.assertj:assertj-core:${assertVersion}"
testImplementation "org.testcontainers:localstack:${testContainersVersion}"


integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-destination-test')
integrationTestJavaImplementation project(':airbyte-integrations:connectors:destination-kinesis')
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
version: '3.7'

services:
kinesis:
image: localstack/localstack:0.12.20
ports:
- "4566:4566"
environment:
- "SERVICES=kinesis"
- "HOSTNAME=localhost"
- "KINESIS_LATENCY=200"
- "KINESIS_SHARD_LIMIT=500"
# - "AWS_ACCESS_KEY_ID="
# - "AWS_SECRET_ACCESS_KEY="
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Copyright (c) 2021 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.integrations.destination.kinesis;

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import software.amazon.awssdk.services.kinesis.KinesisClient;

/**
 * KinesisClientPool class for managing a pool of kinesis clients with different configurations.
 *
 * <p>
 * Each distinct {@link KinesisConfig} maps to a single shared {@link KinesisClient} plus a usage
 * counter. All pool state transitions are performed inside {@link ConcurrentHashMap#compute}, so
 * looking up, creating, counting and evicting a client for a given config happen atomically with
 * respect to other callers using the same config.
 */
public class KinesisClientPool {

  // NOTE(review): used as a map key, so KinesisConfig is presumed to implement equals/hashCode
  // consistently — confirm against the KinesisConfig class.
  private static final ConcurrentHashMap<KinesisConfig, Tuple<KinesisClient, AtomicInteger>> clients =
      new ConcurrentHashMap<>();

  private KinesisClientPool() {
    // Static holder only; not meant to be instantiated.
  }

  /**
   * Initializes a Kinesis client for accessing Kinesis. If there is already an existing client with
   * the provided configuration it will return the existing one and increase the usage count, if not
   * it will build a new one, register it with a usage count of 1 and return it.
   *
   * <p>
   * The lookup and the registration are a single atomic step, so concurrent callers passing an
   * equal configuration always end up sharing one client instead of racing to create several.
   *
   * @param kinesisConfig used to configure the Kinesis client.
   * @return KinesisClient which can be used to access Kinesis.
   */
  public static KinesisClient initClient(KinesisConfig kinesisConfig) {
    return clients.compute(kinesisConfig, (config, cached) -> {
      if (cached == null) {
        // First user of this configuration: create the client and start counting at one.
        return Tuple.of(KinesisUtils.buildKinesisClient(config), new AtomicInteger(1));
      }
      cached.value2().incrementAndGet();
      return cached;
    }).value1();
  }

  /**
   * Returns a Kinesis client to the pool. If the client is no longer used by any other external
   * instances it will be closed and removed from the map, if not only its usage count will be
   * decreased.
   *
   * <p>
   * Decrementing, closing and removing are performed atomically for the given configuration, so a
   * concurrent {@link #initClient(KinesisConfig)} can never be handed a client that is in the
   * middle of being closed.
   *
   * @param kinesisConfig that was used to configure the Kinesis client.
   * @throws IllegalStateException if no client is registered for the provided configuration.
   */
  public static void closeClient(KinesisConfig kinesisConfig) {
    clients.compute(kinesisConfig, (config, cached) -> {
      if (cached == null) {
        // compute() rethrows unchecked exceptions and leaves the (absent) mapping unchanged.
        throw new IllegalStateException("No session for the provided config");
      }
      if (cached.value2().decrementAndGet() < 1) {
        cached.value1().close();
        // Returning null removes the mapping from the pool.
        return null;
      }
      return cached;
    });
  }

}
Loading

0 comments on commit 0eba52e

Please sign in to comment.