From 286e327252bfdf833d1b9d6d0e229366e95c7edd Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Fri, 11 Jun 2021 12:34:26 +0300 Subject: [PATCH 01/63] unfinished jdbcsource separation --- .../java/io/airbyte/db/jdbc/JdbcDatabase.java | 18 +- .../java/io/airbyte/db/jdbc/SqlDatabase.java | 13 + .../mysql/MySqlSourceComprehensiveTest.java | 10 + .../source-relational-db/.dockerignore | 3 + .../source-relational-db/Dockerfile | 12 + .../source-relational-db/build.gradle | 54 +++ .../connectors/source-relational-db/readme.md | 7 + .../AbstractRelationsDbSource.java | 458 ++++++++++++++++++ .../relationaldb/RelationalDbColumnInfo.java | 49 ++ .../source/relationaldb/TableInfo.java | 19 + .../protocol/models/AbstractField.java | 41 ++ .../io/airbyte/protocol/models/Field.java | 34 +- 12 files changed, 685 insertions(+), 33 deletions(-) create mode 100644 airbyte-db/src/main/java/io/airbyte/db/jdbc/SqlDatabase.java create mode 100644 airbyte-integrations/connectors/source-relational-db/.dockerignore create mode 100644 airbyte-integrations/connectors/source-relational-db/Dockerfile create mode 100644 airbyte-integrations/connectors/source-relational-db/build.gradle create mode 100644 airbyte-integrations/connectors/source-relational-db/readme.md create mode 100644 airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationsDbSource.java create mode 100644 airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/RelationalDbColumnInfo.java create mode 100644 airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/TableInfo.java create mode 100644 airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/AbstractField.java diff --git a/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java index 8c7f1296a002..6f2000d13541 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java @@ -24,6 +24,7 @@ package io.airbyte.db.jdbc; +import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.commons.functional.CheckedConsumer; import io.airbyte.commons.functional.CheckedFunction; import java.sql.Connection; @@ -36,7 +37,7 @@ /** * Database object for interacting with a JDBC connection. */ -public interface JdbcDatabase extends AutoCloseable { +public interface JdbcDatabase extends SqlDatabase { /** * Execute a database query. @@ -46,6 +47,7 @@ public interface JdbcDatabase extends AutoCloseable { */ void execute(CheckedConsumer query) throws SQLException; + @Override default void execute(String sql) throws SQLException { execute(connection -> connection.createStatement().execute(sql)); } @@ -109,7 +111,7 @@ Stream resultSetQuery(CheckedFunction type that each record will be mapped to. - * @return Result of the query mapped to a stream. + * @return Result of the query mapped to a stream.void execute(String sql) * @throws SQLException SQL related exceptions. */ Stream query(CheckedFunction statementCreator, @@ -131,4 +133,16 @@ default int queryInt(String sql, String... params) throws SQLException { } } + @Override + default Stream query(String sql, String... 
params) throws SQLException { + return query(connection -> { + PreparedStatement statement = connection.prepareStatement(sql); + int i = 1; + for (String param : params) { + statement.setString(i, param); + ++i; + } + return statement; + }, JdbcUtils::rowToJson); + } } diff --git a/airbyte-db/src/main/java/io/airbyte/db/jdbc/SqlDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/jdbc/SqlDatabase.java new file mode 100644 index 000000000000..4ad7a11af11a --- /dev/null +++ b/airbyte-db/src/main/java/io/airbyte/db/jdbc/SqlDatabase.java @@ -0,0 +1,13 @@ +package io.airbyte.db.jdbc; + +import com.fasterxml.jackson.databind.JsonNode; +import java.sql.SQLException; +import java.util.stream.Stream; + +public interface SqlDatabase extends AutoCloseable { + + void execute(String sql) throws SQLException; + + Stream query(String sql, String... params) throws SQLException; + +} diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/MySqlSourceComprehensiveTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/MySqlSourceComprehensiveTest.java index f311d117d17f..59961c8f15f1 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/MySqlSourceComprehensiveTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/MySqlSourceComprehensiveTest.java @@ -194,6 +194,16 @@ protected void initTests() { .addNullExpectedValue() .build()); + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("decimal") + .airbyteType(JsonSchemaPrimitive.NUMBER) + .fullSourceDataType("decimal(19,2)") + .addInsertValues("1700000.00") + .addInsertValues("1700000.00") + .build()); + + addDataTypeTestData( TestDataHolder.builder() .sourceType("bit") diff --git a/airbyte-integrations/connectors/source-relational-db/.dockerignore b/airbyte-integrations/connectors/source-relational-db/.dockerignore new file mode 100644 index 000000000000..65c7d0ad3e73 --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/.dockerignore @@ -0,0 +1,3 @@ +* +!Dockerfile +!build diff --git a/airbyte-integrations/connectors/source-relational-db/Dockerfile b/airbyte-integrations/connectors/source-relational-db/Dockerfile new file mode 100644 index 000000000000..bf1d4c215c6d --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/Dockerfile @@ -0,0 +1,12 @@ +FROM airbyte/integration-base-java:dev + +WORKDIR /airbyte + +ENV APPLICATION source-jdbc + +COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar + +RUN tar xf ${APPLICATION}.tar --strip-components=1 + +LABEL io.airbyte.version=0.3.0 +LABEL io.airbyte.name=airbyte/source-jdbc diff --git a/airbyte-integrations/connectors/source-relational-db/build.gradle b/airbyte-integrations/connectors/source-relational-db/build.gradle new file mode 100644 index 000000000000..6c5333e7eba4 --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/build.gradle @@ -0,0 +1,54 @@ +plugins { +// id 'application' +// id 'airbyte-docker' +// id 'airbyte-integration-test-java' + id "java-library" + // https://docs.gradle.org/current/userguide/java_testing.html#sec:java_test_fixtures +// id "java-test-fixtures" +// id 'com.github.eirnym.js2p' version '1.0' +} + +//application { +// mainClass = 'io.airbyte.integrations.source.jdbc.JdbcSource' +//} + +//project.configurations { +// 
testFixturesImplementation.extendsFrom implementation +// testFixturesRuntimeOnly.extendsFrom runtimeOnly +//} + +dependencies { + implementation project(':airbyte-commons') + implementation project(':airbyte-db') + implementation project(':airbyte-integrations:bases:base-java') + implementation project(':airbyte-protocol:models') + + implementation 'org.apache.commons:commons-lang3:3.11' + implementation 'org.projectlombok:lombok:1.18.20' + + testImplementation project(':airbyte-test-utils') + + testImplementation "org.postgresql:postgresql:42.2.18" + testImplementation "org.testcontainers:postgresql:1.15.1" + +// integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-source-test') +// integrationTestJavaImplementation "org.testcontainers:postgresql:1.15.1" +// +// testFixturesImplementation project(':airbyte-protocol:models') +// testFixturesImplementation project(':airbyte-db') +// testFixturesImplementation project(':airbyte-integrations:bases:base-java') +// testFixturesImplementation project(':airbyte-integrations:connectors:source-jdbc') + + // todo (cgardens) - the java-test-fixtures plugin doesn't by default extend from test. + // we cannot make it depend on the dependencies of source-jdbc:test, because source-jdbc:test + // is going to depend on these fixtures. need to find a way to get fixtures to inherit the + // common test classes without duplicating them. this should be part of whatever solution we + // decide on for a "test-java-lib". the current implementation is leveraging the existing + // plugin, but we can something different if we don't like this tool. +// testFixturesRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine:5.4.2' +// testFixturesImplementation 'org.junit.jupiter:junit-jupiter-api:5.4.2' +// testFixturesImplementation 'org.junit.jupiter:junit-jupiter-params:5.4.2' +// testFixturesImplementation group: 'org.mockito', name: 'mockito-junit-jupiter', version: '3.4.6' + + implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs) +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-relational-db/readme.md b/airbyte-integrations/connectors/source-relational-db/readme.md new file mode 100644 index 000000000000..30ba2fa6dc6c --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/readme.md @@ -0,0 +1,7 @@ +# JDBC Source + +We are not planning to expose this source in the UI yet. It serves as a base upon which we can build all of our other JDBC-compliant sources. + +The reasons we are not exposing this source by itself are: +1. It is not terribly user-friendly (jdbc urls are hard for a human to parse) +1. Each JDBC-compliant db, we need to make sure the appropriate drivers are installed on the image. We don't want to frontload installing all possible drivers, and instead would like to be more methodical. Instead for each JDBC-compliant source, we will extend this one and then install only the necessary JDBC drivers on that source's image. 
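
For context, a minimal usage sketch of the new SqlDatabase abstraction introduced above (not part of the diff itself). It assumes query(sql, params) returns a Stream of JsonNode rows, as the JsonNode import and the JdbcUtils::rowToJson mapping in the default JdbcDatabase implementation suggest; the class, method, table, and column names below are hypothetical and for illustration only.

import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.db.jdbc.SqlDatabase;
import java.sql.SQLException;
import java.util.stream.Stream;

public class SqlDatabaseUsageSketch {

  // Counts rows matching a filter through the SqlDatabase abstraction.
  // Parameters are bound positionally as strings by the default
  // JdbcDatabase.query(String, String...) implementation shown earlier.
  public static long countActiveUsers(SqlDatabase database) throws SQLException {
    // Hypothetical table/column: a "users" table with a "status" column.
    try (Stream<JsonNode> rows = database.query("SELECT id FROM users WHERE status = ?", "active")) {
      return rows.count();
    }
  }
}

The second patch in this series switches AbstractRelationalDbSource.queryTable to this string-based query(sql) entry point, so the shared base class no longer depends on raw JDBC connections.
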
diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationsDbSource.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationsDbSource.java new file mode 100644 index 000000000000..0a7be15336c0 --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationsDbSource.java @@ -0,0 +1,458 @@ +package io.airbyte.integrations.source.relationaldb; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import io.airbyte.commons.functional.CheckedConsumer; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.lang.Exceptions; +import io.airbyte.commons.type.Types; +import io.airbyte.commons.util.AutoCloseableIterator; +import io.airbyte.commons.util.AutoCloseableIterators; +import io.airbyte.db.jdbc.JdbcDatabase; +import io.airbyte.db.jdbc.JdbcUtils; +import io.airbyte.db.jdbc.SqlDatabase; +import io.airbyte.integrations.BaseConnector; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.base.Source; +import io.airbyte.protocol.models.AbstractField; +import io.airbyte.protocol.models.AirbyteCatalog; +import io.airbyte.protocol.models.AirbyteConnectionStatus; +import io.airbyte.protocol.models.AirbyteConnectionStatus.Status; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteMessage.Type; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.CatalogHelpers; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.Field; +import io.airbyte.protocol.models.JsonSchemaPrimitive; +import io.airbyte.protocol.models.SyncMode; +import java.sql.JDBCType; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.time.Instant; +import java.util.AbstractMap.SimpleImmutableEntry; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Function; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class AbstractRelationsDbSource extends BaseConnector implements Source { + + private static final Logger LOGGER = LoggerFactory.getLogger(AbstractRelationsDbSource.class); + + private final String driverClass; + + public AbstractRelationsDbSource(final String driverClass) { + this.driverClass = driverClass; + } + + /** + * @TODO + * Map a database implementation-specific configuration to json object that adheres to the + * AbstractJdbcSource config spec. See resources/spec.json. + * + * @param config database implementation-specific configuration. + * @return jdbc spec. + */ + public abstract JsonNode toConfig(JsonNode config); + + /** + * Set of namespaces that are internal to the database (e.g. system schemas) and should not be included + * in the catalog. + * + * @return set of schemas to be ignored. 
+ */ + public abstract Set getExcludedInternalNameSpaces(); + + @Override + public AirbyteConnectionStatus check(JsonNode config) { + try (final SqlDatabase database = createDatabase(config)) { + for (CheckedConsumer checkOperation : getCheckOperations(config)) { + checkOperation.accept(database); + } + + return new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED); + } catch (Exception e) { + LOGGER.info("Exception while checking connection: ", e); + return new AirbyteConnectionStatus() + .withStatus(Status.FAILED) + .withMessage("Could not connect with provided configuration. Error: " + e.getMessage()); + } + } + + /** + * Configures a list of operations that can be used to check the connection to the source. + * + * @return list of consumers that run queries for the check command. + */ + public abstract List> getCheckOperations(JsonNode config) throws Exception; + + @Override + public AirbyteCatalog discover(JsonNode config) throws Exception { + try (final SqlDatabase database = createDatabase(config)) { + Optional databaseName = Optional.ofNullable(config.get("database")).map(JsonNode::asText); + List streams = getTables(database, databaseName).stream() + .map(tableInfo -> CatalogHelpers + .createAirbyteStream(tableInfo.getName(), tableInfo.getNameSpace(), tableInfo.getFields()) + .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSourceDefinedPrimaryKey(Types.boxToListofList(tableInfo.getPrimaryKeys()))) + .collect(Collectors.toList()); + return new AirbyteCatalog().withStreams(streams); + } + } + + @Override + public AutoCloseableIterator read(JsonNode config, ConfiguredAirbyteCatalog catalog, JsonNode state) throws Exception { + final JdbcStateManager stateManager = new JdbcStateManager( + state == null ? 
JdbcStateManager.emptyState() : Jsons.object(state, JdbcState.class), + catalog); + final Instant emittedAt = Instant.now(); + + final JdbcDatabase database = createDatabase(config); + + final Map>> fullyQualifiedTableNameToInfo = + discoverInternal(database, Optional.ofNullable(config.get("database")).map(JsonNode::asText)) + .stream() + .collect(Collectors.toMap(t -> String.format("%s.%s", t.getNameSpace(), t.getName()), Function + .identity())); + + final List> incrementalIterators = + getIncrementalIterators(config, database, catalog, fullyQualifiedTableNameToInfo, stateManager, emittedAt); + final List> fullRefreshIterators = + getFullRefreshIterators(database, catalog, fullyQualifiedTableNameToInfo, stateManager, emittedAt); + final List> iteratorList = Stream + .of(incrementalIterators, fullRefreshIterators) + .flatMap(Collection::stream) + .collect(Collectors.toList()); + + return AutoCloseableIterators + .appendOnClose(AutoCloseableIterators.concatWithEagerClose(iteratorList), () -> { + LOGGER.info("Closing database connection pool."); + Exceptions.toRuntime(database::close); + LOGGER.info("Closed database connection pool."); + }); + } + + public List> getIncrementalIterators(JsonNode config, + JdbcDatabase database, + ConfiguredAirbyteCatalog catalog, + Map>> tableNameToTable, + JdbcStateManager stateManager, + Instant emittedAt) { + return getSelectedIterators( + database, + catalog, + tableNameToTable, + stateManager, + emittedAt, + configuredStream -> configuredStream.getSyncMode().equals(SyncMode.INCREMENTAL)); + } + + public List> getFullRefreshIterators(JdbcDatabase database, + ConfiguredAirbyteCatalog catalog, + Map>> tableNameToTable, + JdbcStateManager stateManager, + Instant emittedAt) { + return getSelectedIterators( + database, + catalog, + tableNameToTable, + stateManager, + emittedAt, + configuredStream -> configuredStream.getSyncMode().equals(SyncMode.FULL_REFRESH)); + } + + // TODO(dchia): Refactor the following functions and objects so they better operate around a Table + // abstraction. Indexes and strings are currently hardcoded all around making code brittle. 
+ private List> getSelectedIterators(JdbcDatabase database, + ConfiguredAirbyteCatalog catalog, + Map>> tableNameToTable, + JdbcStateManager stateManager, + Instant emittedAt, + Predicate selector) { + final List> iteratorList = new ArrayList<>(); + for (final ConfiguredAirbyteStream airbyteStream : catalog.getStreams()) { + if (selector.test(airbyteStream)) { + final AirbyteStream stream = airbyteStream.getStream(); + final String fullyQualifiedTableName = JdbcUtils.getFullyQualifiedTableName(stream.getNamespace(), stream.getName()); + if (!tableNameToTable.containsKey(fullyQualifiedTableName)) { + LOGGER.info("Skipping stream {} because it is not in the source", fullyQualifiedTableName); + continue; + } + + final TableInfo> table = tableNameToTable.get(fullyQualifiedTableName); + final AutoCloseableIterator tableReadIterator = createReadIterator( + database, + airbyteStream, + table, + stateManager, + emittedAt); + iteratorList.add(tableReadIterator); + } + } + + return iteratorList; + } + + private AutoCloseableIterator createReadIterator(JdbcDatabase database, + ConfiguredAirbyteStream airbyteStream, + TableInfo> table, + JdbcStateManager stateManager, + Instant emittedAt) { + final String streamName = airbyteStream.getStream().getName(); + final String namespace = airbyteStream.getStream().getNamespace(); + final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(streamName, namespace); + final Set selectedFieldsInCatalog = CatalogHelpers.getTopLevelFieldNames(airbyteStream); + final List selectedDatabaseFields = table.getFields() + .stream() + .map(AbstractField::getColumnName) + .filter(selectedFieldsInCatalog::contains) + .collect(Collectors.toList()); + + final AutoCloseableIterator iterator; + if (airbyteStream.getSyncMode() == SyncMode.INCREMENTAL) { + final String cursorField = IncrementalUtils.getCursorField(airbyteStream); + final Optional cursorOptional = stateManager.getCursor(pair); + + final AutoCloseableIterator airbyteMessageIterator; + if (cursorOptional.isPresent()) { + airbyteMessageIterator = getIncrementalStream(database, airbyteStream, selectedDatabaseFields, table, cursorOptional.get(), emittedAt); + } else { + // if no cursor is present then this is the first read for is the same as doing a full refresh read. + airbyteMessageIterator = getFullRefreshStream(database, streamName, namespace, selectedDatabaseFields, table, emittedAt); + } + + final JsonSchemaPrimitive cursorType = IncrementalUtils.getCursorType(airbyteStream, cursorField); + + iterator = AutoCloseableIterators.transform(autoCloseableIterator -> new StateDecoratingIterator( + autoCloseableIterator, + stateManager, + pair, + cursorField, + cursorOptional.orElse(null), + cursorType), + airbyteMessageIterator); + } else if (airbyteStream.getSyncMode() == SyncMode.FULL_REFRESH) { + iterator = getFullRefreshStream(database, streamName, namespace, selectedDatabaseFields, table, emittedAt); + } else if (airbyteStream.getSyncMode() == null) { + throw new IllegalArgumentException(String.format("%s requires a source sync mode", AbstractJdbcSource.class)); + } else { + throw new IllegalArgumentException(String.format("%s does not support sync mode: %s.", AbstractJdbcSource.class, airbyteStream.getSyncMode())); + } + + final AtomicLong recordCount = new AtomicLong(); + return AutoCloseableIterators.transform(iterator, r -> { + final long count = recordCount.incrementAndGet(); + if (count % 10000 == 0) { + LOGGER.info("Reading stream {}. 
Records read: {}", streamName, count); + } + return r; + }); + } + + private AutoCloseableIterator getIncrementalStream(JdbcDatabase database, + ConfiguredAirbyteStream airbyteStream, + List selectedDatabaseFields, + TableInfo> table, + String cursor, + Instant emittedAt) { + final String streamName = airbyteStream.getStream().getName(); + final String namespace = airbyteStream.getStream().getNamespace(); + final String cursorField = IncrementalUtils.getCursorField(airbyteStream); + final JDBCType cursorJdbcType = table.getFields().stream() + .filter(info -> info.getColumnName().equals(cursorField)) + .map(AbstractField::getColumnType) + .findFirst() + .orElseThrow(); + + Preconditions.checkState(table.getFields().stream().anyMatch(f -> f.getColumnName().equals(cursorField)), + String.format("Could not find cursor field %s in table %s", cursorField, table.getName())); + + final AutoCloseableIterator queryIterator = queryTableIncremental( + database, + selectedDatabaseFields, + table.getSchemaName(), + table.getName(), + cursorField, + cursorJdbcType, + cursor); + + return getMessageIterator(queryIterator, streamName, namespace, emittedAt.toEpochMilli()); + } + + private AutoCloseableIterator getFullRefreshStream(JdbcDatabase database, + String streamName, + String namespace, + List selectedDatabaseFields, + TableInfo> table, + Instant emittedAt) { + final AutoCloseableIterator queryStream = + queryTableFullRefresh(database, selectedDatabaseFields, table.getSchemaName(), table.getName()); + return getMessageIterator(queryStream, streamName, namespace, emittedAt.toEpochMilli()); + } + + protected abstract JsonSchemaPrimitive getType(T columnType); + + protected abstract String getFullyQualifiedTableName(String nameSpace, String tableName); + + @SuppressWarnings("OptionalUsedAsFieldOrParameterType") + private List> getTables(final SqlDatabase database, final Optional databaseOptional) throws Exception { + final List>> tableInfos = discoverInternal(database, databaseOptional); + final Map> fullyQualifiedTableNameToPrimaryKeys = discoverPrimaryKeys(database, databaseOptional, tableInfos); + + return tableInfos.stream() + .map(t -> { + // some databases return multiple copies of the same record for a column (e.g. redshift) because + // they have at least once delivery guarantees. we want to dedupe these, but first we check that the + // records are actually the same and provide a good error message if they are not. + assertColumnsWithSameNameAreSame(t.getNameSpace(), t.getName(), t.getFields()); + final List fields = t.getFields() + .stream() + .map(f -> Field.of(f.getName(), getType(f.getType()))) + .distinct() + .collect(Collectors.toList()); + final String fullyQualifiedTableName = getFullyQualifiedTableName(t.getNameSpace(), t.getName()); + final List primaryKeys = fullyQualifiedTableNameToPrimaryKeys.getOrDefault(fullyQualifiedTableName, Collections + .emptyList()); + + return new TableInfo.TableInfoBuilder().nameSpace(t.getNameSpace()).name(t.getName()).fields(fields).primaryKeys(primaryKeys).build(); + }) + .collect(Collectors.toList()); + } + + /** + * Discover Primary keys for each table and @return a map of schema.table name to their associated + * list of primary key fields. + * + * When invoking the conn.getMetaData().getPrimaryKeys() function without a table name, it may fail + * on some databases (for example MySql) but works on others (for instance Postgres). 
To avoid + * making repeated queries to the DB, we try to get all primary keys without specifying a table + * first, if it doesn't work, we retry one table at a time. + */ + protected abstract Map> discoverPrimaryKeys(SqlDatabase database, + Optional databaseOptional, + List>> tableInfos); + + /** + * Aggregate list of @param entries of StreamName and PrimaryKey and + * + * @return a map by StreamName to associated list of primary keys + */ + protected Map> aggregatePrimateKeys(List> entries) { + final Map> result = new HashMap<>(); + entries.forEach(entry -> { + if (!result.containsKey(entry.getKey())) { + result.put(entry.getKey(), new ArrayList<>()); + } + result.get(entry.getKey()).add(entry.getValue()); + }); + return result; + } + + protected void assertColumnsWithSameNameAreSame(String nameSpace, String tableName, List> columns) { + columns.stream() + .collect(Collectors.groupingBy(AbstractField::getName)) + .values() + .forEach(columnsWithSameName -> { + final AbstractField comparisonColumn = columnsWithSameName.get(0); + columnsWithSameName.forEach(column -> { + if (!column.equals(comparisonColumn)) { + throw new RuntimeException( + String.format("Found multiple columns with same name: %s in table: %s.%s but the columns are not the same. columns: %s", + comparisonColumn.getName(), nameSpace, tableName, columns)); + } + }); + }); + } + + @SuppressWarnings("OptionalUsedAsFieldOrParameterType") + protected abstract List>> discoverInternal(final SqlDatabase database, final Optional databaseOptional) + throws Exception; + + public AutoCloseableIterator getMessageIterator(AutoCloseableIterator recordIterator, + String streamName, + String namespace, + long emittedAt) { + return AutoCloseableIterators.transform(recordIterator, r -> new AirbyteMessage() + .withType(Type.RECORD) + .withRecord(new AirbyteRecordMessage() + .withStream(streamName) + .withNamespace(namespace) + .withEmittedAt(emittedAt) + .withData(r))); + } + + public AutoCloseableIterator queryTableFullRefresh(JdbcDatabase database, + List columnNames, + String schemaName, + String tableName) { + LOGGER.info("Queueing query for table: {}", tableName); + return AutoCloseableIterators.lazyIterator(() -> { + try { + final Stream stream = database.query( + connection -> { + LOGGER.info("Preparing query for table: {}", tableName); + final String sql = String.format("SELECT %s FROM %s", + JdbcUtils.enquoteIdentifierList(connection, columnNames), + JdbcUtils.getFullyQualifiedTableNameWithQuoting(connection, schemaName, tableName)); + final PreparedStatement preparedStatement = connection.prepareStatement(sql); + LOGGER.info("Executing query for table: {}", tableName); + return preparedStatement; + }, + JdbcUtils::rowToJson); + return AutoCloseableIterators.fromStream(stream); + } catch (SQLException e) { + throw new RuntimeException(e); + } + }); + } + + public AutoCloseableIterator queryTableIncremental(JdbcDatabase database, + List columnNames, + String schemaName, + String tableName, + String cursorField, + JDBCType cursorFieldType, + String cursor) { + + LOGGER.info("Queueing query for table: {}", tableName); + return AutoCloseableIterators.lazyIterator(() -> { + try { + final Stream stream = database.query( + connection -> { + LOGGER.info("Preparing query for table: {}", tableName); + final String sql = String.format("SELECT %s FROM %s WHERE %s > ?", + JdbcUtils.enquoteIdentifierList(connection, columnNames), + JdbcUtils.getFullyQualifiedTableNameWithQuoting(connection, schemaName, tableName), + 
JdbcUtils.enquoteIdentifier(connection, cursorField)); + + final PreparedStatement preparedStatement = connection.prepareStatement(sql); + JdbcUtils.setStatementField(preparedStatement, 1, cursorFieldType, cursor); + LOGGER.info("Executing query for table: {}", tableName); + return preparedStatement; + }, + JdbcUtils::rowToJson); + return AutoCloseableIterators.fromStream(stream); + } catch (SQLException e) { + throw new RuntimeException(e); + } + }); + } + + protected abstract SqlDatabase createDatabase(JsonNode config); + +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/RelationalDbColumnInfo.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/RelationalDbColumnInfo.java new file mode 100644 index 000000000000..2cf02a02bf7e --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/RelationalDbColumnInfo.java @@ -0,0 +1,49 @@ +package io.airbyte.integrations.source.relationaldb; + +import java.sql.JDBCType; +import java.util.Objects; + +class RelationalDbColumnInfo extends AbstractField { + + private final String columnName; + private final JDBCType columnType; + + public RelationalDbColumnInfo(String columnName, JDBCType columnType) { + this.columnName = columnName; + this.columnType = columnType; + } + + public String getColumnName() { + return columnName; + } + + public JDBCType getColumnType() { + return columnType; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + RelationalDbColumnInfo that = (RelationalDbColumnInfo) o; + return Objects.equals(columnName, that.columnName) && columnType == that.columnType; + } + + @Override + public int hashCode() { + return Objects.hash(columnName, columnType); + } + + @Override + public String toString() { + return "ColumnInfo{" + + "columnName='" + columnName + '\'' + + ", columnType=" + columnType + + '}'; + } + +} diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/TableInfo.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/TableInfo.java new file mode 100644 index 000000000000..d432a10b3f70 --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/TableInfo.java @@ -0,0 +1,19 @@ +package io.airbyte.integrations.source.relationaldb; + +import java.util.List; +import lombok.Builder; +import lombok.Getter; + +/** + * This class encapsulates all externally relevant Table information. 
+ */ +@Getter +@Builder +class TableInfo { + + private final String nameSpace; + private final String name; + private final List fields; + private final List primaryKeys; + +} diff --git a/airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/AbstractField.java b/airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/AbstractField.java new file mode 100644 index 000000000000..3bbc263f0046 --- /dev/null +++ b/airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/AbstractField.java @@ -0,0 +1,41 @@ +package io.airbyte.protocol.models; + +import java.util.Objects; + +public abstract class AbstractField { + + private final String name; + private final T type; + + public AbstractField(String name, T type) { + this.name = name; + this.type = type; + } + + public String getName() { + return name; + } + + public T getType() { + return type; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + AbstractField field = (AbstractField) o; + return name.equals(field.name) && + type == field.type; + } + + @Override + public int hashCode() { + return Objects.hash(name, type); + } +} diff --git a/airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/Field.java b/airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/Field.java index 22b14d1f6efe..e9db8c085493 100644 --- a/airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/Field.java +++ b/airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/Field.java @@ -24,46 +24,18 @@ package io.airbyte.protocol.models; -import java.util.Objects; - -public class Field { - - private final String name; - private final JsonSchemaPrimitive type; +public class Field extends AbstractField { public Field(String name, JsonSchemaPrimitive type) { - this.name = name; - this.type = type; + super(name, type); } public static Field of(String name, JsonSchemaPrimitive type) { return new Field(name, type); } - public String getName() { - return name; - } - public String getTypeAsJsonSchemaString() { - return type.name().toLowerCase(); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - Field field = (Field) o; - return name.equals(field.name) && - type == field.type; - } - - @Override - public int hashCode() { - return Objects.hash(name, type); + return getType().name().toLowerCase(); } } From 6a62eaad6ac1dc75bc610d5c32233d908451f6d6 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Fri, 11 Jun 2021 18:22:43 +0300 Subject: [PATCH 02/63] creation AbstactRelation --- .../java/io/airbyte/db/jdbc/JdbcDatabase.java | 1 + .../java/io/airbyte/db/jdbc/SqlDatabase.java | 24 ++ .../mysql/MySqlSourceComprehensiveTest.java | 1 - .../source-relational-db/build.gradle | 1 + ...e.java => AbstractRelationalDbSource.java} | 303 +++++++++--------- .../relationaldb/RelationalDbColumnInfo.java | 49 --- .../source/relationaldb/TableInfo.java | 24 ++ .../protocol/models/AbstractField.java | 25 ++ 8 files changed, 221 insertions(+), 207 deletions(-) rename airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/{AbstractRelationsDbSource.java => AbstractRelationalDbSource.java} (61%) delete mode 100644 airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/RelationalDbColumnInfo.java diff --git 
a/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java index 6f2000d13541..8846b8c46a02 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java @@ -145,4 +145,5 @@ default Stream query(String sql, String... params) throws SQLException return statement; }, JdbcUtils::rowToJson); } + } diff --git a/airbyte-db/src/main/java/io/airbyte/db/jdbc/SqlDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/jdbc/SqlDatabase.java index 4ad7a11af11a..f1a815bc9e38 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/jdbc/SqlDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/jdbc/SqlDatabase.java @@ -1,3 +1,27 @@ +/* + * MIT License + * + * Copyright (c) 2020 Airbyte + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + package io.airbyte.db.jdbc; import com.fasterxml.jackson.databind.JsonNode; diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/MySqlSourceComprehensiveTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/MySqlSourceComprehensiveTest.java index 59961c8f15f1..9e1e21582d52 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/MySqlSourceComprehensiveTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/source/mysql/MySqlSourceComprehensiveTest.java @@ -203,7 +203,6 @@ protected void initTests() { .addInsertValues("1700000.00") .build()); - addDataTypeTestData( TestDataHolder.builder() .sourceType("bit") diff --git a/airbyte-integrations/connectors/source-relational-db/build.gradle b/airbyte-integrations/connectors/source-relational-db/build.gradle index 6c5333e7eba4..ec81171769e0 100644 --- a/airbyte-integrations/connectors/source-relational-db/build.gradle +++ b/airbyte-integrations/connectors/source-relational-db/build.gradle @@ -22,6 +22,7 @@ dependencies { implementation project(':airbyte-db') implementation project(':airbyte-integrations:bases:base-java') implementation project(':airbyte-protocol:models') + implementation project(':airbyte-integrations:connectors:source-jdbc') implementation 'org.apache.commons:commons-lang3:3.11' implementation 'org.projectlombok:lombok:1.18.20' diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationsDbSource.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java similarity index 61% rename from airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationsDbSource.java rename to airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java index 0a7be15336c0..9a7ecb6167d5 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationsDbSource.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java @@ -1,3 +1,27 @@ +/* + * MIT License + * + * Copyright (c) 2020 Airbyte + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + package io.airbyte.integrations.source.relationaldb; import com.fasterxml.jackson.databind.JsonNode; @@ -9,12 +33,15 @@ import io.airbyte.commons.type.Types; import io.airbyte.commons.util.AutoCloseableIterator; import io.airbyte.commons.util.AutoCloseableIterators; -import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.db.jdbc.SqlDatabase; import io.airbyte.integrations.BaseConnector; import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; import io.airbyte.integrations.base.Source; +import io.airbyte.integrations.source.jdbc.IncrementalUtils; +import io.airbyte.integrations.source.jdbc.JdbcStateManager; +import io.airbyte.integrations.source.jdbc.StateDecoratingIterator; +import io.airbyte.integrations.source.jdbc.models.JdbcState; import io.airbyte.protocol.models.AbstractField; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteConnectionStatus; @@ -29,19 +56,16 @@ import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaPrimitive; import io.airbyte.protocol.models.SyncMode; -import java.sql.JDBCType; -import java.sql.PreparedStatement; import java.sql.SQLException; import java.time.Instant; -import java.util.AbstractMap.SimpleImmutableEntry; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.StringJoiner; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Function; import java.util.function.Predicate; @@ -50,32 +74,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public abstract class AbstractRelationsDbSource extends BaseConnector implements Source { - - private static final Logger LOGGER = LoggerFactory.getLogger(AbstractRelationsDbSource.class); - - private final String driverClass; - - public AbstractRelationsDbSource(final String driverClass) { - this.driverClass = driverClass; - } +public abstract class AbstractRelationalDbSource extends BaseConnector implements Source { - /** - * @TODO - * Map a database implementation-specific configuration to json object that adheres to the - * AbstractJdbcSource config spec. See resources/spec.json. - * - * @param config database implementation-specific configuration. - * @return jdbc spec. - */ - public abstract JsonNode toConfig(JsonNode config); + private static final Logger LOGGER = LoggerFactory.getLogger(AbstractRelationalDbSource.class); - /** - * Set of namespaces that are internal to the database (e.g. system schemas) and should not be included - * in the catalog. - * - * @return set of schemas to be ignored. 
- */ public abstract Set getExcludedInternalNameSpaces(); @Override @@ -104,8 +106,7 @@ public AirbyteConnectionStatus check(JsonNode config) { @Override public AirbyteCatalog discover(JsonNode config) throws Exception { try (final SqlDatabase database = createDatabase(config)) { - Optional databaseName = Optional.ofNullable(config.get("database")).map(JsonNode::asText); - List streams = getTables(database, databaseName).stream() + List streams = getTables(database).stream() .map(tableInfo -> CatalogHelpers .createAirbyteStream(tableInfo.getName(), tableInfo.getNameSpace(), tableInfo.getFields()) .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) @@ -122,16 +123,16 @@ public AutoCloseableIterator read(JsonNode config, ConfiguredAir catalog); final Instant emittedAt = Instant.now(); - final JdbcDatabase database = createDatabase(config); + final SqlDatabase database = createDatabase(config); final Map>> fullyQualifiedTableNameToInfo = - discoverInternal(database, Optional.ofNullable(config.get("database")).map(JsonNode::asText)) + discoverWithoutSystemTables(database) .stream() .collect(Collectors.toMap(t -> String.format("%s.%s", t.getNameSpace(), t.getName()), Function .identity())); final List> incrementalIterators = - getIncrementalIterators(config, database, catalog, fullyQualifiedTableNameToInfo, stateManager, emittedAt); + getIncrementalIterators(database, catalog, fullyQualifiedTableNameToInfo, stateManager, emittedAt); final List> fullRefreshIterators = getFullRefreshIterators(database, catalog, fullyQualifiedTableNameToInfo, stateManager, emittedAt); final List> iteratorList = Stream @@ -147,12 +148,11 @@ public AutoCloseableIterator read(JsonNode config, ConfiguredAir }); } - public List> getIncrementalIterators(JsonNode config, - JdbcDatabase database, - ConfiguredAirbyteCatalog catalog, - Map>> tableNameToTable, - JdbcStateManager stateManager, - Instant emittedAt) { + public List> getIncrementalIterators(SqlDatabase database, + ConfiguredAirbyteCatalog catalog, + Map>> tableNameToTable, + JdbcStateManager stateManager, + Instant emittedAt) { return getSelectedIterators( database, catalog, @@ -162,11 +162,11 @@ public List> getIncrementalIterators(JsonN configuredStream -> configuredStream.getSyncMode().equals(SyncMode.INCREMENTAL)); } - public List> getFullRefreshIterators(JdbcDatabase database, - ConfiguredAirbyteCatalog catalog, - Map>> tableNameToTable, - JdbcStateManager stateManager, - Instant emittedAt) { + public List> getFullRefreshIterators(SqlDatabase database, + ConfiguredAirbyteCatalog catalog, + Map>> tableNameToTable, + JdbcStateManager stateManager, + Instant emittedAt) { return getSelectedIterators( database, catalog, @@ -176,14 +176,12 @@ public List> getFullRefreshIterators(JdbcD configuredStream -> configuredStream.getSyncMode().equals(SyncMode.FULL_REFRESH)); } - // TODO(dchia): Refactor the following functions and objects so they better operate around a Table - // abstraction. Indexes and strings are currently hardcoded all around making code brittle. 
- private List> getSelectedIterators(JdbcDatabase database, - ConfiguredAirbyteCatalog catalog, - Map>> tableNameToTable, - JdbcStateManager stateManager, - Instant emittedAt, - Predicate selector) { + private List> getSelectedIterators(SqlDatabase database, + ConfiguredAirbyteCatalog catalog, + Map>> tableNameToTable, + JdbcStateManager stateManager, + Instant emittedAt, + Predicate selector) { final List> iteratorList = new ArrayList<>(); for (final ConfiguredAirbyteStream airbyteStream : catalog.getStreams()) { if (selector.test(airbyteStream)) { @@ -208,18 +206,18 @@ private List> getSelectedIterators(JdbcDat return iteratorList; } - private AutoCloseableIterator createReadIterator(JdbcDatabase database, - ConfiguredAirbyteStream airbyteStream, - TableInfo> table, - JdbcStateManager stateManager, - Instant emittedAt) { + private AutoCloseableIterator createReadIterator(SqlDatabase database, + ConfiguredAirbyteStream airbyteStream, + TableInfo> table, + JdbcStateManager stateManager, + Instant emittedAt) { final String streamName = airbyteStream.getStream().getName(); final String namespace = airbyteStream.getStream().getNamespace(); final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(streamName, namespace); final Set selectedFieldsInCatalog = CatalogHelpers.getTopLevelFieldNames(airbyteStream); final List selectedDatabaseFields = table.getFields() .stream() - .map(AbstractField::getColumnName) + .map(AbstractField::getName) .filter(selectedFieldsInCatalog::contains) .collect(Collectors.toList()); @@ -239,19 +237,19 @@ private AutoCloseableIterator createReadIterator(JdbcDatabase da final JsonSchemaPrimitive cursorType = IncrementalUtils.getCursorType(airbyteStream, cursorField); iterator = AutoCloseableIterators.transform(autoCloseableIterator -> new StateDecoratingIterator( - autoCloseableIterator, - stateManager, - pair, - cursorField, - cursorOptional.orElse(null), - cursorType), + autoCloseableIterator, + stateManager, + pair, + cursorField, + cursorOptional.orElse(null), + cursorType), airbyteMessageIterator); } else if (airbyteStream.getSyncMode() == SyncMode.FULL_REFRESH) { iterator = getFullRefreshStream(database, streamName, namespace, selectedDatabaseFields, table, emittedAt); } else if (airbyteStream.getSyncMode() == null) { - throw new IllegalArgumentException(String.format("%s requires a source sync mode", AbstractJdbcSource.class)); + throw new IllegalArgumentException(String.format("%s requires a source sync mode", this.getClass())); } else { - throw new IllegalArgumentException(String.format("%s does not support sync mode: %s.", AbstractJdbcSource.class, airbyteStream.getSyncMode())); + throw new IllegalArgumentException(String.format("%s does not support sync mode: %s.", this.getClass(), airbyteStream.getSyncMode())); } final AtomicLong recordCount = new AtomicLong(); @@ -264,28 +262,28 @@ private AutoCloseableIterator createReadIterator(JdbcDatabase da }); } - private AutoCloseableIterator getIncrementalStream(JdbcDatabase database, - ConfiguredAirbyteStream airbyteStream, - List selectedDatabaseFields, - TableInfo> table, - String cursor, - Instant emittedAt) { + private AutoCloseableIterator getIncrementalStream(SqlDatabase database, + ConfiguredAirbyteStream airbyteStream, + List selectedDatabaseFields, + TableInfo> table, + String cursor, + Instant emittedAt) { final String streamName = airbyteStream.getStream().getName(); final String namespace = airbyteStream.getStream().getNamespace(); final String cursorField = 
IncrementalUtils.getCursorField(airbyteStream); - final JDBCType cursorJdbcType = table.getFields().stream() - .filter(info -> info.getColumnName().equals(cursorField)) - .map(AbstractField::getColumnType) + final T cursorJdbcType = table.getFields().stream() + .filter(info -> info.getName().equals(cursorField)) + .map(AbstractField::getType) .findFirst() .orElseThrow(); - Preconditions.checkState(table.getFields().stream().anyMatch(f -> f.getColumnName().equals(cursorField)), + Preconditions.checkState(table.getFields().stream().anyMatch(f -> f.getName().equals(cursorField)), String.format("Could not find cursor field %s in table %s", cursorField, table.getName())); final AutoCloseableIterator queryIterator = queryTableIncremental( database, selectedDatabaseFields, - table.getSchemaName(), + table.getNameSpace(), table.getName(), cursorField, cursorJdbcType, @@ -294,14 +292,14 @@ private AutoCloseableIterator getIncrementalStream(JdbcDatabase return getMessageIterator(queryIterator, streamName, namespace, emittedAt.toEpochMilli()); } - private AutoCloseableIterator getFullRefreshStream(JdbcDatabase database, - String streamName, - String namespace, - List selectedDatabaseFields, - TableInfo> table, - Instant emittedAt) { + private AutoCloseableIterator getFullRefreshStream(SqlDatabase database, + String streamName, + String namespace, + List selectedDatabaseFields, + TableInfo> table, + Instant emittedAt) { final AutoCloseableIterator queryStream = - queryTableFullRefresh(database, selectedDatabaseFields, table.getSchemaName(), table.getName()); + queryTableFullRefresh(database, selectedDatabaseFields, table.getNameSpace(), table.getName()); return getMessageIterator(queryStream, streamName, namespace, emittedAt.toEpochMilli()); } @@ -309,10 +307,9 @@ private AutoCloseableIterator getFullRefreshStream(JdbcDatabase protected abstract String getFullyQualifiedTableName(String nameSpace, String tableName); - @SuppressWarnings("OptionalUsedAsFieldOrParameterType") - private List> getTables(final SqlDatabase database, final Optional databaseOptional) throws Exception { - final List>> tableInfos = discoverInternal(database, databaseOptional); - final Map> fullyQualifiedTableNameToPrimaryKeys = discoverPrimaryKeys(database, databaseOptional, tableInfos); + private List> getTables(final SqlDatabase database) throws Exception { + final List>> tableInfos = discoverWithoutSystemTables(database); + final Map> fullyQualifiedTableNameToPrimaryKeys = discoverPrimaryKeys(database, tableInfos); return tableInfos.stream() .map(t -> { @@ -329,7 +326,8 @@ private List> getTables(final SqlDatabase database, final Optio final List primaryKeys = fullyQualifiedTableNameToPrimaryKeys.getOrDefault(fullyQualifiedTableName, Collections .emptyList()); - return new TableInfo.TableInfoBuilder().nameSpace(t.getNameSpace()).name(t.getName()).fields(fields).primaryKeys(primaryKeys).build(); + return new TableInfo.TableInfoBuilder().nameSpace(t.getNameSpace()).name(t.getName()).fields(fields).primaryKeys(primaryKeys) + .build(); }) .collect(Collectors.toList()); } @@ -344,24 +342,7 @@ private List> getTables(final SqlDatabase database, final Optio * first, if it doesn't work, we retry one table at a time. 
*/ protected abstract Map> discoverPrimaryKeys(SqlDatabase database, - Optional databaseOptional, - List>> tableInfos); - - /** - * Aggregate list of @param entries of StreamName and PrimaryKey and - * - * @return a map by StreamName to associated list of primary keys - */ - protected Map> aggregatePrimateKeys(List> entries) { - final Map> result = new HashMap<>(); - entries.forEach(entry -> { - if (!result.containsKey(entry.getKey())) { - result.put(entry.getKey(), new ArrayList<>()); - } - result.get(entry.getKey()).add(entry.getValue()); - }); - return result; - } + List>> tableInfos); protected void assertColumnsWithSameNameAreSame(String nameSpace, String tableName, List> columns) { columns.stream() @@ -379,14 +360,21 @@ protected void assertColumnsWithSameNameAreSame(String nameSpace, String tableNa }); } - @SuppressWarnings("OptionalUsedAsFieldOrParameterType") - protected abstract List>> discoverInternal(final SqlDatabase database, final Optional databaseOptional) + protected List>> discoverWithoutSystemTables(final SqlDatabase database) throws Exception { + Set systemNameSpaces = getExcludedInternalNameSpaces(); + List>> discoveredTables = discoverInternal(database); + return (systemNameSpaces == null || systemNameSpaces.isEmpty() ? discoveredTables + : discoveredTables.stream().filter(table -> !systemNameSpaces.contains(table.getNameSpace())).collect( + Collectors.toList())); + } + + protected abstract List>> discoverInternal(final SqlDatabase database) throws Exception; public AutoCloseableIterator getMessageIterator(AutoCloseableIterator recordIterator, - String streamName, - String namespace, - long emittedAt) { + String streamName, + String namespace, + long emittedAt) { return AutoCloseableIterators.transform(recordIterator, r -> new AirbyteMessage() .withType(Type.RECORD) .withRecord(new AirbyteRecordMessage() @@ -396,24 +384,29 @@ public AutoCloseableIterator getMessageIterator(AutoCloseableIte .withData(r))); } - public AutoCloseableIterator queryTableFullRefresh(JdbcDatabase database, - List columnNames, - String schemaName, - String tableName) { - LOGGER.info("Queueing query for table: {}", tableName); + protected abstract String getQuoteString(); + + private String getIdentifierWithQuoting(String identifier) { + return getQuoteString() + identifier + getQuoteString(); + } + + private String enquoteIdentifierList(List identifiers) { + final StringJoiner joiner = new StringJoiner(","); + for (String identifier : identifiers) { + joiner.add(getIdentifierWithQuoting(identifier)); + } + return joiner.toString(); + } + + private String getFullTableName(String nameSpace, String tableName) { + return (nameSpace == null || nameSpace.isEmpty() ? getIdentifierWithQuoting(tableName) + : getIdentifierWithQuoting(nameSpace) + "." 
+ getIdentifierWithQuoting(tableName)); + } + + protected AutoCloseableIterator queryTable(SqlDatabase database, String sqlQuery) { return AutoCloseableIterators.lazyIterator(() -> { try { - final Stream stream = database.query( - connection -> { - LOGGER.info("Preparing query for table: {}", tableName); - final String sql = String.format("SELECT %s FROM %s", - JdbcUtils.enquoteIdentifierList(connection, columnNames), - JdbcUtils.getFullyQualifiedTableNameWithQuoting(connection, schemaName, tableName)); - final PreparedStatement preparedStatement = connection.prepareStatement(sql); - LOGGER.info("Executing query for table: {}", tableName); - return preparedStatement; - }, - JdbcUtils::rowToJson); + final Stream stream = database.query(sqlQuery); return AutoCloseableIterators.fromStream(stream); } catch (SQLException e) { throw new RuntimeException(e); @@ -421,36 +414,32 @@ public AutoCloseableIterator queryTableFullRefresh(JdbcDatabase databa }); } - public AutoCloseableIterator queryTableIncremental(JdbcDatabase database, - List columnNames, - String schemaName, - String tableName, - String cursorField, - JDBCType cursorFieldType, - String cursor) { + public AutoCloseableIterator queryTableFullRefresh(SqlDatabase database, + List columnNames, + String schemaName, + String tableName) { + LOGGER.info("Queueing query for table: {}", tableName); + return queryTable(database, String.format("SELECT %s FROM %s", + enquoteIdentifierList(columnNames), + getFullTableName(schemaName, tableName))); + } + + protected abstract String getCursorValue(T cursorFieldType, String cursor); + + public AutoCloseableIterator queryTableIncremental(SqlDatabase database, + List columnNames, + String schemaName, + String tableName, + String cursorField, + T cursorFieldType, + String cursor) { LOGGER.info("Queueing query for table: {}", tableName); - return AutoCloseableIterators.lazyIterator(() -> { - try { - final Stream stream = database.query( - connection -> { - LOGGER.info("Preparing query for table: {}", tableName); - final String sql = String.format("SELECT %s FROM %s WHERE %s > ?", - JdbcUtils.enquoteIdentifierList(connection, columnNames), - JdbcUtils.getFullyQualifiedTableNameWithQuoting(connection, schemaName, tableName), - JdbcUtils.enquoteIdentifier(connection, cursorField)); - - final PreparedStatement preparedStatement = connection.prepareStatement(sql); - JdbcUtils.setStatementField(preparedStatement, 1, cursorFieldType, cursor); - LOGGER.info("Executing query for table: {}", tableName); - return preparedStatement; - }, - JdbcUtils::rowToJson); - return AutoCloseableIterators.fromStream(stream); - } catch (SQLException e) { - throw new RuntimeException(e); - } - }); + return queryTable(database, String.format("SELECT %s FROM %s WHERE %s > %s", + enquoteIdentifierList(columnNames), + getFullTableName(schemaName, tableName), + getIdentifierWithQuoting(cursorField), + getCursorValue(cursorFieldType, cursor))); } protected abstract SqlDatabase createDatabase(JsonNode config); diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/RelationalDbColumnInfo.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/RelationalDbColumnInfo.java deleted file mode 100644 index 2cf02a02bf7e..000000000000 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/RelationalDbColumnInfo.java +++ /dev/null @@ -1,49 +0,0 @@ -package 
io.airbyte.integrations.source.relationaldb; - -import java.sql.JDBCType; -import java.util.Objects; - -class RelationalDbColumnInfo extends AbstractField { - - private final String columnName; - private final JDBCType columnType; - - public RelationalDbColumnInfo(String columnName, JDBCType columnType) { - this.columnName = columnName; - this.columnType = columnType; - } - - public String getColumnName() { - return columnName; - } - - public JDBCType getColumnType() { - return columnType; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - RelationalDbColumnInfo that = (RelationalDbColumnInfo) o; - return Objects.equals(columnName, that.columnName) && columnType == that.columnType; - } - - @Override - public int hashCode() { - return Objects.hash(columnName, columnType); - } - - @Override - public String toString() { - return "ColumnInfo{" + - "columnName='" + columnName + '\'' + - ", columnType=" + columnType + - '}'; - } - -} diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/TableInfo.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/TableInfo.java index d432a10b3f70..db9d686b2678 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/TableInfo.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/TableInfo.java @@ -1,3 +1,27 @@ +/* + * MIT License + * + * Copyright (c) 2020 Airbyte + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + package io.airbyte.integrations.source.relationaldb; import java.util.List; diff --git a/airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/AbstractField.java b/airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/AbstractField.java index 3bbc263f0046..299d4f27a5a6 100644 --- a/airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/AbstractField.java +++ b/airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/AbstractField.java @@ -1,3 +1,27 @@ +/* + * MIT License + * + * Copyright (c) 2020 Airbyte + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + package io.airbyte.protocol.models; import java.util.Objects; @@ -38,4 +62,5 @@ public boolean equals(Object o) { public int hashCode() { return Objects.hash(name, type); } + } From 0aaf90418a55985a06ec1140ecc52ccbbe86e9ce Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Mon, 14 Jun 2021 15:07:19 +0300 Subject: [PATCH 03/63] Migrate StateManager to new abstract level (JdbcSource -> RelationalSource) --- .../java/io/airbyte/db}/IncrementalUtils.java | 4 +- .../airbyte/db/jdbc/DefaultJdbcDatabase.java | 6 + .../java/io/airbyte/db/jdbc/JdbcDatabase.java | 3 + .../db/jdbc/StreamingJdbcDatabase.java | 6 + .../io/airbyte/db}/IncrementalUtilsTest.java | 15 +- .../source/clickhouse/ClickHouseSource.java | 2 +- .../connectors/source-jdbc/build.gradle | 22 +- .../source/jdbc/AbstractJdbcSource.java | 577 +++--------------- .../integrations/source/jdbc/JdbcSource.java | 2 +- .../AbstractJdbcSourceAcceptanceTest.java | 2 +- .../source/jdbc/DefaultJdbcStressTest.java | 2 +- .../source/jdbc/JdbcSourceStressTest.java | 2 +- .../jdbc/test/JdbcSourceAcceptanceTest.java | 4 +- .../source/mssql/MssqlSource.java | 2 +- .../connectors/source-mysql/build.gradle | 1 + .../mysql/AirbyteFileOffsetBackingStore.java | 4 +- .../mysql/AirbyteSchemaHistoryStorage.java | 4 +- .../source/mysql/MySqlSource.java | 22 +- .../source/oracle/OracleSource.java | 2 +- .../source/oracle/OracleStressTest.java | 2 +- .../connectors/source-postgres/build.gradle | 1 + .../source/postgres/PostgresSource.java | 22 +- .../source/postgres/PostgresStressTest.java | 2 +- .../source/redshift/RedshiftSource.java | 2 +- .../source-relational-db/build.gradle | 54 +- .../AbstractRelationalDbSource.java | 89 ++- .../source/relationaldb/CdcStateManager.java} | 10 +- .../source/relationaldb/CursorInfo.java | 98 +++ .../StateDecoratingIterator.java | 7 +- .../source/relationaldb/StateManager.java} | 91 +-- 
.../source/relationaldb/TableInfo.java | 2 +- .../resources/jdbc_models/jdbc_models.yaml | 0 .../StateDecoratingIteratorTest.java | 6 +- .../relationaldb/StateManagerTest.java} | 33 +- .../ScaffoldJavaJdbcSource.java | 2 +- .../SnowflakeSource.java | 2 +- 36 files changed, 353 insertions(+), 752 deletions(-) rename {airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc => airbyte-db/src/main/java/io/airbyte/db}/IncrementalUtils.java (96%) rename {airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc => airbyte-db/src/test/java/io/airbyte/db}/IncrementalUtilsTest.java (87%) rename airbyte-integrations/connectors/{source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcCdcStateManager.java => source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/CdcStateManager.java} (87%) create mode 100644 airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/CursorInfo.java rename airbyte-integrations/connectors/{source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc => source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb}/StateDecoratingIterator.java (95%) rename airbyte-integrations/connectors/{source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcStateManager.java => source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateManager.java} (76%) rename airbyte-integrations/connectors/{source-jdbc => source-relational-db}/src/main/resources/jdbc_models/jdbc_models.yaml (100%) rename airbyte-integrations/connectors/{source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc => source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb}/StateDecoratingIteratorTest.java (97%) rename airbyte-integrations/connectors/{source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/JdbcStateManagerTest.java => source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateManagerTest.java} (86%) diff --git a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/IncrementalUtils.java b/airbyte-db/src/main/java/io/airbyte/db/IncrementalUtils.java similarity index 96% rename from airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/IncrementalUtils.java rename to airbyte-db/src/main/java/io/airbyte/db/IncrementalUtils.java index d65c0291ee92..dd3b8b3cd1ee 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/IncrementalUtils.java +++ b/airbyte-db/src/main/java/io/airbyte/db/IncrementalUtils.java @@ -22,7 +22,7 @@ * SOFTWARE. 
*/ -package io.airbyte.integrations.source.jdbc; +package io.airbyte.db; import io.airbyte.protocol.models.ConfiguredAirbyteStream; import io.airbyte.protocol.models.JsonSchemaPrimitive; @@ -33,7 +33,7 @@ public static String getCursorField(ConfiguredAirbyteStream stream) { if (stream.getCursorField().size() == 0) { throw new IllegalStateException("No cursor field specified for stream attempting to do incremental."); } else if (stream.getCursorField().size() > 1) { - throw new IllegalStateException("JdbcSource does not support nested cursor fields."); + throw new IllegalStateException("Source does not support nested cursor fields."); } else { return stream.getCursorField().get(0); } diff --git a/airbyte-db/src/main/java/io/airbyte/db/jdbc/DefaultJdbcDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/jdbc/DefaultJdbcDatabase.java index 7a9f3d0bf345..cc2040b4ab4f 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/jdbc/DefaultJdbcDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/jdbc/DefaultJdbcDatabase.java @@ -28,6 +28,7 @@ import io.airbyte.commons.functional.CheckedFunction; import java.io.Closeable; import java.sql.Connection; +import java.sql.DatabaseMetaData; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; @@ -86,6 +87,11 @@ public Stream resultSetQuery(CheckedFunction query(String sql, String... params) throws SQLException }, JdbcUtils::rowToJson); } + DatabaseMetaData getMetaData() throws SQLException; + } diff --git a/airbyte-db/src/main/java/io/airbyte/db/jdbc/StreamingJdbcDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/jdbc/StreamingJdbcDatabase.java index 1729c5931c38..89d92840a792 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/jdbc/StreamingJdbcDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/jdbc/StreamingJdbcDatabase.java @@ -27,6 +27,7 @@ import io.airbyte.commons.functional.CheckedConsumer; import io.airbyte.commons.functional.CheckedFunction; import java.sql.Connection; +import java.sql.DatabaseMetaData; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; @@ -51,6 +52,11 @@ public StreamingJdbcDatabase(DataSource dataSource, JdbcDatabase database, JdbcS this.jdbcStreamingQueryConfiguration = jdbcStreamingQueryConfiguration; } + @Override + public DatabaseMetaData getMetaData() throws SQLException { + return database.getMetaData(); + } + @Override public void execute(CheckedConsumer query) throws SQLException { database.execute(query); diff --git a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/IncrementalUtilsTest.java b/airbyte-db/src/test/java/io/airbyte/db/IncrementalUtilsTest.java similarity index 87% rename from airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/IncrementalUtilsTest.java rename to airbyte-db/src/test/java/io/airbyte/db/IncrementalUtilsTest.java index e5f97287db2e..27c3f2c59ce3 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/IncrementalUtilsTest.java +++ b/airbyte-db/src/test/java/io/airbyte/db/IncrementalUtilsTest.java @@ -22,9 +22,8 @@ * SOFTWARE. 
*/ -package io.airbyte.integrations.source.jdbc; +package io.airbyte.db; -import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -36,6 +35,7 @@ import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaPrimitive; import java.util.Collections; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; class IncrementalUtilsTest { @@ -51,12 +51,13 @@ class IncrementalUtilsTest { void testGetCursorField() { final ConfiguredAirbyteStream stream = Jsons.clone(STREAM); stream.setCursorField(Lists.newArrayList(UUID_FIELD_NAME)); - assertEquals(UUID_FIELD_NAME, IncrementalUtils.getCursorField(stream)); + Assertions.assertEquals(UUID_FIELD_NAME, IncrementalUtils.getCursorField(stream)); } @Test void testGetCursorFieldNoCursorFieldSet() { - assertThrows(IllegalStateException.class, () -> assertEquals(UUID_FIELD_NAME, IncrementalUtils.getCursorField(STREAM))); + assertThrows(IllegalStateException.class, () -> Assertions + .assertEquals(UUID_FIELD_NAME, IncrementalUtils.getCursorField(STREAM))); } @Test @@ -68,7 +69,7 @@ void testGetCursorFieldCompositCursor() { @Test void testGetCursorType() { - assertEquals(JsonSchemaPrimitive.STRING, IncrementalUtils.getCursorType(STREAM, UUID_FIELD_NAME)); + Assertions.assertEquals(JsonSchemaPrimitive.STRING, IncrementalUtils.getCursorType(STREAM, UUID_FIELD_NAME)); } @Test @@ -93,13 +94,13 @@ void testGetCursorTypeCursorHasNoType() { @Test void testCompareCursors() { assertTrue(IncrementalUtils.compareCursors("abc", "def", JsonSchemaPrimitive.STRING) < 0); - assertEquals(0, IncrementalUtils.compareCursors("abc", "abc", JsonSchemaPrimitive.STRING)); + Assertions.assertEquals(0, IncrementalUtils.compareCursors("abc", "abc", JsonSchemaPrimitive.STRING)); assertTrue(IncrementalUtils.compareCursors("1", "2", JsonSchemaPrimitive.NUMBER) < 0); assertTrue(IncrementalUtils.compareCursors("5000000000", "5000000001", JsonSchemaPrimitive.NUMBER) < 0); assertTrue(IncrementalUtils.compareCursors("false", "true", JsonSchemaPrimitive.BOOLEAN) < 0); assertTrue(IncrementalUtils.compareCursors(null, "def", JsonSchemaPrimitive.STRING) < 1); assertTrue(IncrementalUtils.compareCursors("abc", null, JsonSchemaPrimitive.STRING) > 0); - assertEquals(0, IncrementalUtils.compareCursors(null, null, JsonSchemaPrimitive.STRING)); + Assertions.assertEquals(0, IncrementalUtils.compareCursors(null, null, JsonSchemaPrimitive.STRING)); assertThrows(IllegalStateException.class, () -> IncrementalUtils.compareCursors("a", "a", JsonSchemaPrimitive.ARRAY)); assertThrows(IllegalStateException.class, () -> IncrementalUtils.compareCursors("a", "a", JsonSchemaPrimitive.OBJECT)); assertThrows(IllegalStateException.class, () -> IncrementalUtils.compareCursors("a", "a", JsonSchemaPrimitive.NULL)); diff --git a/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java b/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java index cf5bc56c370c..f322de8cb8a2 100644 --- a/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java +++ b/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java @@ -104,7 +104,7 @@ public JsonNode toJdbcConfig(JsonNode config) { } 
@Override - public Set getExcludedInternalSchemas() { + public Set getExcludedInternalNameSpaces() { return Collections.singleton("system"); } diff --git a/airbyte-integrations/connectors/source-jdbc/build.gradle b/airbyte-integrations/connectors/source-jdbc/build.gradle index 4408897ace92..7e384bf074f8 100644 --- a/airbyte-integrations/connectors/source-jdbc/build.gradle +++ b/airbyte-integrations/connectors/source-jdbc/build.gradle @@ -1,5 +1,3 @@ -import org.jsonschema2pojo.SourceType - plugins { id 'application' id 'airbyte-docker' @@ -7,7 +5,6 @@ plugins { id "java-library" // https://docs.gradle.org/current/userguide/java_testing.html#sec:java_test_fixtures id "java-test-fixtures" - id 'com.github.eirnym.js2p' version '1.0' } application { @@ -24,6 +21,7 @@ dependencies { implementation project(':airbyte-db') implementation project(':airbyte-integrations:bases:base-java') implementation project(':airbyte-protocol:models') + implementation project(':airbyte-integrations:connectors:source-relational-db') implementation 'org.apache.commons:commons-lang3:3.11' @@ -38,7 +36,7 @@ dependencies { testFixturesImplementation project(':airbyte-protocol:models') testFixturesImplementation project(':airbyte-db') testFixturesImplementation project(':airbyte-integrations:bases:base-java') - testFixturesImplementation project(':airbyte-integrations:connectors:source-jdbc') +// testFixturesImplementation project(':airbyte-integrations:connectors:source-jdbc') // todo (cgardens) - the java-test-fixtures plugin doesn't by default extend from test. // we cannot make it depend on the dependencies of source-jdbc:test, because source-jdbc:test @@ -52,18 +50,4 @@ dependencies { testFixturesImplementation group: 'org.mockito', name: 'mockito-junit-jupiter', version: '3.4.6' implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs) -} - -jsonSchema2Pojo { - sourceType = SourceType.YAMLSCHEMA - source = files("${sourceSets.main.output.resourcesDir}/jdbc_models") - targetDirectory = new File(project.buildDir, 'generated/src/gen/java/') - removeOldOutput = true - - targetPackage = 'io.airbyte.integrations.source.jdbc.models' - - useLongIntegers = true - generateBuilders = true - includeConstructors = false - includeSetters = true -} +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java index e7c86ffd8684..c7a3b8306d3e 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java +++ b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java @@ -25,62 +25,39 @@ package io.airbyte.integrations.source.jdbc; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; import io.airbyte.commons.functional.CheckedConsumer; import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.lang.Exceptions; -import io.airbyte.commons.type.Types; import io.airbyte.commons.util.AutoCloseableIterator; import io.airbyte.commons.util.AutoCloseableIterators; import io.airbyte.db.Databases; import io.airbyte.db.jdbc.JdbcDatabase; import 
io.airbyte.db.jdbc.JdbcStreamingQueryConfiguration; import io.airbyte.db.jdbc.JdbcUtils; -import io.airbyte.integrations.BaseConnector; -import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; import io.airbyte.integrations.base.Source; -import io.airbyte.integrations.source.jdbc.models.JdbcState; -import io.airbyte.protocol.models.AirbyteCatalog; -import io.airbyte.protocol.models.AirbyteConnectionStatus; -import io.airbyte.protocol.models.AirbyteConnectionStatus.Status; -import io.airbyte.protocol.models.AirbyteMessage; -import io.airbyte.protocol.models.AirbyteMessage.Type; -import io.airbyte.protocol.models.AirbyteRecordMessage; -import io.airbyte.protocol.models.AirbyteStream; -import io.airbyte.protocol.models.CatalogHelpers; -import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; -import io.airbyte.protocol.models.ConfiguredAirbyteStream; -import io.airbyte.protocol.models.Field; +import io.airbyte.integrations.source.relationaldb.AbstractRelationalDbSource; +import io.airbyte.integrations.source.relationaldb.TableInfo; +import io.airbyte.protocol.models.AbstractField; import io.airbyte.protocol.models.JsonSchemaPrimitive; -import io.airbyte.protocol.models.SyncMode; import java.sql.JDBCType; import java.sql.PreparedStatement; import java.sql.SQLException; -import java.time.Instant; import java.util.AbstractMap.SimpleImmutableEntry; import java.util.ArrayList; -import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Objects; -import java.util.Optional; import java.util.Set; -import java.util.concurrent.atomic.AtomicLong; -import java.util.function.Function; -import java.util.function.Predicate; import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.commons.lang3.tuple.ImmutablePair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public abstract class AbstractJdbcSource extends BaseConnector implements Source { +public abstract class AbstractJdbcSource extends AbstractRelationalDbSource implements Source { private static final Logger LOGGER = LoggerFactory.getLogger(AbstractJdbcSource.class); @@ -103,6 +80,8 @@ public abstract class AbstractJdbcSource extends BaseConnector implements Source private final String driverClass; private final JdbcStreamingQueryConfiguration jdbcStreamingQueryConfiguration; + private JsonNode jdbcConfig; + private String quoteString; public AbstractJdbcSource(final String driverClass, final JdbcStreamingQueryConfiguration jdbcStreamingQueryConfiguration) { this.driverClass = driverClass; @@ -118,28 +97,8 @@ public AbstractJdbcSource(final String driverClass, final JdbcStreamingQueryConf */ public abstract JsonNode toJdbcConfig(JsonNode config); - /** - * Set of schemas that are internal to the database (e.g. system schemas) and should not be included - * in the catalog. - * - * @return set of schemas to be ignored. 
- */ - public abstract Set getExcludedInternalSchemas(); - - @Override - public AirbyteConnectionStatus check(JsonNode config) { - try (final JdbcDatabase database = createDatabase(config)) { - for (CheckedConsumer checkOperation : getCheckOperations(config)) { - checkOperation.accept(database); - } - - return new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED); - } catch (Exception e) { - LOGGER.info("Exception while checking connection: ", e); - return new AirbyteConnectionStatus() - .withStatus(Status.FAILED) - .withMessage("Could not connect with provided configuration. Error: " + e.getMessage()); - } + public JsonNode getJdbcConfig() { + return jdbcConfig; } /** @@ -154,276 +113,6 @@ public List> getCheckOperations(JsonNod }); } - @Override - public AirbyteCatalog discover(JsonNode config) throws Exception { - try (final JdbcDatabase database = createDatabase(config)) { - Optional databaseName = Optional.ofNullable(config.get("database")).map(JsonNode::asText); - List streams = getTables(database, databaseName).stream() - .map(tableInfo -> CatalogHelpers.createAirbyteStream(tableInfo.getName(), tableInfo.getSchemaName(), tableInfo.getFields()) - .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) - .withSourceDefinedPrimaryKey(Types.boxToListofList(tableInfo.getPrimaryKeys()))) - .collect(Collectors.toList()); - return new AirbyteCatalog().withStreams(streams); - } - } - - @Override - public AutoCloseableIterator read(JsonNode config, ConfiguredAirbyteCatalog catalog, JsonNode state) throws Exception { - final JdbcStateManager stateManager = new JdbcStateManager( - state == null ? JdbcStateManager.emptyState() : Jsons.object(state, JdbcState.class), - catalog); - final Instant emittedAt = Instant.now(); - - final JdbcDatabase database = createDatabase(config); - - final Map fullyQualifiedTableNameToInfo = - discoverInternal(database, Optional.ofNullable(config.get("database")).map(JsonNode::asText)) - .stream() - .collect(Collectors.toMap(t -> String.format("%s.%s", t.getSchemaName(), t.getName()), Function.identity())); - - final List> incrementalIterators = - getIncrementalIterators(config, database, catalog, fullyQualifiedTableNameToInfo, stateManager, emittedAt); - final List> fullRefreshIterators = - getFullRefreshIterators(database, catalog, fullyQualifiedTableNameToInfo, stateManager, emittedAt); - final List> iteratorList = Stream.of(incrementalIterators, fullRefreshIterators) - .flatMap(Collection::stream) - .collect(Collectors.toList()); - - return AutoCloseableIterators.appendOnClose(AutoCloseableIterators.concatWithEagerClose(iteratorList), () -> { - LOGGER.info("Closing database connection pool."); - Exceptions.toRuntime(database::close); - LOGGER.info("Closed database connection pool."); - }); - } - - public List> getIncrementalIterators(JsonNode config, - JdbcDatabase database, - ConfiguredAirbyteCatalog catalog, - Map tableNameToTable, - JdbcStateManager stateManager, - Instant emittedAt) { - return getSelectedIterators( - database, - catalog, - tableNameToTable, - stateManager, - emittedAt, - configuredStream -> configuredStream.getSyncMode().equals(SyncMode.INCREMENTAL)); - } - - public List> getFullRefreshIterators(JdbcDatabase database, - ConfiguredAirbyteCatalog catalog, - Map tableNameToTable, - JdbcStateManager stateManager, - Instant emittedAt) { - return getSelectedIterators( - database, - catalog, - tableNameToTable, - stateManager, - emittedAt, - configuredStream -> 
configuredStream.getSyncMode().equals(SyncMode.FULL_REFRESH)); - } - - // TODO(dchia): Refactor the following functions and objects so they better operate around a Table - // abstraction. Indexes and strings are currently hardcoded all around making code brittle. - private List> getSelectedIterators(JdbcDatabase database, - ConfiguredAirbyteCatalog catalog, - Map tableNameToTable, - JdbcStateManager stateManager, - Instant emittedAt, - Predicate selector) { - final List> iteratorList = new ArrayList<>(); - for (final ConfiguredAirbyteStream airbyteStream : catalog.getStreams()) { - if (selector.test(airbyteStream)) { - final AirbyteStream stream = airbyteStream.getStream(); - final String fullyQualifiedTableName = JdbcUtils.getFullyQualifiedTableName(stream.getNamespace(), stream.getName()); - if (!tableNameToTable.containsKey(fullyQualifiedTableName)) { - LOGGER.info("Skipping stream {} because it is not in the source", fullyQualifiedTableName); - continue; - } - - final TableInfoInternal table = tableNameToTable.get(fullyQualifiedTableName); - final AutoCloseableIterator tableReadIterator = createReadIterator( - database, - airbyteStream, - table, - stateManager, - emittedAt); - iteratorList.add(tableReadIterator); - } - } - - return iteratorList; - } - - private AutoCloseableIterator createReadIterator(JdbcDatabase database, - ConfiguredAirbyteStream airbyteStream, - TableInfoInternal table, - JdbcStateManager stateManager, - Instant emittedAt) { - final String streamName = airbyteStream.getStream().getName(); - final String namespace = airbyteStream.getStream().getNamespace(); - final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(streamName, namespace); - final Set selectedFieldsInCatalog = CatalogHelpers.getTopLevelFieldNames(airbyteStream); - final List selectedDatabaseFields = table.getFields() - .stream() - .map(ColumnInfo::getColumnName) - .filter(selectedFieldsInCatalog::contains) - .collect(Collectors.toList()); - - final AutoCloseableIterator iterator; - if (airbyteStream.getSyncMode() == SyncMode.INCREMENTAL) { - final String cursorField = IncrementalUtils.getCursorField(airbyteStream); - final Optional cursorOptional = stateManager.getCursor(pair); - - final AutoCloseableIterator airbyteMessageIterator; - if (cursorOptional.isPresent()) { - airbyteMessageIterator = getIncrementalStream(database, airbyteStream, selectedDatabaseFields, table, cursorOptional.get(), emittedAt); - } else { - // if no cursor is present then this is the first read for is the same as doing a full refresh read. 
- airbyteMessageIterator = getFullRefreshStream(database, streamName, namespace, selectedDatabaseFields, table, emittedAt); - } - - final JsonSchemaPrimitive cursorType = IncrementalUtils.getCursorType(airbyteStream, cursorField); - - iterator = AutoCloseableIterators.transform(autoCloseableIterator -> new StateDecoratingIterator( - autoCloseableIterator, - stateManager, - pair, - cursorField, - cursorOptional.orElse(null), - cursorType), - airbyteMessageIterator); - } else if (airbyteStream.getSyncMode() == SyncMode.FULL_REFRESH) { - iterator = getFullRefreshStream(database, streamName, namespace, selectedDatabaseFields, table, emittedAt); - } else if (airbyteStream.getSyncMode() == null) { - throw new IllegalArgumentException(String.format("%s requires a source sync mode", AbstractJdbcSource.class)); - } else { - throw new IllegalArgumentException(String.format("%s does not support sync mode: %s.", AbstractJdbcSource.class, airbyteStream.getSyncMode())); - } - - final AtomicLong recordCount = new AtomicLong(); - return AutoCloseableIterators.transform(iterator, r -> { - final long count = recordCount.incrementAndGet(); - if (count % 10000 == 0) { - LOGGER.info("Reading stream {}. Records read: {}", streamName, count); - } - return r; - }); - } - - private static AutoCloseableIterator getIncrementalStream(JdbcDatabase database, - ConfiguredAirbyteStream airbyteStream, - List selectedDatabaseFields, - TableInfoInternal table, - String cursor, - Instant emittedAt) { - final String streamName = airbyteStream.getStream().getName(); - final String namespace = airbyteStream.getStream().getNamespace(); - final String cursorField = IncrementalUtils.getCursorField(airbyteStream); - final JDBCType cursorJdbcType = table.getFields().stream() - .filter(info -> info.getColumnName().equals(cursorField)) - .map(ColumnInfo::getColumnType) - .findFirst() - .orElseThrow(); - - Preconditions.checkState(table.getFields().stream().anyMatch(f -> f.getColumnName().equals(cursorField)), - String.format("Could not find cursor field %s in table %s", cursorField, table.getName())); - - final AutoCloseableIterator queryIterator = queryTableIncremental( - database, - selectedDatabaseFields, - table.getSchemaName(), - table.getName(), - cursorField, - cursorJdbcType, - cursor); - - return getMessageIterator(queryIterator, streamName, namespace, emittedAt.toEpochMilli()); - } - - private static AutoCloseableIterator getFullRefreshStream(JdbcDatabase database, - String streamName, - String namespace, - List selectedDatabaseFields, - TableInfoInternal table, - Instant emittedAt) { - final AutoCloseableIterator queryStream = - queryTableFullRefresh(database, selectedDatabaseFields, table.getSchemaName(), table.getName()); - return getMessageIterator(queryStream, streamName, namespace, emittedAt.toEpochMilli()); - } - - @SuppressWarnings("OptionalUsedAsFieldOrParameterType") - private List getTables(final JdbcDatabase database, final Optional databaseOptional) throws Exception { - final List tableInfos = discoverInternal(database, databaseOptional); - final Map> fullyQualifiedTableNameToPrimaryKeys = discoverPrimaryKeys(database, databaseOptional, tableInfos); - return tableInfos.stream() - .map(t -> { - // some databases return multiple copies of the same record for a column (e.g. redshift) because - // they have at least once delivery guarantees. we want to dedupe these, but first we check that the - // records are actually the same and provide a good error message if they are not. 
- assertColumnsWithSameNameAreSame(t.getSchemaName(), t.getName(), t.getFields()); - final List fields = t.getFields() - .stream() - .map(f -> Field.of(f.getColumnName(), JdbcUtils.getType(f.getColumnType()))) - .distinct() - .collect(Collectors.toList()); - final String fullyQualifiedTableName = JdbcUtils.getFullyQualifiedTableName(t.getSchemaName(), t.getName()); - final List primaryKeys = fullyQualifiedTableNameToPrimaryKeys.getOrDefault(fullyQualifiedTableName, Collections.emptyList()); - - return new TableInfo(t.getSchemaName(), t.getName(), fields, primaryKeys); - }) - .collect(Collectors.toList()); - } - - /** - * Discover Primary keys for each table and @return a map of schema.table name to their associated - * list of primary key fields. - * - * When invoking the conn.getMetaData().getPrimaryKeys() function without a table name, it may fail - * on some databases (for example MySql) but works on others (for instance Postgres). To avoid - * making repeated queries to the DB, we try to get all primary keys without specifying a table - * first, if it doesn't work, we retry one table at a time. - */ - protected Map> discoverPrimaryKeys(JdbcDatabase database, - Optional databaseOptional, - List tableInfos) { - try { - // Get all primary keys without specifying a table name - final Map> tablePrimaryKeys = aggregatePrimateKeys(database.bufferedResultSetQuery( - conn -> conn.getMetaData().getPrimaryKeys(databaseOptional.orElse(null), null, null), - r -> { - final String schemaName = - r.getObject(JDBC_COLUMN_SCHEMA_NAME) != null ? r.getString(JDBC_COLUMN_SCHEMA_NAME) : r.getString(JDBC_COLUMN_DATABASE_NAME); - final String streamName = JdbcUtils.getFullyQualifiedTableName(schemaName, r.getString(JDBC_COLUMN_TABLE_NAME)); - final String primaryKey = r.getString(JDBC_COLUMN_COLUMN_NAME); - return new SimpleImmutableEntry<>(streamName, primaryKey); - })); - if (!tablePrimaryKeys.isEmpty()) { - return tablePrimaryKeys; - } - } catch (SQLException e) { - LOGGER.debug(String.format("Could not retrieve primary keys without a table name (%s), retrying", e)); - } - // Get primary keys one table at a time - return tableInfos.stream() - .collect(Collectors.toMap( - tableInfo -> JdbcUtils.getFullyQualifiedTableName(tableInfo.getSchemaName(), tableInfo.getName()), - tableInfo -> { - final String streamName = JdbcUtils.getFullyQualifiedTableName(tableInfo.getSchemaName(), tableInfo.getName()); - try { - final Map> primaryKeys = aggregatePrimateKeys(database.bufferedResultSetQuery( - conn -> conn.getMetaData().getPrimaryKeys(databaseOptional.orElse(null), tableInfo.getSchemaName(), tableInfo.getName()), - r -> new SimpleImmutableEntry<>(streamName, r.getString(JDBC_COLUMN_COLUMN_NAME)))); - return primaryKeys.getOrDefault(streamName, Collections.emptyList()); - } catch (SQLException e) { - LOGGER.error(String.format("Could not retrieve primary keys for %s: %s", streamName, e)); - return Collections.emptyList(); - } - })); - } - /** * Aggregate list of @param entries of StreamName and PrimaryKey and * @@ -440,28 +129,17 @@ private static Map> aggregatePrimateKeys(List columns) { - columns.stream() - .collect(Collectors.groupingBy(ColumnInfo::getColumnName)) - .values() - .forEach(columnsWithSameName -> { - final ColumnInfo comparisonColumn = columnsWithSameName.get(0); - columnsWithSameName.forEach(column -> { - if (!column.equals(comparisonColumn)) { - throw new RuntimeException( - String.format("Found multiple columns with same name: %s in table: %s.%s but the columns are not the same. 
columns: %s", - comparisonColumn.getColumnName(), schemaName, tableName, columns)); - } - }); - }); + private String getCatalog() { + return (jdbcConfig.has("database") ? jdbcConfig.get("database").asText() : null); + } - @SuppressWarnings("OptionalUsedAsFieldOrParameterType") - private List discoverInternal(final JdbcDatabase database, final Optional databaseOptional) + @Override + public List>> discoverInternal(final JdbcDatabase database) throws Exception { - final Set internalSchemas = new HashSet<>(getExcludedInternalSchemas()); + final Set internalSchemas = new HashSet<>(getExcludedInternalNameSpaces()); return database.bufferedResultSetQuery( - conn -> conn.getMetaData().getColumns(databaseOptional.orElse(null), null, null, null), + conn -> conn.getMetaData().getColumns(getCatalog(), null, null, null), resultSet -> Jsons.jsonNode(ImmutableMap.builder() // we always want a namespace, if we cannot get a schema, use db name. .put(INTERNAL_SCHEMA_NAME, @@ -478,10 +156,10 @@ private List discoverInternal(final JdbcDatabase database, fi .collect(Collectors.groupingBy(t -> ImmutablePair.of(t.get(INTERNAL_SCHEMA_NAME).asText(), t.get(INTERNAL_TABLE_NAME).asText()))) .values() .stream() - .map(fields -> new TableInfoInternal( - fields.get(0).get(INTERNAL_SCHEMA_NAME).asText(), - fields.get(0).get(INTERNAL_TABLE_NAME).asText(), - fields.stream() + .map(fields -> TableInfo.>builder() + .nameSpace(fields.get(0).get(INTERNAL_SCHEMA_NAME).asText()) + .name(fields.get(0).get(INTERNAL_TABLE_NAME).asText()) + .fields(fields.stream() .map(f -> { JDBCType jdbcType; try { @@ -494,58 +172,74 @@ private List discoverInternal(final JdbcDatabase database, fi f.get(INTERNAL_COLUMN_TYPE))); jdbcType = JDBCType.VARCHAR; } - return new ColumnInfo(f.get(INTERNAL_COLUMN_NAME).asText(), jdbcType); + return new AbstractField(f.get(INTERNAL_COLUMN_NAME).asText(), jdbcType) {}; }) - .collect(Collectors.toList()))) + .collect(Collectors.toList())) + .build()) .collect(Collectors.toList()); } - public static AutoCloseableIterator getMessageIterator(AutoCloseableIterator recordIterator, - String streamName, - String namespace, - long emittedAt) { - return AutoCloseableIterators.transform(recordIterator, r -> new AirbyteMessage() - .withType(Type.RECORD) - .withRecord(new AirbyteRecordMessage() - .withStream(streamName) - .withNamespace(namespace) - .withEmittedAt(emittedAt) - .withData(r))); + @Override + protected JsonSchemaPrimitive getType(JDBCType columnType) { + return JdbcUtils.getType(columnType); } - public static AutoCloseableIterator queryTableFullRefresh(JdbcDatabase database, - List columnNames, - String schemaName, - String tableName) { - LOGGER.info("Queueing query for table: {}", tableName); - return AutoCloseableIterators.lazyIterator(() -> { - try { - final Stream stream = database.query( - connection -> { - LOGGER.info("Preparing query for table: {}", tableName); - final String sql = String.format("SELECT %s FROM %s", - JdbcUtils.enquoteIdentifierList(connection, columnNames), - JdbcUtils.getFullyQualifiedTableNameWithQuoting(connection, schemaName, tableName)); - final PreparedStatement preparedStatement = connection.prepareStatement(sql); - LOGGER.info("Executing query for table: {}", tableName); - return preparedStatement; - }, - JdbcUtils::rowToJson); - return AutoCloseableIterators.fromStream(stream); - } catch (SQLException e) { - throw new RuntimeException(e); + @Override + protected String getFullyQualifiedTableName(String nameSpace, String tableName) { + return 
JdbcUtils.getFullyQualifiedTableName(nameSpace, tableName); + } + + @Override + protected Map> discoverPrimaryKeys(JdbcDatabase database, + List>> tableInfos) { + try { + // Get all primary keys without specifying a table name + final Map> tablePrimaryKeys = aggregatePrimateKeys(database.bufferedResultSetQuery( + conn -> conn.getMetaData().getPrimaryKeys(getCatalog(), null, null), + r -> { + final String schemaName = + r.getObject(JDBC_COLUMN_SCHEMA_NAME) != null ? r.getString(JDBC_COLUMN_SCHEMA_NAME) : r.getString(JDBC_COLUMN_DATABASE_NAME); + final String streamName = JdbcUtils.getFullyQualifiedTableName(schemaName, r.getString(JDBC_COLUMN_TABLE_NAME)); + final String primaryKey = r.getString(JDBC_COLUMN_COLUMN_NAME); + return new SimpleImmutableEntry<>(streamName, primaryKey); + })); + if (!tablePrimaryKeys.isEmpty()) { + return tablePrimaryKeys; } - }); + } catch (SQLException e) { + LOGGER.debug(String.format("Could not retrieve primary keys without a table name (%s), retrying", e)); + } + // Get primary keys one table at a time + return tableInfos.stream() + .collect(Collectors.toMap( + tableInfo -> JdbcUtils.getFullyQualifiedTableName(tableInfo.getNameSpace(), tableInfo.getName()), + tableInfo -> { + final String streamName = JdbcUtils.getFullyQualifiedTableName(tableInfo.getNameSpace(), tableInfo.getName()); + try { + final Map> primaryKeys = aggregatePrimateKeys(database.bufferedResultSetQuery( + conn -> conn.getMetaData().getPrimaryKeys(getCatalog(), tableInfo.getNameSpace(), tableInfo.getName()), + r -> new SimpleImmutableEntry<>(streamName, r.getString(JDBC_COLUMN_COLUMN_NAME)))); + return primaryKeys.getOrDefault(streamName, Collections.emptyList()); + } catch (SQLException e) { + LOGGER.error(String.format("Could not retrieve primary keys for %s: %s", streamName, e)); + return Collections.emptyList(); + } + })); } - public static AutoCloseableIterator queryTableIncremental(JdbcDatabase database, - List columnNames, - String schemaName, - String tableName, - String cursorField, - JDBCType cursorFieldType, - String cursor) { + @Override + protected String getQuoteString() { + return quoteString; + } + @Override + public AutoCloseableIterator queryTableIncremental(JdbcDatabase database, + List columnNames, + String schemaName, + String tableName, + String cursorField, + JDBCType cursorFieldType, + String cursor) { LOGGER.info("Queueing query for table: {}", tableName); return AutoCloseableIterators.lazyIterator(() -> { try { @@ -570,126 +264,21 @@ public static AutoCloseableIterator queryTableIncremental(JdbcDatabase }); } - private JdbcDatabase createDatabase(JsonNode config) { - final JsonNode jdbcConfig = toJdbcConfig(config); + @Override + public JdbcDatabase createDatabase(JsonNode config) throws SQLException { + jdbcConfig = toJdbcConfig(config); - return Databases.createStreamingJdbcDatabase( + JdbcDatabase database = Databases.createStreamingJdbcDatabase( jdbcConfig.get("username").asText(), jdbcConfig.has("password") ? jdbcConfig.get("password").asText() : null, jdbcConfig.get("jdbc_url").asText(), driverClass, jdbcStreamingQueryConfiguration, jdbcConfig.has("connection_properties") ? jdbcConfig.get("connection_properties").asText() : null); - } - - /** - * This class encapsulates all externally relevant Table information. 
- */ - protected static class TableInfo { - - private final String schemaName; - private final String name; - private final List fields; - private final List primaryKeys; - - public TableInfo(String schemaName, String name, List fields, List primaryKeys) { - this.schemaName = schemaName; - this.name = name; - this.fields = fields; - this.primaryKeys = primaryKeys; - } - - public String getSchemaName() { - return schemaName; - } - - public String getName() { - return name; - } - - public List getFields() { - return fields; - } - - public List getPrimaryKeys() { - return primaryKeys; - } - - } - - /** - * The following two classes are internal data structures to ease managing tables. Any external - * information should be revealed through the {@link TableInfo} class. - */ - public static class TableInfoInternal { - - private final String schemaName; - private final String name; - private final List fields; - - public TableInfoInternal(String schemaName, String tableName, List fields) { - this.schemaName = schemaName; - this.name = tableName; - this.fields = fields; - } - - public String getSchemaName() { - return schemaName; - } - - public String getName() { - return name; - } - - public List getFields() { - return fields; - } - - } - - static class ColumnInfo { - - private final String columnName; - private final JDBCType columnType; - - public ColumnInfo(String columnName, JDBCType columnType) { - this.columnName = columnName; - this.columnType = columnType; - } - - public String getColumnName() { - return columnName; - } - - public JDBCType getColumnType() { - return columnType; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - ColumnInfo that = (ColumnInfo) o; - return Objects.equals(columnName, that.columnName) && columnType == that.columnType; - } - @Override - public int hashCode() { - return Objects.hash(columnName, columnType); - } - - @Override - public String toString() { - return "ColumnInfo{" + - "columnName='" + columnName + '\'' + - ", columnType=" + columnType + - '}'; - } + quoteString = database.getMetaData().getIdentifierQuoteString(); + return database; } } diff --git a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSource.java b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSource.java index 5402565b3e55..b98609efdec5 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSource.java +++ b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSource.java @@ -47,7 +47,7 @@ public JsonNode toJdbcConfig(JsonNode config) { } @Override - public Set getExcludedInternalSchemas() { + public Set getExcludedInternalNameSpaces() { return Set.of("information_schema", "pg_catalog", "pg_internal", "catalog_history"); } diff --git a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSourceAcceptanceTest.java index 8a29b9013da8..4afab918e110 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSourceAcceptanceTest.java +++ 
b/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSourceAcceptanceTest.java @@ -131,7 +131,7 @@ public JsonNode toJdbcConfig(JsonNode config) { } @Override - public Set getExcludedInternalSchemas() { + public Set getExcludedInternalNameSpaces() { return Set.of("information_schema", "pg_catalog", "pg_internal", "catalog_history"); } diff --git a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/DefaultJdbcStressTest.java b/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/DefaultJdbcStressTest.java index 308c2a5528f1..db975c9328a0 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/DefaultJdbcStressTest.java +++ b/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/DefaultJdbcStressTest.java @@ -136,7 +136,7 @@ public JsonNode toJdbcConfig(JsonNode config) { } @Override - public Set getExcludedInternalSchemas() { + public Set getExcludedInternalNameSpaces() { return Set.of("information_schema", "pg_catalog", "pg_internal", "catalog_history"); } diff --git a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/JdbcSourceStressTest.java b/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/JdbcSourceStressTest.java index 8687f16b9453..ba15c7ca054a 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/JdbcSourceStressTest.java +++ b/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/JdbcSourceStressTest.java @@ -134,7 +134,7 @@ public JsonNode toJdbcConfig(JsonNode config) { } @Override - public Set getExcludedInternalSchemas() { + public Set getExcludedInternalNameSpaces() { return Set.of("information_schema", "pg_catalog", "pg_internal", "catalog_history"); } diff --git a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java index 720ac3553ecb..7872b9f21e2d 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java @@ -43,8 +43,8 @@ import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; -import io.airbyte.integrations.source.jdbc.models.JdbcState; -import io.airbyte.integrations.source.jdbc.models.JdbcStreamState; +import io.airbyte.integrations.source.relationaldb.models.JdbcState; +import io.airbyte.integrations.source.relationaldb.models.JdbcStreamState; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteConnectionStatus; import io.airbyte.protocol.models.AirbyteConnectionStatus.Status; diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java index 41ae3881a54a..d70d2967eace 100644 --- 
a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java +++ b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java @@ -73,7 +73,7 @@ public JsonNode toJdbcConfig(JsonNode mssqlConfig) { } @Override - public Set getExcludedInternalSchemas() { + public Set getExcludedInternalNameSpaces() { return Set.of( "INFORMATION_SCHEMA", "sys", diff --git a/airbyte-integrations/connectors/source-mysql/build.gradle b/airbyte-integrations/connectors/source-mysql/build.gradle index 10c28b4d418c..2f109d32182a 100644 --- a/airbyte-integrations/connectors/source-mysql/build.gradle +++ b/airbyte-integrations/connectors/source-mysql/build.gradle @@ -13,6 +13,7 @@ dependencies { implementation project(':airbyte-integrations:bases:base-java') implementation project(':airbyte-integrations:connectors:source-jdbc') implementation project(':airbyte-protocol:models') + implementation project(':airbyte-integrations:connectors:source-relational-db') implementation 'io.debezium:debezium-api:1.4.2.Final' implementation 'io.debezium:debezium-connector-mysql:1.4.2.Final' diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/AirbyteFileOffsetBackingStore.java b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/AirbyteFileOffsetBackingStore.java index 33f490b7e32c..5677df3491ef 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/AirbyteFileOffsetBackingStore.java +++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/AirbyteFileOffsetBackingStore.java @@ -29,8 +29,8 @@ import com.fasterxml.jackson.databind.JsonNode; import com.google.common.base.Preconditions; import io.airbyte.commons.json.Jsons; -import io.airbyte.integrations.source.jdbc.JdbcStateManager; import io.airbyte.integrations.source.jdbc.models.CdcState; +import io.airbyte.integrations.source.relationaldb.StateManager; import java.io.EOFException; import java.io.IOException; import java.io.ObjectOutputStream; @@ -160,7 +160,7 @@ private void save(Map data) { } } - static AirbyteFileOffsetBackingStore initializeState(JdbcStateManager stateManager) { + static AirbyteFileOffsetBackingStore initializeState(StateManager stateManager) { final Path cdcWorkingDir; try { cdcWorkingDir = Files.createTempDirectory(Path.of("/tmp"), "cdc-state-offset"); diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/AirbyteSchemaHistoryStorage.java b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/AirbyteSchemaHistoryStorage.java index 9a1aaf4f708e..d5c9a962f34f 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/AirbyteSchemaHistoryStorage.java +++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/AirbyteSchemaHistoryStorage.java @@ -27,8 +27,8 @@ import static io.airbyte.integrations.source.mysql.MySqlSource.MYSQL_DB_HISTORY; import io.airbyte.commons.json.Jsons; -import io.airbyte.integrations.source.jdbc.JdbcStateManager; import io.airbyte.integrations.source.jdbc.models.CdcState; +import io.airbyte.integrations.source.relationaldb.StateManager; import io.debezium.document.Document; import io.debezium.document.DocumentReader; import 
io.debezium.document.DocumentWriter; @@ -152,7 +152,7 @@ private void writeToFile(String fileAsString) { } } - static AirbyteSchemaHistoryStorage initializeDBHistory(JdbcStateManager stateManager) { + static AirbyteSchemaHistoryStorage initializeDBHistory(StateManager stateManager) { final Path dbHistoryWorkingDir; try { dbHistoryWorkingDir = Files.createTempDirectory(Path.of("/tmp"), "cdc-db-history"); diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java index 79c772b96a81..45e17b15dab6 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java +++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java @@ -41,8 +41,10 @@ import io.airbyte.integrations.base.IntegrationRunner; import io.airbyte.integrations.base.Source; import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; -import io.airbyte.integrations.source.jdbc.JdbcStateManager; import io.airbyte.integrations.source.jdbc.models.CdcState; +import io.airbyte.integrations.source.relationaldb.StateManager; +import io.airbyte.integrations.source.relationaldb.TableInfo; +import io.airbyte.protocol.models.AbstractField; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; @@ -52,6 +54,7 @@ import io.airbyte.protocol.models.ConfiguredAirbyteStream; import io.airbyte.protocol.models.SyncMode; import io.debezium.engine.ChangeEvent; +import java.sql.JDBCType; import java.time.Instant; import java.util.ArrayList; import java.util.Collections; @@ -221,20 +224,19 @@ private static boolean shouldUseCDC(ConfiguredAirbyteCatalog catalog) { } @Override - public List> getIncrementalIterators(JsonNode config, - JdbcDatabase database, + public List> getIncrementalIterators(JdbcDatabase database, ConfiguredAirbyteCatalog catalog, - Map tableNameToTable, - JdbcStateManager stateManager, + Map>> tableNameToTable, + StateManager stateManager, Instant emittedAt) { - if (isCdc(config) && shouldUseCDC(catalog)) { + if (isCdc(getJdbcConfig()) && shouldUseCDC(catalog)) { LOGGER.info("using CDC: {}", true); // TODO: Figure out how to set the isCDC of stateManager to true. 
Its always false final AirbyteFileOffsetBackingStore offsetManager = initializeState(stateManager); AirbyteSchemaHistoryStorage schemaHistoryManager = initializeDBHistory(stateManager); - FilteredFileDatabaseHistory.setDatabaseName(config.get("database").asText()); + FilteredFileDatabaseHistory.setDatabaseName(getJdbcConfig().get("database").asText()); final LinkedBlockingQueue> queue = new LinkedBlockingQueue<>(); - final DebeziumRecordPublisher publisher = new DebeziumRecordPublisher(config, catalog, offsetManager, schemaHistoryManager); + final DebeziumRecordPublisher publisher = new DebeziumRecordPublisher(getJdbcConfig(), catalog, offsetManager, schemaHistoryManager); publisher.start(queue); Optional targetFilePosition = TargetFilePosition @@ -288,13 +290,13 @@ public List> getIncrementalIterators(JsonN return Collections.singletonList(messageIteratorWithStateDecorator); } else { LOGGER.info("using CDC: {}", false); - return super.getIncrementalIterators(config, database, catalog, tableNameToTable, stateManager, + return super.getIncrementalIterators(database, catalog, tableNameToTable, stateManager, emittedAt); } } @Override - public Set getExcludedInternalSchemas() { + public Set getExcludedInternalNameSpaces() { return Set.of( "information_schema", "mysql", diff --git a/airbyte-integrations/connectors/source-oracle/src/main/java/io/airbyte/integrations/source/oracle/OracleSource.java b/airbyte-integrations/connectors/source-oracle/src/main/java/io/airbyte/integrations/source/oracle/OracleSource.java index 349d620498cd..67475e6f09fe 100644 --- a/airbyte-integrations/connectors/source-oracle/src/main/java/io/airbyte/integrations/source/oracle/OracleSource.java +++ b/airbyte-integrations/connectors/source-oracle/src/main/java/io/airbyte/integrations/source/oracle/OracleSource.java @@ -62,7 +62,7 @@ public JsonNode toJdbcConfig(JsonNode config) { } @Override - public Set getExcludedInternalSchemas() { + public Set getExcludedInternalNameSpaces() { // need to add SYSTEM too but for that need create another user when creating the container. return Set.of("APEX_040000", "CTXSYS", "SYSTEM", "FLOWS_FILES", "HR", "MDSYS", "OUTLN", "SYS", "XDB"); } diff --git a/airbyte-integrations/connectors/source-oracle/src/test/java/io/airbyte/integrations/source/oracle/OracleStressTest.java b/airbyte-integrations/connectors/source-oracle/src/test/java/io/airbyte/integrations/source/oracle/OracleStressTest.java index 98dd8e759534..b93329571401 100644 --- a/airbyte-integrations/connectors/source-oracle/src/test/java/io/airbyte/integrations/source/oracle/OracleStressTest.java +++ b/airbyte-integrations/connectors/source-oracle/src/test/java/io/airbyte/integrations/source/oracle/OracleStressTest.java @@ -131,7 +131,7 @@ public JsonNode toJdbcConfig(JsonNode config) { } @Override - public Set getExcludedInternalSchemas() { + public Set getExcludedInternalNameSpaces() { // need to add SYSTEM too but for that need create another user when creating the container. 
return Set.of("APEX_040000", "CTXSYS", "FLOWS_FILES", "HR", "MDSYS", "OUTLN", "SYS", "XDB"); } diff --git a/airbyte-integrations/connectors/source-postgres/build.gradle b/airbyte-integrations/connectors/source-postgres/build.gradle index 65dc6edee667..cd00f01a4963 100644 --- a/airbyte-integrations/connectors/source-postgres/build.gradle +++ b/airbyte-integrations/connectors/source-postgres/build.gradle @@ -13,6 +13,7 @@ dependencies { implementation project(':airbyte-integrations:bases:base-java') implementation project(':airbyte-protocol:models') implementation project(':airbyte-integrations:connectors:source-jdbc') + implementation project(':airbyte-integrations:connectors:source-relational-db') implementation 'org.apache.commons:commons-lang3:3.11' implementation "org.postgresql:postgresql:42.2.18" diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java index 6482eef659eb..6c5f33cdcdcf 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java @@ -44,7 +44,9 @@ import io.airbyte.integrations.base.IntegrationRunner; import io.airbyte.integrations.base.Source; import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; -import io.airbyte.integrations.source.jdbc.JdbcStateManager; +import io.airbyte.integrations.source.relationaldb.StateManager; +import io.airbyte.integrations.source.relationaldb.TableInfo; +import io.airbyte.protocol.models.AbstractField; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteConnectionStatus; import io.airbyte.protocol.models.AirbyteMessage; @@ -57,6 +59,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.sql.JDBCType; import java.sql.PreparedStatement; import java.sql.SQLException; import java.time.Instant; @@ -110,7 +113,7 @@ public JsonNode toJdbcConfig(JsonNode config) { } @Override - public Set getExcludedInternalSchemas() { + public Set getExcludedInternalNameSpaces() { return Set.of("information_schema", "pg_catalog", "pg_internal", "catalog_history"); } @@ -198,7 +201,7 @@ private static PgLsn getLsn(JdbcDatabase database) { } } - private AirbyteFileOffsetBackingStore initializeState(JdbcStateManager stateManager) { + private AirbyteFileOffsetBackingStore initializeState(StateManager stateManager) { final Path cdcWorkingDir; try { cdcWorkingDir = Files.createTempDirectory(Path.of("/tmp"), "cdc"); @@ -213,11 +216,10 @@ private AirbyteFileOffsetBackingStore initializeState(JdbcStateManager stateMana } @Override - public List> getIncrementalIterators(JsonNode config, - JdbcDatabase database, + public List> getIncrementalIterators(JdbcDatabase database, ConfiguredAirbyteCatalog catalog, - Map tableNameToTable, - JdbcStateManager stateManager, + Map>> tableNameToTable, + StateManager stateManager, Instant emittedAt) { /** * If a customer sets up a postgres source with cdc parameters (replication_slot and publication) @@ -227,7 +229,7 @@ public List> getIncrementalIterators(JsonN * have a check here as well to make sure that if no table is in INCREMENTAL mode then skip this * part */ - if (isCdc(config)) { + if (isCdc(getJdbcConfig())) { // State works differently in 
CDC than it does in convention incremental. The state is written to an // offset file that debezium reads from. Then once all records are replicated, we read back that // offset file (which will have been updated by debezium) and set it in the state. There is no @@ -239,7 +241,7 @@ public List> getIncrementalIterators(JsonN final LinkedBlockingQueue> queue = new LinkedBlockingQueue<>(); - final DebeziumRecordPublisher publisher = new DebeziumRecordPublisher(config, catalog, offsetManager); + final DebeziumRecordPublisher publisher = new DebeziumRecordPublisher(getJdbcConfig(), catalog, offsetManager); publisher.start(queue); // handle state machine around pub/sub logic. @@ -273,7 +275,7 @@ public List> getIncrementalIterators(JsonN return Collections.singletonList(messageIteratorWithStateDecorator); } else { - return super.getIncrementalIterators(config, database, catalog, tableNameToTable, stateManager, emittedAt); + return super.getIncrementalIterators(database, catalog, tableNameToTable, stateManager, emittedAt); } } diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresStressTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresStressTest.java index d2e43980f8e0..99c6fc2ca3b4 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresStressTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresStressTest.java @@ -136,7 +136,7 @@ public JsonNode toJdbcConfig(JsonNode config) { } @Override - public Set getExcludedInternalSchemas() { + public Set getExcludedInternalNameSpaces() { return Set.of("information_schema", "pg_catalog", "pg_internal", "catalog_history"); } diff --git a/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSource.java b/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSource.java index ff2d9b0f9c3f..58f0927839e1 100644 --- a/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSource.java +++ b/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSource.java @@ -58,7 +58,7 @@ public JsonNode toJdbcConfig(JsonNode redshiftConfig) { } @Override - public Set getExcludedInternalSchemas() { + public Set getExcludedInternalNameSpaces() { return Set.of("information_schema", "pg_catalog", "pg_internal", "catalog_history"); } diff --git a/airbyte-integrations/connectors/source-relational-db/build.gradle b/airbyte-integrations/connectors/source-relational-db/build.gradle index ec81171769e0..1788edd7993b 100644 --- a/airbyte-integrations/connectors/source-relational-db/build.gradle +++ b/airbyte-integrations/connectors/source-relational-db/build.gradle @@ -1,55 +1,39 @@ +import org.jsonschema2pojo.SourceType + plugins { -// id 'application' -// id 'airbyte-docker' -// id 'airbyte-integration-test-java' id "java-library" - // https://docs.gradle.org/current/userguide/java_testing.html#sec:java_test_fixtures -// id "java-test-fixtures" -// id 'com.github.eirnym.js2p' version '1.0' + id "com.github.eirnym.js2p" version "1.0" } -//application { -// mainClass = 'io.airbyte.integrations.source.jdbc.JdbcSource' -//} - -//project.configurations { -// testFixturesImplementation.extendsFrom 
implementation -// testFixturesRuntimeOnly.extendsFrom runtimeOnly -//} - dependencies { implementation project(':airbyte-commons') implementation project(':airbyte-db') implementation project(':airbyte-integrations:bases:base-java') implementation project(':airbyte-protocol:models') - implementation project(':airbyte-integrations:connectors:source-jdbc') + implementation project(':airbyte-json-validation') implementation 'org.apache.commons:commons-lang3:3.11' implementation 'org.projectlombok:lombok:1.18.20' + annotationProcessor('org.projectlombok:lombok:1.18.20') testImplementation project(':airbyte-test-utils') testImplementation "org.postgresql:postgresql:42.2.18" testImplementation "org.testcontainers:postgresql:1.15.1" -// integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-source-test') -// integrationTestJavaImplementation "org.testcontainers:postgresql:1.15.1" -// -// testFixturesImplementation project(':airbyte-protocol:models') -// testFixturesImplementation project(':airbyte-db') -// testFixturesImplementation project(':airbyte-integrations:bases:base-java') -// testFixturesImplementation project(':airbyte-integrations:connectors:source-jdbc') - - // todo (cgardens) - the java-test-fixtures plugin doesn't by default extend from test. - // we cannot make it depend on the dependencies of source-jdbc:test, because source-jdbc:test - // is going to depend on these fixtures. need to find a way to get fixtures to inherit the - // common test classes without duplicating them. this should be part of whatever solution we - // decide on for a "test-java-lib". the current implementation is leveraging the existing - // plugin, but we can something different if we don't like this tool. -// testFixturesRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine:5.4.2' -// testFixturesImplementation 'org.junit.jupiter:junit-jupiter-api:5.4.2' -// testFixturesImplementation 'org.junit.jupiter:junit-jupiter-params:5.4.2' -// testFixturesImplementation group: 'org.mockito', name: 'mockito-junit-jupiter', version: '3.4.6' - implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs) +} + +jsonSchema2Pojo { + sourceType = SourceType.YAMLSCHEMA + source = files("${sourceSets.main.output.resourcesDir}/jdbc_models") + targetDirectory = new File(project.buildDir, 'generated/src/gen/java/') + removeOldOutput = true + + targetPackage = 'io.airbyte.integrations.source.relationaldb.models' + + useLongIntegers = true + generateBuilders = true + includeConstructors = false + includeSetters = true } \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java index 9a7ecb6167d5..0429f78b0454 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java @@ -33,15 +33,12 @@ import io.airbyte.commons.type.Types; import io.airbyte.commons.util.AutoCloseableIterator; import io.airbyte.commons.util.AutoCloseableIterators; -import io.airbyte.db.jdbc.JdbcUtils; +import io.airbyte.db.IncrementalUtils; import io.airbyte.db.jdbc.SqlDatabase; import 
io.airbyte.integrations.BaseConnector; import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; import io.airbyte.integrations.base.Source; -import io.airbyte.integrations.source.jdbc.IncrementalUtils; -import io.airbyte.integrations.source.jdbc.JdbcStateManager; -import io.airbyte.integrations.source.jdbc.StateDecoratingIterator; -import io.airbyte.integrations.source.jdbc.models.JdbcState; +import io.airbyte.integrations.source.relationaldb.models.JdbcState; import io.airbyte.protocol.models.AbstractField; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteConnectionStatus; @@ -57,6 +54,7 @@ import io.airbyte.protocol.models.JsonSchemaPrimitive; import io.airbyte.protocol.models.SyncMode; import java.sql.SQLException; +import java.sql.SQLType; import java.time.Instant; import java.util.ArrayList; import java.util.Collection; @@ -74,7 +72,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public abstract class AbstractRelationalDbSource extends BaseConnector implements Source { +public abstract class AbstractRelationalDbSource extends BaseConnector implements Source { private static final Logger LOGGER = LoggerFactory.getLogger(AbstractRelationalDbSource.class); @@ -82,8 +80,8 @@ public abstract class AbstractRelationalDbSource extends BaseConnector implem @Override public AirbyteConnectionStatus check(JsonNode config) { - try (final SqlDatabase database = createDatabase(config)) { - for (CheckedConsumer checkOperation : getCheckOperations(config)) { + try (final K database = createDatabase(config)) { + for (CheckedConsumer checkOperation : getCheckOperations(config)) { checkOperation.accept(database); } @@ -101,11 +99,11 @@ public AirbyteConnectionStatus check(JsonNode config) { * * @return list of consumers that run queries for the check command. */ - public abstract List> getCheckOperations(JsonNode config) throws Exception; + public abstract List> getCheckOperations(JsonNode config) throws Exception; @Override public AirbyteCatalog discover(JsonNode config) throws Exception { - try (final SqlDatabase database = createDatabase(config)) { + try (final K database = createDatabase(config)) { List streams = getTables(database).stream() .map(tableInfo -> CatalogHelpers .createAirbyteStream(tableInfo.getName(), tableInfo.getNameSpace(), tableInfo.getFields()) @@ -118,12 +116,12 @@ public AirbyteCatalog discover(JsonNode config) throws Exception { @Override public AutoCloseableIterator read(JsonNode config, ConfiguredAirbyteCatalog catalog, JsonNode state) throws Exception { - final JdbcStateManager stateManager = new JdbcStateManager( - state == null ? JdbcStateManager.emptyState() : Jsons.object(state, JdbcState.class), + final StateManager stateManager = new StateManager( + state == null ? 
StateManager.emptyState() : Jsons.object(state, JdbcState.class), catalog); final Instant emittedAt = Instant.now(); - final SqlDatabase database = createDatabase(config); + final K database = createDatabase(config); final Map>> fullyQualifiedTableNameToInfo = discoverWithoutSystemTables(database) @@ -148,10 +146,10 @@ public AutoCloseableIterator read(JsonNode config, ConfiguredAir }); } - public List> getIncrementalIterators(SqlDatabase database, + public List> getIncrementalIterators(K database, ConfiguredAirbyteCatalog catalog, Map>> tableNameToTable, - JdbcStateManager stateManager, + StateManager stateManager, Instant emittedAt) { return getSelectedIterators( database, @@ -162,10 +160,10 @@ public List> getIncrementalIterators(SqlDa configuredStream -> configuredStream.getSyncMode().equals(SyncMode.INCREMENTAL)); } - public List> getFullRefreshIterators(SqlDatabase database, + public List> getFullRefreshIterators(K database, ConfiguredAirbyteCatalog catalog, Map>> tableNameToTable, - JdbcStateManager stateManager, + StateManager stateManager, Instant emittedAt) { return getSelectedIterators( database, @@ -176,17 +174,17 @@ public List> getFullRefreshIterators(SqlDa configuredStream -> configuredStream.getSyncMode().equals(SyncMode.FULL_REFRESH)); } - private List> getSelectedIterators(SqlDatabase database, + private List> getSelectedIterators(K database, ConfiguredAirbyteCatalog catalog, Map>> tableNameToTable, - JdbcStateManager stateManager, + StateManager stateManager, Instant emittedAt, Predicate selector) { final List> iteratorList = new ArrayList<>(); for (final ConfiguredAirbyteStream airbyteStream : catalog.getStreams()) { if (selector.test(airbyteStream)) { final AirbyteStream stream = airbyteStream.getStream(); - final String fullyQualifiedTableName = JdbcUtils.getFullyQualifiedTableName(stream.getNamespace(), stream.getName()); + final String fullyQualifiedTableName = getFullyQualifiedTableName(stream.getNamespace(), stream.getName()); if (!tableNameToTable.containsKey(fullyQualifiedTableName)) { LOGGER.info("Skipping stream {} because it is not in the source", fullyQualifiedTableName); continue; @@ -206,10 +204,10 @@ private List> getSelectedIterators(SqlData return iteratorList; } - private AutoCloseableIterator createReadIterator(SqlDatabase database, + private AutoCloseableIterator createReadIterator(K database, ConfiguredAirbyteStream airbyteStream, TableInfo> table, - JdbcStateManager stateManager, + StateManager stateManager, Instant emittedAt) { final String streamName = airbyteStream.getStream().getName(); final String namespace = airbyteStream.getStream().getNamespace(); @@ -234,7 +232,8 @@ private AutoCloseableIterator createReadIterator(SqlDatabase dat airbyteMessageIterator = getFullRefreshStream(database, streamName, namespace, selectedDatabaseFields, table, emittedAt); } - final JsonSchemaPrimitive cursorType = IncrementalUtils.getCursorType(airbyteStream, cursorField); + final JsonSchemaPrimitive cursorType = IncrementalUtils + .getCursorType(airbyteStream, cursorField); iterator = AutoCloseableIterators.transform(autoCloseableIterator -> new StateDecoratingIterator( autoCloseableIterator, @@ -262,7 +261,7 @@ private AutoCloseableIterator createReadIterator(SqlDatabase dat }); } - private AutoCloseableIterator getIncrementalStream(SqlDatabase database, + private AutoCloseableIterator getIncrementalStream(K database, ConfiguredAirbyteStream airbyteStream, List selectedDatabaseFields, TableInfo> table, @@ -292,7 +291,7 @@ private AutoCloseableIterator 
getIncrementalStream(SqlDatabase d return getMessageIterator(queryIterator, streamName, namespace, emittedAt.toEpochMilli()); } - private AutoCloseableIterator getFullRefreshStream(SqlDatabase database, + private AutoCloseableIterator getFullRefreshStream(K database, String streamName, String namespace, List selectedDatabaseFields, @@ -307,7 +306,7 @@ private AutoCloseableIterator getFullRefreshStream(SqlDatabase d protected abstract String getFullyQualifiedTableName(String nameSpace, String tableName); - private List> getTables(final SqlDatabase database) throws Exception { + private List> getTables(final K database) throws Exception { final List>> tableInfos = discoverWithoutSystemTables(database); final Map> fullyQualifiedTableNameToPrimaryKeys = discoverPrimaryKeys(database, tableInfos); @@ -326,7 +325,7 @@ private List> getTables(final SqlDatabase database) throws Exce final List primaryKeys = fullyQualifiedTableNameToPrimaryKeys.getOrDefault(fullyQualifiedTableName, Collections .emptyList()); - return new TableInfo.TableInfoBuilder().nameSpace(t.getNameSpace()).name(t.getName()).fields(fields).primaryKeys(primaryKeys) + return TableInfo.builder().nameSpace(t.getNameSpace()).name(t.getName()).fields(fields).primaryKeys(primaryKeys) .build(); }) .collect(Collectors.toList()); @@ -341,7 +340,7 @@ private List> getTables(final SqlDatabase database) throws Exce * making repeated queries to the DB, we try to get all primary keys without specifying a table * first, if it doesn't work, we retry one table at a time. */ - protected abstract Map> discoverPrimaryKeys(SqlDatabase database, + protected abstract Map> discoverPrimaryKeys(K database, List>> tableInfos); protected void assertColumnsWithSameNameAreSame(String nameSpace, String tableName, List> columns) { @@ -360,7 +359,7 @@ protected void assertColumnsWithSameNameAreSame(String nameSpace, String tableNa }); } - protected List>> discoverWithoutSystemTables(final SqlDatabase database) throws Exception { + protected List>> discoverWithoutSystemTables(final K database) throws Exception { Set systemNameSpaces = getExcludedInternalNameSpaces(); List>> discoveredTables = discoverInternal(database); return (systemNameSpaces == null || systemNameSpaces.isEmpty() ? discoveredTables @@ -368,7 +367,7 @@ protected List>> discoverWithoutSystemTables(final Sq Collectors.toList())); } - protected abstract List>> discoverInternal(final SqlDatabase database) + protected abstract List>> discoverInternal(final K database) throws Exception; public AutoCloseableIterator getMessageIterator(AutoCloseableIterator recordIterator, @@ -403,7 +402,7 @@ private String getFullTableName(String nameSpace, String tableName) { : getIdentifierWithQuoting(nameSpace) + "." 
+ getIdentifierWithQuoting(tableName)); } - protected AutoCloseableIterator queryTable(SqlDatabase database, String sqlQuery) { + protected AutoCloseableIterator queryTable(K database, String sqlQuery) { return AutoCloseableIterators.lazyIterator(() -> { try { final Stream stream = database.query(sqlQuery); @@ -414,7 +413,7 @@ protected AutoCloseableIterator queryTable(SqlDatabase database, Strin }); } - public AutoCloseableIterator queryTableFullRefresh(SqlDatabase database, + public AutoCloseableIterator queryTableFullRefresh(K database, List columnNames, String schemaName, String tableName) { @@ -424,24 +423,14 @@ public AutoCloseableIterator queryTableFullRefresh(SqlDatabase databas getFullTableName(schemaName, tableName))); } - protected abstract String getCursorValue(T cursorFieldType, String cursor); + public abstract AutoCloseableIterator queryTableIncremental(K database, + List columnNames, + String schemaName, + String tableName, + String cursorField, + T cursorFieldType, + String cursor); - public AutoCloseableIterator queryTableIncremental(SqlDatabase database, - List columnNames, - String schemaName, - String tableName, - String cursorField, - T cursorFieldType, - String cursor) { - - LOGGER.info("Queueing query for table: {}", tableName); - return queryTable(database, String.format("SELECT %s FROM %s WHERE %s > %s", - enquoteIdentifierList(columnNames), - getFullTableName(schemaName, tableName), - getIdentifierWithQuoting(cursorField), - getCursorValue(cursorFieldType, cursor))); - } - - protected abstract SqlDatabase createDatabase(JsonNode config); + protected abstract K createDatabase(JsonNode config) throws Exception; } diff --git a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcCdcStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/CdcStateManager.java similarity index 87% rename from airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcCdcStateManager.java rename to airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/CdcStateManager.java index 9461c1bfbd24..030d4b7c47bb 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcCdcStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/CdcStateManager.java @@ -22,24 +22,24 @@ * SOFTWARE. 
*/ -package io.airbyte.integrations.source.jdbc; +package io.airbyte.integrations.source.relationaldb; import com.google.common.annotations.VisibleForTesting; import io.airbyte.commons.json.Jsons; -import io.airbyte.integrations.source.jdbc.models.CdcState; +import io.airbyte.integrations.source.relationaldb.models.CdcState; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class JdbcCdcStateManager { +public class CdcStateManager { - private static final Logger LOGGER = LoggerFactory.getLogger(JdbcStateManager.class); + private static final Logger LOGGER = LoggerFactory.getLogger(StateManager.class); private final CdcState initialState; private CdcState currentState; @VisibleForTesting - JdbcCdcStateManager(CdcState serialized) { + CdcStateManager(CdcState serialized) { this.initialState = serialized; this.currentState = serialized; diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/CursorInfo.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/CursorInfo.java new file mode 100644 index 000000000000..a5a947fde205 --- /dev/null +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/CursorInfo.java @@ -0,0 +1,98 @@ +/* + * MIT License + * + * Copyright (c) 2020 Airbyte + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +package io.airbyte.integrations.source.relationaldb; + +import java.util.Objects; + +public class CursorInfo { + + private final String originalCursorField; + private final String originalCursor; + + private final String cursorField; + private String cursor; + + public CursorInfo(String originalCursorField, + String originalCursor, + String cursorField, + String cursor) { + this.originalCursorField = originalCursorField; + this.originalCursor = originalCursor; + this.cursorField = cursorField; + this.cursor = cursor; + } + + public String getOriginalCursorField() { + return originalCursorField; + } + + public String getOriginalCursor() { + return originalCursor; + } + + public String getCursorField() { + return cursorField; + } + + public String getCursor() { + return cursor; + } + + @SuppressWarnings("UnusedReturnValue") + public CursorInfo setCursor(String cursor) { + this.cursor = cursor; + return this; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + CursorInfo that = (CursorInfo) o; + return Objects.equals(originalCursorField, that.originalCursorField) && Objects + .equals(originalCursor, that.originalCursor) + && Objects.equals(cursorField, that.cursorField) && Objects.equals(cursor, that.cursor); + } + + @Override + public int hashCode() { + return Objects.hash(originalCursorField, originalCursor, cursorField, cursor); + } + + @Override + public String toString() { + return "CursorInfo{" + + "originalCursorField='" + originalCursorField + '\'' + + ", originalCursor='" + originalCursor + '\'' + + ", cursorField='" + cursorField + '\'' + + ", cursor='" + cursor + '\'' + + '}'; + } + +} diff --git a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/StateDecoratingIterator.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIterator.java similarity index 95% rename from airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/StateDecoratingIterator.java rename to airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIterator.java index 58b2c979c655..2a7b85f15efd 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/StateDecoratingIterator.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIterator.java @@ -22,9 +22,10 @@ * SOFTWARE. 
*/ -package io.airbyte.integrations.source.jdbc; +package io.airbyte.integrations.source.relationaldb; import com.google.common.collect.AbstractIterator; +import io.airbyte.db.IncrementalUtils; import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; @@ -39,7 +40,7 @@ public class StateDecoratingIterator extends AbstractIterator im private static final Logger LOGGER = LoggerFactory.getLogger(StateDecoratingIterator.class); private final Iterator messageIterator; - private final JdbcStateManager stateManager; + private final StateManager stateManager; private final AirbyteStreamNameNamespacePair pair; private final String cursorField; private final JsonSchemaPrimitive cursorType; @@ -48,7 +49,7 @@ public class StateDecoratingIterator extends AbstractIterator im private boolean hasEmittedState; public StateDecoratingIterator(Iterator messageIterator, - JdbcStateManager stateManager, + StateManager stateManager, AirbyteStreamNameNamespacePair pair, String cursorField, String initialCursor, diff --git a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateManager.java similarity index 76% rename from airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcStateManager.java rename to airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateManager.java index 5c609c2e0da7..75174ee739d1 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateManager.java @@ -22,7 +22,7 @@ * SOFTWARE. */ -package io.airbyte.integrations.source.jdbc; +package io.airbyte.integrations.source.relationaldb; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; @@ -30,8 +30,8 @@ import com.google.common.collect.Lists; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; -import io.airbyte.integrations.source.jdbc.models.JdbcState; -import io.airbyte.integrations.source.jdbc.models.JdbcStreamState; +import io.airbyte.integrations.source.relationaldb.models.JdbcState; +import io.airbyte.integrations.source.relationaldb.models.JdbcStreamState; import io.airbyte.protocol.models.AirbyteStateMessage; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.ConfiguredAirbyteStream; @@ -39,7 +39,6 @@ import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; -import java.util.Objects; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; @@ -49,20 +48,20 @@ /** * Handles the state machine for the state of jdbc source implementations. 
*/ -public class JdbcStateManager { +public class StateManager { - private static final Logger LOGGER = LoggerFactory.getLogger(JdbcStateManager.class); + private static final Logger LOGGER = LoggerFactory.getLogger(StateManager.class); private final Map pairToCursorInfo; private Boolean isCdc; - private final JdbcCdcStateManager cdcStateManager; + private final CdcStateManager cdcStateManager; public static JdbcState emptyState() { return new JdbcState(); } - public JdbcStateManager(JdbcState serialized, ConfiguredAirbyteCatalog catalog) { - this.cdcStateManager = new JdbcCdcStateManager(serialized.getCdcState()); + public StateManager(JdbcState serialized, ConfiguredAirbyteCatalog catalog) { + this.cdcStateManager = new CdcStateManager(serialized.getCdcState()); this.isCdc = serialized.getCdc(); if (serialized.getCdc() == null) { this.isCdc = false; @@ -78,12 +77,12 @@ private static Map createCursorInfoM .map(ConfiguredAirbyteStream::getStream) .map(AirbyteStreamNameNamespacePair::fromAirbyteSteam) .collect(Collectors.toSet()); - allStreamNames.addAll(serialized.getStreams().stream().map(JdbcStateManager::toAirbyteStreamNameNamespacePair).collect(Collectors.toSet())); + allStreamNames.addAll(serialized.getStreams().stream().map(StateManager::toAirbyteStreamNameNamespacePair).collect(Collectors.toSet())); final Map localMap = new HashMap<>(); final Map pairToState = serialized.getStreams() .stream() - .collect(Collectors.toMap(JdbcStateManager::toAirbyteStreamNameNamespacePair, a -> a)); + .collect(Collectors.toMap(StateManager::toAirbyteStreamNameNamespacePair, a -> a)); final Map pairToConfiguredAirbyteStream = catalog.getStreams().stream() .collect(Collectors.toMap(AirbyteStreamNameNamespacePair::fromConfiguredAirbyteSteam, s -> s)); @@ -190,7 +189,7 @@ public void setIsCdc(boolean isCdc) { } } - public JdbcCdcStateManager getCdcStateManager() { + public CdcStateManager getCdcStateManager() { return cdcStateManager; } @@ -214,72 +213,4 @@ private AirbyteStateMessage toState() { return new AirbyteStateMessage().withData(Jsons.jsonNode(jdbcState)); } - @VisibleForTesting - static class CursorInfo { - - private final String originalCursorField; - private final String originalCursor; - - private final String cursorField; - private String cursor; - - public CursorInfo(String originalCursorField, String originalCursor, String cursorField, String cursor) { - this.originalCursorField = originalCursorField; - this.originalCursor = originalCursor; - this.cursorField = cursorField; - this.cursor = cursor; - } - - public String getOriginalCursorField() { - return originalCursorField; - } - - public String getOriginalCursor() { - return originalCursor; - } - - public String getCursorField() { - return cursorField; - } - - public String getCursor() { - return cursor; - } - - @SuppressWarnings("UnusedReturnValue") - public CursorInfo setCursor(String cursor) { - this.cursor = cursor; - return this; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - CursorInfo that = (CursorInfo) o; - return Objects.equals(originalCursorField, that.originalCursorField) && Objects.equals(originalCursor, that.originalCursor) - && Objects.equals(cursorField, that.cursorField) && Objects.equals(cursor, that.cursor); - } - - @Override - public int hashCode() { - return Objects.hash(originalCursorField, originalCursor, cursorField, cursor); - } - - @Override - public String toString() { - return "CursorInfo{" + - 
"originalCursorField='" + originalCursorField + '\'' + - ", originalCursor='" + originalCursor + '\'' + - ", cursorField='" + cursorField + '\'' + - ", cursor='" + cursor + '\'' + - '}'; - } - - } - } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/TableInfo.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/TableInfo.java index db9d686b2678..c7a3716e6639 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/TableInfo.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/TableInfo.java @@ -33,7 +33,7 @@ */ @Getter @Builder -class TableInfo { +public class TableInfo { private final String nameSpace; private final String name; diff --git a/airbyte-integrations/connectors/source-jdbc/src/main/resources/jdbc_models/jdbc_models.yaml b/airbyte-integrations/connectors/source-relational-db/src/main/resources/jdbc_models/jdbc_models.yaml similarity index 100% rename from airbyte-integrations/connectors/source-jdbc/src/main/resources/jdbc_models/jdbc_models.yaml rename to airbyte-integrations/connectors/source-relational-db/src/main/resources/jdbc_models/jdbc_models.yaml diff --git a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/StateDecoratingIteratorTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIteratorTest.java similarity index 97% rename from airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/StateDecoratingIteratorTest.java rename to airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIteratorTest.java index f62efe27d9e8..d65190229fe1 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/StateDecoratingIteratorTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIteratorTest.java @@ -22,7 +22,7 @@ * SOFTWARE. 
*/ -package io.airbyte.integrations.source.jdbc; +package io.airbyte.integrations.source.relationaldb; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -62,13 +62,13 @@ class StateDecoratingIteratorTest { .withData(Jsons.jsonNode(ImmutableMap.of(UUID_FIELD_NAME, "def")))); private static Iterator messageIterator; - private JdbcStateManager stateManager; + private StateManager stateManager; private AirbyteStateMessage stateMessage; @BeforeEach void setup() { messageIterator = MoreIterators.of(RECORD_MESSAGE1, RECORD_MESSAGE2); - stateManager = mock(JdbcStateManager.class); + stateManager = mock(StateManager.class); stateMessage = mock(AirbyteStateMessage.class); when(stateManager.getOriginalCursorField(NAME_NAMESPACE_PAIR)).thenReturn(Optional.empty()); when(stateManager.getOriginalCursor(NAME_NAMESPACE_PAIR)).thenReturn(Optional.empty()); diff --git a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/JdbcStateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateManagerTest.java similarity index 86% rename from airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/JdbcStateManagerTest.java rename to airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateManagerTest.java index d2af598eec02..a92d22d1e8d5 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/JdbcStateManagerTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateManagerTest.java @@ -22,15 +22,14 @@ * SOFTWARE. 
*/ -package io.airbyte.integrations.source.jdbc; +package io.airbyte.integrations.source.relationaldb; import static org.junit.jupiter.api.Assertions.assertEquals; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; -import io.airbyte.integrations.source.jdbc.JdbcStateManager.CursorInfo; -import io.airbyte.integrations.source.jdbc.models.JdbcState; -import io.airbyte.integrations.source.jdbc.models.JdbcStreamState; +import io.airbyte.integrations.source.relationaldb.models.JdbcState; +import io.airbyte.integrations.source.relationaldb.models.JdbcStreamState; import io.airbyte.protocol.models.AirbyteStateMessage; import io.airbyte.protocol.models.AirbyteStream; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; @@ -42,7 +41,7 @@ import org.junit.jupiter.api.Test; import org.testcontainers.shaded.com.google.common.collect.Lists; -class JdbcStateManagerTest { +class StateManagerTest { private static final String NAMESPACE = "public"; private static final String STREAM_NAME1 = "cars"; @@ -57,46 +56,50 @@ class JdbcStateManagerTest { @Test void testCreateCursorInfoCatalogAndStateSameCursorField() { final CursorInfo actual = - JdbcStateManager.createCursorInfoForStream(NAME_NAMESPACE_PAIR1, getState(CURSOR_FIELD1, CURSOR), getCatalog(CURSOR_FIELD1)); + StateManager.createCursorInfoForStream(NAME_NAMESPACE_PAIR1, getState(CURSOR_FIELD1, CURSOR), getCatalog(CURSOR_FIELD1)); assertEquals(new CursorInfo(CURSOR_FIELD1, CURSOR, CURSOR_FIELD1, CURSOR), actual); } @Test void testCreateCursorInfoCatalogAndStateSameCursorFieldButNoCursor() { final CursorInfo actual = - JdbcStateManager.createCursorInfoForStream(NAME_NAMESPACE_PAIR1, getState(CURSOR_FIELD1, null), getCatalog(CURSOR_FIELD1)); + StateManager.createCursorInfoForStream(NAME_NAMESPACE_PAIR1, getState(CURSOR_FIELD1, null), getCatalog(CURSOR_FIELD1)); assertEquals(new CursorInfo(CURSOR_FIELD1, null, CURSOR_FIELD1, null), actual); } @Test void testCreateCursorInfoCatalogAndStateChangeInCursorFieldName() { final CursorInfo actual = - JdbcStateManager.createCursorInfoForStream(NAME_NAMESPACE_PAIR1, getState(CURSOR_FIELD1, CURSOR), getCatalog(CURSOR_FIELD2)); + StateManager.createCursorInfoForStream(NAME_NAMESPACE_PAIR1, getState(CURSOR_FIELD1, CURSOR), getCatalog(CURSOR_FIELD2)); assertEquals(new CursorInfo(CURSOR_FIELD1, CURSOR, CURSOR_FIELD2, null), actual); } @Test void testCreateCursorInfoCatalogAndNoState() { - final CursorInfo actual = JdbcStateManager.createCursorInfoForStream(NAME_NAMESPACE_PAIR1, Optional.empty(), getCatalog(CURSOR_FIELD1)); + final CursorInfo actual = StateManager + .createCursorInfoForStream(NAME_NAMESPACE_PAIR1, Optional.empty(), getCatalog(CURSOR_FIELD1)); assertEquals(new CursorInfo(null, null, CURSOR_FIELD1, null), actual); } @Test void testCreateCursorInfoStateAndNoCatalog() { - final CursorInfo actual = JdbcStateManager.createCursorInfoForStream(NAME_NAMESPACE_PAIR1, getState(CURSOR_FIELD1, CURSOR), Optional.empty()); + final CursorInfo actual = StateManager + .createCursorInfoForStream(NAME_NAMESPACE_PAIR1, getState(CURSOR_FIELD1, CURSOR), Optional.empty()); assertEquals(new CursorInfo(CURSOR_FIELD1, CURSOR, null, null), actual); } // this is what full refresh looks like. 
@Test void testCreateCursorInfoNoCatalogAndNoState() { - final CursorInfo actual = JdbcStateManager.createCursorInfoForStream(NAME_NAMESPACE_PAIR1, Optional.empty(), Optional.empty()); + final CursorInfo actual = StateManager + .createCursorInfoForStream(NAME_NAMESPACE_PAIR1, Optional.empty(), Optional.empty()); assertEquals(new CursorInfo(null, null, null, null), actual); } @Test void testCreateCursorInfoStateAndCatalogButNoCursorField() { - final CursorInfo actual = JdbcStateManager.createCursorInfoForStream(NAME_NAMESPACE_PAIR1, getState(CURSOR_FIELD1, CURSOR), getCatalog(null)); + final CursorInfo actual = StateManager + .createCursorInfoForStream(NAME_NAMESPACE_PAIR1, getState(CURSOR_FIELD1, CURSOR), getCatalog(null)); assertEquals(new CursorInfo(CURSOR_FIELD1, CURSOR, null, null), actual); } @@ -129,7 +132,7 @@ void testGetters() { new ConfiguredAirbyteStream() .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)))); - final JdbcStateManager stateManager = new JdbcStateManager(state, catalog); + final StateManager stateManager = new StateManager(state, catalog); assertEquals(Optional.of(CURSOR_FIELD1), stateManager.getOriginalCursorField(NAME_NAMESPACE_PAIR1)); assertEquals(Optional.of(CURSOR), stateManager.getOriginalCursor(NAME_NAMESPACE_PAIR1)); @@ -155,7 +158,7 @@ void testToState() { new ConfiguredAirbyteStream() .withStream(new AirbyteStream().withName(STREAM_NAME3).withNamespace(NAMESPACE)))); - final JdbcStateManager stateManager = new JdbcStateManager(new JdbcState(), catalog); + final StateManager stateManager = new StateManager(new JdbcState(), catalog); final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() .withData(Jsons.jsonNode(new JdbcState().withStreams(Lists @@ -191,7 +194,7 @@ void testToStateNullCursorField() { .withCursorField(Lists.newArrayList(CURSOR_FIELD1)), new ConfiguredAirbyteStream() .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)))); - final JdbcStateManager stateManager = new JdbcStateManager(new JdbcState(), catalog); + final StateManager stateManager = new StateManager(new JdbcState(), catalog); final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() .withData(Jsons.jsonNode(new JdbcState().withStreams(Lists diff --git a/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/main/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcSource.java b/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/main/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcSource.java index a5a98384194e..a4fb56fdfaf9 100644 --- a/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/main/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcSource.java +++ b/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/main/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcSource.java @@ -55,7 +55,7 @@ public JsonNode toJdbcConfig(JsonNode aqqConfig) { } @Override - public Set getExcludedInternalSchemas() { + public Set getExcludedInternalNameSpaces() { // TODO Add tables to exaclude, Ex "INFORMATION_SCHEMA", "sys", "spt_fallback_db", etc return Set.of(""); } diff --git a/airbyte-integrations/connectors/source-snowflake/src/main/java/io.airbyte.integrations.source.snowflake/SnowflakeSource.java b/airbyte-integrations/connectors/source-snowflake/src/main/java/io.airbyte.integrations.source.snowflake/SnowflakeSource.java index 080cb80696b0..465af0896b8d 100644 --- 
a/airbyte-integrations/connectors/source-snowflake/src/main/java/io.airbyte.integrations.source.snowflake/SnowflakeSource.java +++ b/airbyte-integrations/connectors/source-snowflake/src/main/java/io.airbyte.integrations.source.snowflake/SnowflakeSource.java @@ -67,7 +67,7 @@ public JsonNode toJdbcConfig(JsonNode config) { } @Override - public Set getExcludedInternalSchemas() { + public Set getExcludedInternalNameSpaces() { return Set.of( "INFORMATION_SCHEMA"); } From 6dcedf53e333432cf84f95a64124325958a49fd5 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Mon, 14 Jun 2021 15:50:43 +0300 Subject: [PATCH 04/63] fix imports --- .../source/mysql/AirbyteFileOffsetBackingStore.java | 2 +- .../integrations/source/mysql/AirbyteSchemaHistoryStorage.java | 2 +- .../java/io/airbyte/integrations/source/mysql/MySqlSource.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/AirbyteFileOffsetBackingStore.java b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/AirbyteFileOffsetBackingStore.java index 5677df3491ef..23a3a07c5226 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/AirbyteFileOffsetBackingStore.java +++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/AirbyteFileOffsetBackingStore.java @@ -29,8 +29,8 @@ import com.fasterxml.jackson.databind.JsonNode; import com.google.common.base.Preconditions; import io.airbyte.commons.json.Jsons; -import io.airbyte.integrations.source.jdbc.models.CdcState; import io.airbyte.integrations.source.relationaldb.StateManager; +import io.airbyte.integrations.source.relationaldb.models.CdcState; import java.io.EOFException; import java.io.IOException; import java.io.ObjectOutputStream; diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/AirbyteSchemaHistoryStorage.java b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/AirbyteSchemaHistoryStorage.java index d5c9a962f34f..a981bc602a3e 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/AirbyteSchemaHistoryStorage.java +++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/AirbyteSchemaHistoryStorage.java @@ -27,8 +27,8 @@ import static io.airbyte.integrations.source.mysql.MySqlSource.MYSQL_DB_HISTORY; import io.airbyte.commons.json.Jsons; -import io.airbyte.integrations.source.jdbc.models.CdcState; import io.airbyte.integrations.source.relationaldb.StateManager; +import io.airbyte.integrations.source.relationaldb.models.CdcState; import io.debezium.document.Document; import io.debezium.document.DocumentReader; import io.debezium.document.DocumentWriter; diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java index 45e17b15dab6..bc87448b6aaa 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java +++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java @@ -41,9 +41,9 @@ import io.airbyte.integrations.base.IntegrationRunner; import 
io.airbyte.integrations.base.Source; import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; -import io.airbyte.integrations.source.jdbc.models.CdcState; import io.airbyte.integrations.source.relationaldb.StateManager; import io.airbyte.integrations.source.relationaldb.TableInfo; +import io.airbyte.integrations.source.relationaldb.models.CdcState; import io.airbyte.protocol.models.AbstractField; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteMessage; From 6d8e97633005b94d4f45b890738e2b5d23e19ba3 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Tue, 15 Jun 2021 01:41:59 +0300 Subject: [PATCH 05/63] move configs to Database level + fix MySql source --- .../airbyte/db/jdbc/DefaultJdbcDatabase.java | 2 +- .../java/io/airbyte/db/jdbc/JdbcDatabase.java | 26 ++++++++--------- .../java/io/airbyte/db/jdbc/SqlDatabase.java | 25 ++++++++++++++-- .../db/jdbc/StreamingJdbcDatabase.java | 2 +- .../source/clickhouse/ClickHouseSource.java | 2 +- .../source/jdbc/AbstractJdbcSource.java | 29 +++++-------------- .../integrations/source/jdbc/JdbcSource.java | 2 +- .../AbstractJdbcSourceAcceptanceTest.java | 2 +- .../source/jdbc/DefaultJdbcStressTest.java | 2 +- .../source/jdbc/JdbcSourceStressTest.java | 2 +- .../jdbc/test/JdbcSourceAcceptanceTest.java | 2 +- .../source/jdbc/test/JdbcStressTest.java | 2 +- .../source/mssql/MssqlSource.java | 2 +- .../source/mysql/MySqlSource.java | 9 +++--- .../source/oracle/OracleSource.java | 2 +- .../source/oracle/OracleStressTest.java | 2 +- .../source/postgres/PostgresSource.java | 2 +- .../source/postgres/PostgresStressTest.java | 2 +- .../source/redshift/RedshiftSource.java | 2 +- .../AbstractRelationalDbSource.java | 22 ++++++++++++-- .../ScaffoldJavaJdbcSource.java | 2 +- .../SnowflakeSource.java | 2 +- 22 files changed, 84 insertions(+), 61 deletions(-) diff --git a/airbyte-db/src/main/java/io/airbyte/db/jdbc/DefaultJdbcDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/jdbc/DefaultJdbcDatabase.java index cc2040b4ab4f..234362fd3b0a 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/jdbc/DefaultJdbcDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/jdbc/DefaultJdbcDatabase.java @@ -42,7 +42,7 @@ /** * Database object for interacting with a JDBC connection. Can be used for any JDBC compliant db. */ -public class DefaultJdbcDatabase implements JdbcDatabase { +public class DefaultJdbcDatabase extends JdbcDatabase { private static final Logger LOGGER = LoggerFactory.getLogger(DefaultJdbcDatabase.class); diff --git a/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java index 0b1fd240aad3..e830ae3efffe 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java @@ -38,7 +38,7 @@ /** * Database object for interacting with a JDBC connection. */ -public interface JdbcDatabase extends SqlDatabase { +public abstract class JdbcDatabase extends SqlDatabase { /** * Execute a database query. @@ -46,14 +46,14 @@ public interface JdbcDatabase extends SqlDatabase { * @param query the query to execute against the database. * @throws SQLException SQL related exceptions. 
*/ - void execute(CheckedConsumer query) throws SQLException; + public abstract void execute(CheckedConsumer query) throws SQLException; @Override - default void execute(String sql) throws SQLException { + public void execute(String sql) throws SQLException { execute(connection -> connection.createStatement().execute(sql)); } - default void executeWithinTransaction(List queries) throws SQLException { + public void executeWithinTransaction(List queries) throws SQLException { execute(connection -> { connection.setAutoCommit(false); for (String s : queries) { @@ -77,8 +77,8 @@ default void executeWithinTransaction(List queries) throws SQLException * @return Result of the query mapped to a list. * @throws SQLException SQL related exceptions. */ - List bufferedResultSetQuery(CheckedFunction query, - CheckedFunction recordTransform) + public abstract List bufferedResultSetQuery(CheckedFunction query, + CheckedFunction recordTransform) throws SQLException; /** @@ -96,8 +96,8 @@ List bufferedResultSetQuery(CheckedFunction Stream resultSetQuery(CheckedFunction query, - CheckedFunction recordTransform) + public abstract Stream resultSetQuery(CheckedFunction query, + CheckedFunction recordTransform) throws SQLException; /** @@ -115,11 +115,11 @@ Stream resultSetQuery(CheckedFunction Stream query(CheckedFunction statementCreator, - CheckedFunction recordTransform) + public abstract Stream query(CheckedFunction statementCreator, + CheckedFunction recordTransform) throws SQLException; - default int queryInt(String sql, String... params) throws SQLException { + public int queryInt(String sql, String... params) throws SQLException { try (Stream q = query(c -> { PreparedStatement statement = c.prepareStatement(sql); int i = 1; @@ -135,7 +135,7 @@ default int queryInt(String sql, String... params) throws SQLException { } @Override - default Stream query(String sql, String... params) throws SQLException { + public Stream query(String sql, String... params) throws SQLException { return query(connection -> { PreparedStatement statement = connection.prepareStatement(sql); int i = 1; @@ -147,6 +147,6 @@ default Stream query(String sql, String... params) throws SQLException }, JdbcUtils::rowToJson); } - DatabaseMetaData getMetaData() throws SQLException; + public abstract DatabaseMetaData getMetaData() throws SQLException; } diff --git a/airbyte-db/src/main/java/io/airbyte/db/jdbc/SqlDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/jdbc/SqlDatabase.java index f1a815bc9e38..1e471441d3e7 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/jdbc/SqlDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/jdbc/SqlDatabase.java @@ -28,10 +28,29 @@ import java.sql.SQLException; import java.util.stream.Stream; -public interface SqlDatabase extends AutoCloseable { +public abstract class SqlDatabase implements AutoCloseable { - void execute(String sql) throws SQLException; + private JsonNode sourceConfig; + private JsonNode databaseConfig; - Stream query(String sql, String... params) throws SQLException; + public abstract void execute(String sql) throws SQLException; + + public abstract Stream query(String sql, String... 
params) throws SQLException; + + public JsonNode getSourceConfig() { + return sourceConfig; + } + + public void setSourceConfig(JsonNode sourceConfig) { + this.sourceConfig = sourceConfig; + } + + public JsonNode getDatabaseConfig() { + return databaseConfig; + } + + public void setDatabaseConfig(JsonNode databaseConfig) { + this.databaseConfig = databaseConfig; + } } diff --git a/airbyte-db/src/main/java/io/airbyte/db/jdbc/StreamingJdbcDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/jdbc/StreamingJdbcDatabase.java index 89d92840a792..58149efd3be5 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/jdbc/StreamingJdbcDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/jdbc/StreamingJdbcDatabase.java @@ -40,7 +40,7 @@ * allows the developer to specify the correct configuration in order for a * {@link PreparedStatement} to execute as in a streaming / chunked manner. */ -public class StreamingJdbcDatabase implements JdbcDatabase { +public class StreamingJdbcDatabase extends JdbcDatabase { private final DataSource dataSource; private final JdbcDatabase database; diff --git a/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java b/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java index f322de8cb8a2..a2ad615ec91c 100644 --- a/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java +++ b/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java @@ -92,7 +92,7 @@ public ClickHouseSource() { } @Override - public JsonNode toJdbcConfig(JsonNode config) { + public JsonNode toDatabaseConfig(JsonNode config) { return Jsons.jsonNode(ImmutableMap.builder() .put("username", config.get("username").asText()) .put("password", config.get("password").asText()) diff --git a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java index c7a3b8306d3e..f301e18049b0 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java +++ b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java @@ -35,6 +35,7 @@ import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.db.jdbc.JdbcStreamingQueryConfiguration; import io.airbyte.db.jdbc.JdbcUtils; +import io.airbyte.db.jdbc.SqlDatabase; import io.airbyte.integrations.base.Source; import io.airbyte.integrations.source.relationaldb.AbstractRelationalDbSource; import io.airbyte.integrations.source.relationaldb.TableInfo; @@ -80,7 +81,6 @@ public abstract class AbstractJdbcSource extends AbstractRelationalDbSource> aggregatePrimateKeys(List>> discoverInternal(final JdbcDatab throws Exception { final Set internalSchemas = new HashSet<>(getExcludedInternalNameSpaces()); return database.bufferedResultSetQuery( - conn -> conn.getMetaData().getColumns(getCatalog(), null, null, null), + conn -> conn.getMetaData().getColumns(getCatalog(database), null, null, null), resultSet -> Jsons.jsonNode(ImmutableMap.builder() // we always want a namespace, if we cannot get a schema, use db name. 
.put(INTERNAL_SCHEMA_NAME, @@ -195,7 +182,7 @@ protected Map> discoverPrimaryKeys(JdbcDatabase database, try { // Get all primary keys without specifying a table name final Map> tablePrimaryKeys = aggregatePrimateKeys(database.bufferedResultSetQuery( - conn -> conn.getMetaData().getPrimaryKeys(getCatalog(), null, null), + conn -> conn.getMetaData().getPrimaryKeys(getCatalog(database), null, null), r -> { final String schemaName = r.getObject(JDBC_COLUMN_SCHEMA_NAME) != null ? r.getString(JDBC_COLUMN_SCHEMA_NAME) : r.getString(JDBC_COLUMN_DATABASE_NAME); @@ -217,7 +204,7 @@ protected Map> discoverPrimaryKeys(JdbcDatabase database, final String streamName = JdbcUtils.getFullyQualifiedTableName(tableInfo.getNameSpace(), tableInfo.getName()); try { final Map> primaryKeys = aggregatePrimateKeys(database.bufferedResultSetQuery( - conn -> conn.getMetaData().getPrimaryKeys(getCatalog(), tableInfo.getNameSpace(), tableInfo.getName()), + conn -> conn.getMetaData().getPrimaryKeys(getCatalog(database), tableInfo.getNameSpace(), tableInfo.getName()), r -> new SimpleImmutableEntry<>(streamName, r.getString(JDBC_COLUMN_COLUMN_NAME)))); return primaryKeys.getOrDefault(streamName, Collections.emptyList()); } catch (SQLException e) { @@ -266,7 +253,7 @@ public AutoCloseableIterator queryTableIncremental(JdbcDatabase databa @Override public JdbcDatabase createDatabase(JsonNode config) throws SQLException { - jdbcConfig = toJdbcConfig(config); + JsonNode jdbcConfig = toDatabaseConfig(config); JdbcDatabase database = Databases.createStreamingJdbcDatabase( jdbcConfig.get("username").asText(), @@ -276,7 +263,7 @@ public JdbcDatabase createDatabase(JsonNode config) throws SQLException { jdbcStreamingQueryConfiguration, jdbcConfig.has("connection_properties") ? jdbcConfig.get("connection_properties").asText() : null); - quoteString = database.getMetaData().getIdentifierQuoteString(); + quoteString = (quoteString == null ? database.getMetaData().getIdentifierQuoteString() : quoteString); return database; } diff --git a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSource.java b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSource.java index b98609efdec5..16ed40cd5105 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSource.java +++ b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSource.java @@ -42,7 +42,7 @@ public JdbcSource() { // no-op for JdbcSource since the config it receives is designed to be use for JDBC. 
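// For orientation, a minimal sketch of how the SqlDatabase source-config / database-config split
// introduced above is meant to be wired together; the helper name and standalone-method framing
// are illustrative assumptions, the real wiring lives in AbstractRelationalDbSource#createDatabaseInternal.
static JdbcDatabase openDatabase(AbstractJdbcSource source, JsonNode sourceConfig) throws Exception {
  // the connection itself is built from the JDBC-shaped config produced by toDatabaseConfig(...)
  JdbcDatabase database = source.createDatabase(sourceConfig);
  // the raw spec.json-shaped config is kept alongside it, so connector code (e.g. the MySQL CDC
  // path) can later read database.getSourceConfig() instead of holding the config in a field on
  // the source object
  database.setSourceConfig(sourceConfig);
  database.setDatabaseConfig(source.toDatabaseConfig(sourceConfig));
  return database;
}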
@Override - public JsonNode toJdbcConfig(JsonNode config) { + public JsonNode toDatabaseConfig(JsonNode config) { return config; } diff --git a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSourceAcceptanceTest.java index 4afab918e110..2f92d4233f41 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSourceAcceptanceTest.java @@ -115,7 +115,7 @@ public PostgresTestSource() { } @Override - public JsonNode toJdbcConfig(JsonNode config) { + public JsonNode toDatabaseConfig(JsonNode config) { ImmutableMap.Builder configBuilder = ImmutableMap.builder() .put("username", config.get("username").asText()) .put("jdbc_url", String.format("jdbc:postgresql://%s:%s/%s", diff --git a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/DefaultJdbcStressTest.java b/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/DefaultJdbcStressTest.java index db975c9328a0..3d1334029f1f 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/DefaultJdbcStressTest.java +++ b/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/DefaultJdbcStressTest.java @@ -120,7 +120,7 @@ public PostgresTestSource() { } @Override - public JsonNode toJdbcConfig(JsonNode config) { + public JsonNode toDatabaseConfig(JsonNode config) { ImmutableMap.Builder configBuilder = ImmutableMap.builder() .put("username", config.get("username").asText()) .put("jdbc_url", String.format("jdbc:postgresql://%s:%s/%s", diff --git a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/JdbcSourceStressTest.java b/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/JdbcSourceStressTest.java index ba15c7ca054a..c05ce36f4d3c 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/JdbcSourceStressTest.java +++ b/airbyte-integrations/connectors/source-jdbc/src/test/java/io/airbyte/integrations/source/jdbc/JdbcSourceStressTest.java @@ -118,7 +118,7 @@ public PostgresTestSource() { } @Override - public JsonNode toJdbcConfig(JsonNode config) { + public JsonNode toDatabaseConfig(JsonNode config) { ImmutableMap.Builder configBuilder = ImmutableMap.builder() .put("username", config.get("username").asText()) .put("jdbc_url", String.format("jdbc:postgresql://%s:%s/%s", diff --git a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java index 7872b9f21e2d..a4aca9236a69 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java @@ -168,7 +168,7 @@ protected String primaryKeyClause(List columns) { public void setup() throws 
Exception { source = getSource(); config = getConfig(); - final JsonNode jdbcConfig = source.toJdbcConfig(config); + final JsonNode jdbcConfig = source.toDatabaseConfig(config); streamName = TABLE_NAME; diff --git a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcStressTest.java b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcStressTest.java index 437d6a9d861e..45a26451f68d 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcStressTest.java +++ b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcStressTest.java @@ -128,7 +128,7 @@ public void setup() throws Exception { streamName = getDefaultSchemaName().map(val -> val + "." + TABLE_NAME).orElse(TABLE_NAME); config = getConfig(); - final JsonNode jdbcConfig = source.toJdbcConfig(config); + final JsonNode jdbcConfig = source.toDatabaseConfig(config); JdbcDatabase database = Databases.createJdbcDatabase( jdbcConfig.get("username").asText(), jdbcConfig.has("password") ? jdbcConfig.get("password").asText() : null, diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java index d70d2967eace..ec48ca2e5159 100644 --- a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java +++ b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java @@ -49,7 +49,7 @@ public MssqlSource() { } @Override - public JsonNode toJdbcConfig(JsonNode mssqlConfig) { + public JsonNode toDatabaseConfig(JsonNode mssqlConfig) { List additionalParameters = new ArrayList<>(); final StringBuilder jdbcUrl = new StringBuilder(String.format("jdbc:sqlserver://%s:%s;databaseName=%s;", diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java index bc87448b6aaa..c1d18b9b07a4 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java +++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java @@ -190,7 +190,7 @@ public AirbyteCatalog discover(JsonNode config) throws Exception { } @Override - public JsonNode toJdbcConfig(JsonNode config) { + public JsonNode toDatabaseConfig(JsonNode config) { final StringBuilder jdbc_url = new StringBuilder(String.format("jdbc:mysql://%s:%s/%s", config.get("host").asText(), config.get("port").asText(), @@ -229,14 +229,15 @@ public List> getIncrementalIterators(JdbcD Map>> tableNameToTable, StateManager stateManager, Instant emittedAt) { - if (isCdc(getJdbcConfig()) && shouldUseCDC(catalog)) { + JsonNode sourceConfig = database.getSourceConfig(); + if (isCdc(sourceConfig) && shouldUseCDC(catalog)) { LOGGER.info("using CDC: {}", true); // TODO: Figure out how to set the isCDC of stateManager to true. 
Its always false final AirbyteFileOffsetBackingStore offsetManager = initializeState(stateManager); AirbyteSchemaHistoryStorage schemaHistoryManager = initializeDBHistory(stateManager); - FilteredFileDatabaseHistory.setDatabaseName(getJdbcConfig().get("database").asText()); + FilteredFileDatabaseHistory.setDatabaseName(sourceConfig.get("database").asText()); final LinkedBlockingQueue> queue = new LinkedBlockingQueue<>(); - final DebeziumRecordPublisher publisher = new DebeziumRecordPublisher(getJdbcConfig(), catalog, offsetManager, schemaHistoryManager); + final DebeziumRecordPublisher publisher = new DebeziumRecordPublisher(sourceConfig, catalog, offsetManager, schemaHistoryManager); publisher.start(queue); Optional targetFilePosition = TargetFilePosition diff --git a/airbyte-integrations/connectors/source-oracle/src/main/java/io/airbyte/integrations/source/oracle/OracleSource.java b/airbyte-integrations/connectors/source-oracle/src/main/java/io/airbyte/integrations/source/oracle/OracleSource.java index 67475e6f09fe..1c84b3f866d1 100644 --- a/airbyte-integrations/connectors/source-oracle/src/main/java/io/airbyte/integrations/source/oracle/OracleSource.java +++ b/airbyte-integrations/connectors/source-oracle/src/main/java/io/airbyte/integrations/source/oracle/OracleSource.java @@ -46,7 +46,7 @@ public OracleSource() { } @Override - public JsonNode toJdbcConfig(JsonNode config) { + public JsonNode toDatabaseConfig(JsonNode config) { final ImmutableMap.Builder configBuilder = ImmutableMap.builder() .put("username", config.get("username").asText()) .put("jdbc_url", String.format("jdbc:oracle:thin:@//%s:%s/%s", diff --git a/airbyte-integrations/connectors/source-oracle/src/test/java/io/airbyte/integrations/source/oracle/OracleStressTest.java b/airbyte-integrations/connectors/source-oracle/src/test/java/io/airbyte/integrations/source/oracle/OracleStressTest.java index b93329571401..f7d860eb8c48 100644 --- a/airbyte-integrations/connectors/source-oracle/src/test/java/io/airbyte/integrations/source/oracle/OracleStressTest.java +++ b/airbyte-integrations/connectors/source-oracle/src/test/java/io/airbyte/integrations/source/oracle/OracleStressTest.java @@ -115,7 +115,7 @@ public OracleTestSource() { } @Override - public JsonNode toJdbcConfig(JsonNode config) { + public JsonNode toDatabaseConfig(JsonNode config) { final ImmutableMap.Builder configBuilder = ImmutableMap.builder() .put("username", config.get("username").asText()) .put("jdbc_url", String.format("jdbc:oracle:thin:@//%s:%s/xe", diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java index 6c5f33cdcdcf..0c840ba9bbe7 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java @@ -85,7 +85,7 @@ public PostgresSource() { } @Override - public JsonNode toJdbcConfig(JsonNode config) { + public JsonNode toDatabaseConfig(JsonNode config) { List additionalParameters = new ArrayList<>(); diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresStressTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresStressTest.java 
index 99c6fc2ca3b4..d059aa0ce7b9 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresStressTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresStressTest.java @@ -120,7 +120,7 @@ public PostgresTestSource() { } @Override - public JsonNode toJdbcConfig(JsonNode config) { + public JsonNode toDatabaseConfig(JsonNode config) { ImmutableMap.Builder configBuilder = ImmutableMap.builder() .put("username", config.get("username").asText()) .put("jdbc_url", String.format("jdbc:postgresql://%s:%s/%s", diff --git a/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSource.java b/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSource.java index 58f0927839e1..18aba29964fc 100644 --- a/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSource.java +++ b/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSource.java @@ -46,7 +46,7 @@ public RedshiftSource() { } @Override - public JsonNode toJdbcConfig(JsonNode redshiftConfig) { + public JsonNode toDatabaseConfig(JsonNode redshiftConfig) { return Jsons.jsonNode(ImmutableMap.builder() .put("username", redshiftConfig.get("username").asText()) .put("password", redshiftConfig.get("password").asText()) diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java index 0429f78b0454..a16687c25a6e 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java @@ -80,7 +80,7 @@ public abstract class AbstractRelationalDbSource checkOperation : getCheckOperations(config)) { checkOperation.accept(database); } @@ -94,6 +94,15 @@ public AirbyteConnectionStatus check(JsonNode config) { } } + /** + * Map a database implementation-specific configuration to json object that adheres to the + * AbstractJdbcSource config spec. See resources/spec.json. + * + * @param config database implementation-specific configuration. + * @return jdbc spec. + */ + public abstract JsonNode toDatabaseConfig(JsonNode config); + /** * Configures a list of operations that can be used to check the connection to the source. 
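// A minimal illustration of the toDatabaseConfig(...) contract documented above, modeled on the
// connector implementations elsewhere in this patch series (Postgres, Oracle, Redshift); the
// "host"/"port"/"database" keys are assumptions standing in for whatever the connector's
// spec.json actually defines.
public JsonNode toDatabaseConfig(JsonNode config) {
  // map the connector-specific spec.json shape onto the generic username / password / jdbc_url
  // shape that createDatabase(...) consumes
  return Jsons.jsonNode(ImmutableMap.builder()
      .put("username", config.get("username").asText())
      .put("password", config.get("password").asText())
      .put("jdbc_url", String.format("jdbc:postgresql://%s:%s/%s",
          config.get("host").asText(),
          config.get("port").asText(),
          config.get("database").asText()))
      .build());
}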
* @@ -103,7 +112,7 @@ public AirbyteConnectionStatus check(JsonNode config) { @Override public AirbyteCatalog discover(JsonNode config) throws Exception { - try (final K database = createDatabase(config)) { + try (final K database = createDatabaseInternal(config)) { List streams = getTables(database).stream() .map(tableInfo -> CatalogHelpers .createAirbyteStream(tableInfo.getName(), tableInfo.getNameSpace(), tableInfo.getFields()) @@ -121,7 +130,7 @@ public AutoCloseableIterator read(JsonNode config, ConfiguredAir catalog); final Instant emittedAt = Instant.now(); - final K database = createDatabase(config); + final K database = createDatabaseInternal(config); final Map>> fullyQualifiedTableNameToInfo = discoverWithoutSystemTables(database) @@ -431,6 +440,13 @@ public abstract AutoCloseableIterator queryTableIncremental(K database T cursorFieldType, String cursor); + private K createDatabaseInternal(JsonNode sourceConfig) throws Exception { + K database = createDatabase(sourceConfig); + database.setSourceConfig(sourceConfig); + database.setDatabaseConfig(toDatabaseConfig(sourceConfig)); + return database; + } + protected abstract K createDatabase(JsonNode config) throws Exception; } diff --git a/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/main/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcSource.java b/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/main/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcSource.java index a4fb56fdfaf9..03b56c879a2c 100644 --- a/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/main/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcSource.java +++ b/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/main/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcSource.java @@ -48,7 +48,7 @@ public ScaffoldJavaJdbcSource() { // TODO The config is based on spec.json, update according to your DB @Override - public JsonNode toJdbcConfig(JsonNode aqqConfig) { + public JsonNode toDatabaseConfig(JsonNode aqqConfig) { // TODO create DB config. 
Ex: "Jsons.jsonNode(ImmutableMap.builder().put("username", // userName).put("password", pas)...build()); return null; diff --git a/airbyte-integrations/connectors/source-snowflake/src/main/java/io.airbyte.integrations.source.snowflake/SnowflakeSource.java b/airbyte-integrations/connectors/source-snowflake/src/main/java/io.airbyte.integrations.source.snowflake/SnowflakeSource.java index 465af0896b8d..481c0c24cfa9 100644 --- a/airbyte-integrations/connectors/source-snowflake/src/main/java/io.airbyte.integrations.source.snowflake/SnowflakeSource.java +++ b/airbyte-integrations/connectors/source-snowflake/src/main/java/io.airbyte.integrations.source.snowflake/SnowflakeSource.java @@ -51,7 +51,7 @@ public static void main(String[] args) throws Exception { } @Override - public JsonNode toJdbcConfig(JsonNode config) { + public JsonNode toDatabaseConfig(JsonNode config) { return Jsons.jsonNode(ImmutableMap.builder() .put("jdbc_url", String.format("jdbc:snowflake://%s/", config.get("host").asText())) From 042c5272df557040b75e3367ffdc628aed6312e0 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Tue, 15 Jun 2021 10:39:24 +0300 Subject: [PATCH 06/63] make in line jdbc source with a new impl --- .../source-java-jdbc/build.gradle | 1 + .../connectors/source-clickhouse/build.gradle | 1 + .../source/clickhouse/ClickHouseSource.java | 13 +++++++------ .../connectors/source-mssql/build.gradle | 1 + .../connectors/source-oracle/build.gradle | 1 + .../postgres/AirbyteFileOffsetBackingStore.java | 2 +- .../source/postgres/PostgresSource.java | 4 ++-- .../postgres/AirbyteFileOffsetBackingStoreTest.java | 2 +- .../connectors/source-redshift/build.gradle | 1 + .../source-scaffold-java-jdbc/build.gradle | 1 + .../connectors/source-snowflake/build.gradle | 1 + 11 files changed, 18 insertions(+), 10 deletions(-) diff --git a/airbyte-integrations/connector-templates/source-java-jdbc/build.gradle b/airbyte-integrations/connector-templates/source-java-jdbc/build.gradle index 91bde98a2db7..db0d82273bea 100644 --- a/airbyte-integrations/connector-templates/source-java-jdbc/build.gradle +++ b/airbyte-integrations/connector-templates/source-java-jdbc/build.gradle @@ -13,6 +13,7 @@ dependencies { implementation project(':airbyte-integrations:bases:base-java') implementation project(':airbyte-protocol:models') implementation project(':airbyte-integrations:connectors:source-jdbc') + implementation project(':airbyte-integrations:connectors:source-relational-db') //TODO Add jdbc driver import here. 
Ex: implementation 'com.microsoft.sqlserver:mssql-jdbc:8.4.1.jre14' diff --git a/airbyte-integrations/connectors/source-clickhouse/build.gradle b/airbyte-integrations/connectors/source-clickhouse/build.gradle index 4e1335ade14e..dd98dade6444 100644 --- a/airbyte-integrations/connectors/source-clickhouse/build.gradle +++ b/airbyte-integrations/connectors/source-clickhouse/build.gradle @@ -12,6 +12,7 @@ dependencies { implementation project(':airbyte-db') implementation project(':airbyte-integrations:bases:base-java') implementation project(':airbyte-integrations:connectors:source-jdbc') + implementation project(':airbyte-integrations:connectors:source-relational-db') implementation project(':airbyte-protocol:models') implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs) diff --git a/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java b/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java index a2ad615ec91c..0463a4406598 100644 --- a/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java +++ b/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java @@ -33,12 +33,14 @@ import io.airbyte.integrations.base.IntegrationRunner; import io.airbyte.integrations.base.Source; import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; +import io.airbyte.integrations.source.relationaldb.TableInfo; +import io.airbyte.protocol.models.AbstractField; +import java.sql.JDBCType; import java.sql.PreparedStatement; import java.sql.SQLException; import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import org.slf4j.Logger; @@ -53,21 +55,20 @@ public class ClickHouseSource extends AbstractJdbcSource implements Source { * query the system table mentioned here * https://clickhouse.tech/docs/en/operations/system-tables/columns/ to fetch the primary keys. */ + @Override protected Map> discoverPrimaryKeys(JdbcDatabase database, - Optional databaseOptional, - List tableInfos) { - + List>> tableInfos) { return tableInfos.stream() .collect(Collectors.toMap( tableInfo -> JdbcUtils - .getFullyQualifiedTableName(tableInfo.getSchemaName(), tableInfo.getName()), + .getFullyQualifiedTableName(tableInfo.getNameSpace(), tableInfo.getName()), tableInfo -> { try { return database.resultSetQuery(connection -> { String sql = "SELECT name FROM system.columns WHERE database = ? AND table = ? 
AND is_in_primary_key = 1"; PreparedStatement preparedStatement = connection.prepareStatement(sql); - preparedStatement.setString(1, tableInfo.getSchemaName()); + preparedStatement.setString(1, tableInfo.getNameSpace()); preparedStatement.setString(2, tableInfo.getName()); return preparedStatement.executeQuery(); diff --git a/airbyte-integrations/connectors/source-mssql/build.gradle b/airbyte-integrations/connectors/source-mssql/build.gradle index 74c337c27e45..fdfc0183929c 100644 --- a/airbyte-integrations/connectors/source-mssql/build.gradle +++ b/airbyte-integrations/connectors/source-mssql/build.gradle @@ -15,6 +15,7 @@ dependencies { implementation project(':airbyte-integrations:bases:base-java') implementation project(':airbyte-protocol:models') implementation project(':airbyte-integrations:connectors:source-jdbc') + implementation project(':airbyte-integrations:connectors:source-relational-db') implementation 'com.microsoft.sqlserver:mssql-jdbc:8.4.1.jre14' diff --git a/airbyte-integrations/connectors/source-oracle/build.gradle b/airbyte-integrations/connectors/source-oracle/build.gradle index f4d271d260e8..c2ce5d033d7d 100644 --- a/airbyte-integrations/connectors/source-oracle/build.gradle +++ b/airbyte-integrations/connectors/source-oracle/build.gradle @@ -17,6 +17,7 @@ dependencies { implementation project(':airbyte-integrations:bases:base-java') implementation project(':airbyte-protocol:models') implementation project(':airbyte-integrations:connectors:source-jdbc') + implementation project(':airbyte-integrations:connectors:source-relational-db') implementation "com.oracle.database.jdbc:ojdbc8-production:19.7.0.0" diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/AirbyteFileOffsetBackingStore.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/AirbyteFileOffsetBackingStore.java index 0976db4cbe3e..2a556a3ecb15 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/AirbyteFileOffsetBackingStore.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/AirbyteFileOffsetBackingStore.java @@ -27,7 +27,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.google.common.base.Preconditions; import io.airbyte.commons.json.Jsons; -import io.airbyte.integrations.source.jdbc.models.CdcState; +import io.airbyte.integrations.source.relationaldb.models.CdcState; import java.io.EOFException; import java.io.IOException; import java.io.ObjectOutputStream; diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java index 0c840ba9bbe7..845431d5194e 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java @@ -229,7 +229,7 @@ public List> getIncrementalIterators(JdbcD * have a check here as well to make sure that if no table is in INCREMENTAL mode then skip this * part */ - if (isCdc(getJdbcConfig())) { + if (isCdc(database.getSourceConfig())) { // State works differently in CDC than it does in convention incremental. 
The state is written to an // offset file that debezium reads from. Then once all records are replicated, we read back that // offset file (which will have been updated by debezium) and set it in the state. There is no @@ -241,7 +241,7 @@ public List> getIncrementalIterators(JdbcD final LinkedBlockingQueue> queue = new LinkedBlockingQueue<>(); - final DebeziumRecordPublisher publisher = new DebeziumRecordPublisher(getJdbcConfig(), catalog, offsetManager); + final DebeziumRecordPublisher publisher = new DebeziumRecordPublisher(database.getSourceConfig(), catalog, offsetManager); publisher.start(queue); // handle state machine around pub/sub logic. diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/AirbyteFileOffsetBackingStoreTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/AirbyteFileOffsetBackingStoreTest.java index f5638fec8914..66bfa02fab43 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/AirbyteFileOffsetBackingStoreTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/AirbyteFileOffsetBackingStoreTest.java @@ -29,7 +29,7 @@ import io.airbyte.commons.io.IOs; import io.airbyte.commons.resources.MoreResources; -import io.airbyte.integrations.source.jdbc.models.CdcState; +import io.airbyte.integrations.source.relationaldb.models.CdcState; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; diff --git a/airbyte-integrations/connectors/source-redshift/build.gradle b/airbyte-integrations/connectors/source-redshift/build.gradle index 9a184339ad5e..ab9f0929ba4d 100644 --- a/airbyte-integrations/connectors/source-redshift/build.gradle +++ b/airbyte-integrations/connectors/source-redshift/build.gradle @@ -17,6 +17,7 @@ dependencies { implementation project(':airbyte-integrations:bases:base-java') implementation project(':airbyte-protocol:models') implementation project(':airbyte-integrations:connectors:source-jdbc') + implementation project(':airbyte-integrations:connectors:source-relational-db') implementation 'com.amazon.redshift:redshift-jdbc42:1.2.43.1067' diff --git a/airbyte-integrations/connectors/source-scaffold-java-jdbc/build.gradle b/airbyte-integrations/connectors/source-scaffold-java-jdbc/build.gradle index 724ee928de6f..31ee3f6deb32 100644 --- a/airbyte-integrations/connectors/source-scaffold-java-jdbc/build.gradle +++ b/airbyte-integrations/connectors/source-scaffold-java-jdbc/build.gradle @@ -13,6 +13,7 @@ dependencies { implementation project(':airbyte-integrations:bases:base-java') implementation project(':airbyte-protocol:models') implementation project(':airbyte-integrations:connectors:source-jdbc') + implementation project(':airbyte-integrations:connectors:source-relational-db') //TODO Add jdbc driver import here. 
Ex: implementation 'com.microsoft.sqlserver:mssql-jdbc:8.4.1.jre14' diff --git a/airbyte-integrations/connectors/source-snowflake/build.gradle b/airbyte-integrations/connectors/source-snowflake/build.gradle index 115052766072..aa0cbc6d2bc3 100644 --- a/airbyte-integrations/connectors/source-snowflake/build.gradle +++ b/airbyte-integrations/connectors/source-snowflake/build.gradle @@ -12,6 +12,7 @@ dependencies { implementation project(':airbyte-db') implementation project(':airbyte-integrations:bases:base-java') implementation project(':airbyte-integrations:connectors:source-jdbc') + implementation project(':airbyte-integrations:connectors:source-relational-db') implementation project(':airbyte-protocol:models') implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs) implementation group: 'net.snowflake', name: 'snowflake-jdbc', version: '3.12.14' From 147e1662428a994dbaa18592b3f62850da57afae Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Tue, 15 Jun 2021 17:50:13 +0300 Subject: [PATCH 07/63] Fix ScaffoldJavaJdbcSource template --- .../{{snakeCase name}}/{{pascalCase name}}Source.java.hbs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connector-templates/source-java-jdbc/src/main/java/io/airbyte/integrations/source/{{snakeCase name}}/{{pascalCase name}}Source.java.hbs b/airbyte-integrations/connector-templates/source-java-jdbc/src/main/java/io/airbyte/integrations/source/{{snakeCase name}}/{{pascalCase name}}Source.java.hbs index da50488e000f..ce5ed7254b71 100644 --- a/airbyte-integrations/connector-templates/source-java-jdbc/src/main/java/io/airbyte/integrations/source/{{snakeCase name}}/{{pascalCase name}}Source.java.hbs +++ b/airbyte-integrations/connector-templates/source-java-jdbc/src/main/java/io/airbyte/integrations/source/{{snakeCase name}}/{{pascalCase name}}Source.java.hbs @@ -48,14 +48,14 @@ public class {{pascalCase name}}Source extends AbstractJdbcSource implements Sou // TODO The config is based on spec.json, update according to your DB @Override - public JsonNode toJdbcConfig(JsonNode aqqConfig) { + public JsonNode toDatabaseConfig(JsonNode aqqConfig) { // TODO create DB config. Ex: "Jsons.jsonNode(ImmutableMap.builder().put("username", // userName).put("password", pas)...build()); return null; } @Override - public Set getExcludedInternalSchemas() { + public Set getExcludedInternalNameSpaces() { // TODO Add tables to exaclude, Ex "INFORMATION_SCHEMA", "sys", "spt_fallback_db", etc return Set.of(""); } From 246528a18de687b1a7cf34ba6584997f6097db2f Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Thu, 17 Jun 2021 11:10:17 +0300 Subject: [PATCH 08/63] rename `AbstractField` to `CommonField`. Now it s not an abstract class. 
+ add default implementation for `AbstractRelationalDbSource.getFullyQualifiedTableName` --- .../source/clickhouse/ClickHouseSource.java | 4 +- .../source/jdbc/AbstractJdbcSource.java | 15 +++---- .../source/mysql/MySqlSource.java | 4 +- .../source/postgres/PostgresSource.java | 4 +- .../AbstractRelationalDbSource.java | 42 ++++++++++--------- .../{AbstractField.java => CommonField.java} | 6 +-- .../io/airbyte/protocol/models/Field.java | 2 +- 7 files changed, 37 insertions(+), 40 deletions(-) rename airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/{AbstractField.java => CommonField.java} (92%) diff --git a/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java b/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java index 0463a4406598..dacc68a2128d 100644 --- a/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java +++ b/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java @@ -34,7 +34,7 @@ import io.airbyte.integrations.base.Source; import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.integrations.source.relationaldb.TableInfo; -import io.airbyte.protocol.models.AbstractField; +import io.airbyte.protocol.models.CommonField; import java.sql.JDBCType; import java.sql.PreparedStatement; import java.sql.SQLException; @@ -58,7 +58,7 @@ public class ClickHouseSource extends AbstractJdbcSource implements Source { @Override protected Map> discoverPrimaryKeys(JdbcDatabase database, - List>> tableInfos) { + List>> tableInfos) { return tableInfos.stream() .collect(Collectors.toMap( tableInfo -> JdbcUtils diff --git a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java index f301e18049b0..e03a25d5853d 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java +++ b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java @@ -39,7 +39,7 @@ import io.airbyte.integrations.base.Source; import io.airbyte.integrations.source.relationaldb.AbstractRelationalDbSource; import io.airbyte.integrations.source.relationaldb.TableInfo; -import io.airbyte.protocol.models.AbstractField; +import io.airbyte.protocol.models.CommonField; import io.airbyte.protocol.models.JsonSchemaPrimitive; import java.sql.JDBCType; import java.sql.PreparedStatement; @@ -122,7 +122,7 @@ private String getCatalog(SqlDatabase database) { } @Override - public List>> discoverInternal(final JdbcDatabase database) + public List>> discoverInternal(final JdbcDatabase database) throws Exception { final Set internalSchemas = new HashSet<>(getExcludedInternalNameSpaces()); return database.bufferedResultSetQuery( @@ -143,7 +143,7 @@ public List>> discoverInternal(final JdbcDatab .collect(Collectors.groupingBy(t -> ImmutablePair.of(t.get(INTERNAL_SCHEMA_NAME).asText(), t.get(INTERNAL_TABLE_NAME).asText()))) .values() .stream() - .map(fields -> TableInfo.>builder() + .map(fields -> TableInfo.>builder() .nameSpace(fields.get(0).get(INTERNAL_SCHEMA_NAME).asText()) 
.name(fields.get(0).get(INTERNAL_TABLE_NAME).asText()) .fields(fields.stream() @@ -159,7 +159,7 @@ public List>> discoverInternal(final JdbcDatab f.get(INTERNAL_COLUMN_TYPE))); jdbcType = JDBCType.VARCHAR; } - return new AbstractField(f.get(INTERNAL_COLUMN_NAME).asText(), jdbcType) {}; + return new CommonField(f.get(INTERNAL_COLUMN_NAME).asText(), jdbcType) {}; }) .collect(Collectors.toList())) .build()) @@ -171,14 +171,9 @@ protected JsonSchemaPrimitive getType(JDBCType columnType) { return JdbcUtils.getType(columnType); } - @Override - protected String getFullyQualifiedTableName(String nameSpace, String tableName) { - return JdbcUtils.getFullyQualifiedTableName(nameSpace, tableName); - } - @Override protected Map> discoverPrimaryKeys(JdbcDatabase database, - List>> tableInfos) { + List>> tableInfos) { try { // Get all primary keys without specifying a table name final Map> tablePrimaryKeys = aggregatePrimateKeys(database.bufferedResultSetQuery( diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java index c1d18b9b07a4..b0b32972d30c 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java +++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java @@ -44,7 +44,7 @@ import io.airbyte.integrations.source.relationaldb.StateManager; import io.airbyte.integrations.source.relationaldb.TableInfo; import io.airbyte.integrations.source.relationaldb.models.CdcState; -import io.airbyte.protocol.models.AbstractField; +import io.airbyte.protocol.models.CommonField; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; @@ -226,7 +226,7 @@ private static boolean shouldUseCDC(ConfiguredAirbyteCatalog catalog) { @Override public List> getIncrementalIterators(JdbcDatabase database, ConfiguredAirbyteCatalog catalog, - Map>> tableNameToTable, + Map>> tableNameToTable, StateManager stateManager, Instant emittedAt) { JsonNode sourceConfig = database.getSourceConfig(); diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java index 845431d5194e..92872bd20b54 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java @@ -46,7 +46,7 @@ import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.integrations.source.relationaldb.StateManager; import io.airbyte.integrations.source.relationaldb.TableInfo; -import io.airbyte.protocol.models.AbstractField; +import io.airbyte.protocol.models.CommonField; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteConnectionStatus; import io.airbyte.protocol.models.AirbyteMessage; @@ -218,7 +218,7 @@ private AirbyteFileOffsetBackingStore initializeState(StateManager stateManager) @Override public List> getIncrementalIterators(JdbcDatabase database, ConfiguredAirbyteCatalog catalog, - Map>> tableNameToTable, + Map>> 
tableNameToTable, StateManager stateManager, Instant emittedAt) { /** diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java index a16687c25a6e..e3344e1e8896 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java @@ -39,7 +39,7 @@ import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; import io.airbyte.integrations.base.Source; import io.airbyte.integrations.source.relationaldb.models.JdbcState; -import io.airbyte.protocol.models.AbstractField; +import io.airbyte.protocol.models.CommonField; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteConnectionStatus; import io.airbyte.protocol.models.AirbyteConnectionStatus.Status; @@ -132,7 +132,7 @@ public AutoCloseableIterator read(JsonNode config, ConfiguredAir final K database = createDatabaseInternal(config); - final Map>> fullyQualifiedTableNameToInfo = + final Map>> fullyQualifiedTableNameToInfo = discoverWithoutSystemTables(database) .stream() .collect(Collectors.toMap(t -> String.format("%s.%s", t.getNameSpace(), t.getName()), Function @@ -157,7 +157,7 @@ public AutoCloseableIterator read(JsonNode config, ConfiguredAir public List> getIncrementalIterators(K database, ConfiguredAirbyteCatalog catalog, - Map>> tableNameToTable, + Map>> tableNameToTable, StateManager stateManager, Instant emittedAt) { return getSelectedIterators( @@ -171,7 +171,7 @@ public List> getIncrementalIterators(K dat public List> getFullRefreshIterators(K database, ConfiguredAirbyteCatalog catalog, - Map>> tableNameToTable, + Map>> tableNameToTable, StateManager stateManager, Instant emittedAt) { return getSelectedIterators( @@ -185,7 +185,7 @@ public List> getFullRefreshIterators(K dat private List> getSelectedIterators(K database, ConfiguredAirbyteCatalog catalog, - Map>> tableNameToTable, + Map>> tableNameToTable, StateManager stateManager, Instant emittedAt, Predicate selector) { @@ -199,7 +199,7 @@ private List> getSelectedIterators(K datab continue; } - final TableInfo> table = tableNameToTable.get(fullyQualifiedTableName); + final TableInfo> table = tableNameToTable.get(fullyQualifiedTableName); final AutoCloseableIterator tableReadIterator = createReadIterator( database, airbyteStream, @@ -215,7 +215,7 @@ private List> getSelectedIterators(K datab private AutoCloseableIterator createReadIterator(K database, ConfiguredAirbyteStream airbyteStream, - TableInfo> table, + TableInfo> table, StateManager stateManager, Instant emittedAt) { final String streamName = airbyteStream.getStream().getName(); @@ -224,7 +224,7 @@ private AutoCloseableIterator createReadIterator(K database, final Set selectedFieldsInCatalog = CatalogHelpers.getTopLevelFieldNames(airbyteStream); final List selectedDatabaseFields = table.getFields() .stream() - .map(AbstractField::getName) + .map(CommonField::getName) .filter(selectedFieldsInCatalog::contains) .collect(Collectors.toList()); @@ -273,7 +273,7 @@ private AutoCloseableIterator createReadIterator(K database, private AutoCloseableIterator getIncrementalStream(K database, ConfiguredAirbyteStream 
airbyteStream, List selectedDatabaseFields, - TableInfo> table, + TableInfo> table, String cursor, Instant emittedAt) { final String streamName = airbyteStream.getStream().getName(); @@ -281,7 +281,7 @@ private AutoCloseableIterator getIncrementalStream(K database, final String cursorField = IncrementalUtils.getCursorField(airbyteStream); final T cursorJdbcType = table.getFields().stream() .filter(info -> info.getName().equals(cursorField)) - .map(AbstractField::getType) + .map(CommonField::getType) .findFirst() .orElseThrow(); @@ -304,7 +304,7 @@ private AutoCloseableIterator getFullRefreshStream(K database, String streamName, String namespace, List selectedDatabaseFields, - TableInfo> table, + TableInfo> table, Instant emittedAt) { final AutoCloseableIterator queryStream = queryTableFullRefresh(database, selectedDatabaseFields, table.getNameSpace(), table.getName()); @@ -313,10 +313,12 @@ private AutoCloseableIterator getFullRefreshStream(K database, protected abstract JsonSchemaPrimitive getType(T columnType); - protected abstract String getFullyQualifiedTableName(String nameSpace, String tableName); + protected String getFullyQualifiedTableName(String nameSpace, String tableName) { + return nameSpace != null ? nameSpace + "." + tableName : tableName; + } private List> getTables(final K database) throws Exception { - final List>> tableInfos = discoverWithoutSystemTables(database); + final List>> tableInfos = discoverWithoutSystemTables(database); final Map> fullyQualifiedTableNameToPrimaryKeys = discoverPrimaryKeys(database, tableInfos); return tableInfos.stream() @@ -350,14 +352,14 @@ private List> getTables(final K database) throws Exception { * first, if it doesn't work, we retry one table at a time. */ protected abstract Map> discoverPrimaryKeys(K database, - List>> tableInfos); + List>> tableInfos); - protected void assertColumnsWithSameNameAreSame(String nameSpace, String tableName, List> columns) { + protected void assertColumnsWithSameNameAreSame(String nameSpace, String tableName, List> columns) { columns.stream() - .collect(Collectors.groupingBy(AbstractField::getName)) + .collect(Collectors.groupingBy(CommonField::getName)) .values() .forEach(columnsWithSameName -> { - final AbstractField comparisonColumn = columnsWithSameName.get(0); + final CommonField comparisonColumn = columnsWithSameName.get(0); columnsWithSameName.forEach(column -> { if (!column.equals(comparisonColumn)) { throw new RuntimeException( @@ -368,15 +370,15 @@ protected void assertColumnsWithSameNameAreSame(String nameSpace, String tableNa }); } - protected List>> discoverWithoutSystemTables(final K database) throws Exception { + protected List>> discoverWithoutSystemTables(final K database) throws Exception { Set systemNameSpaces = getExcludedInternalNameSpaces(); - List>> discoveredTables = discoverInternal(database); + List>> discoveredTables = discoverInternal(database); return (systemNameSpaces == null || systemNameSpaces.isEmpty() ? 
discoveredTables : discoveredTables.stream().filter(table -> !systemNameSpaces.contains(table.getNameSpace())).collect( Collectors.toList())); } - protected abstract List>> discoverInternal(final K database) + protected abstract List>> discoverInternal(final K database) throws Exception; public AutoCloseableIterator getMessageIterator(AutoCloseableIterator recordIterator, diff --git a/airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/AbstractField.java b/airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/CommonField.java similarity index 92% rename from airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/AbstractField.java rename to airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/CommonField.java index 299d4f27a5a6..b36034e1583e 100644 --- a/airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/AbstractField.java +++ b/airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/CommonField.java @@ -26,12 +26,12 @@ import java.util.Objects; -public abstract class AbstractField { +public class CommonField { private final String name; private final T type; - public AbstractField(String name, T type) { + public CommonField(String name, T type) { this.name = name; this.type = type; } @@ -53,7 +53,7 @@ public boolean equals(Object o) { return false; } - AbstractField field = (AbstractField) o; + CommonField field = (CommonField) o; return name.equals(field.name) && type == field.type; } diff --git a/airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/Field.java b/airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/Field.java index e9db8c085493..446cbc05e599 100644 --- a/airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/Field.java +++ b/airbyte-protocol/models/src/main/java/io/airbyte/protocol/models/Field.java @@ -24,7 +24,7 @@ package io.airbyte.protocol.models; -public class Field extends AbstractField { +public class Field extends CommonField { public Field(String name, JsonSchemaPrimitive type) { super(name, type); From e8d29c9870d21304264e9305be35e93e2ec4ea0b Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Thu, 17 Jun 2021 13:30:16 +0300 Subject: [PATCH 09/63] format --- .../java/io/airbyte/integrations/source/mysql/MySqlSource.java | 2 +- .../io/airbyte/integrations/source/postgres/PostgresSource.java | 2 +- .../source/relationaldb/AbstractRelationalDbSource.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java index b0b32972d30c..8a07722c6c47 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java +++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java @@ -44,12 +44,12 @@ import io.airbyte.integrations.source.relationaldb.StateManager; import io.airbyte.integrations.source.relationaldb.TableInfo; import io.airbyte.integrations.source.relationaldb.models.CdcState; -import io.airbyte.protocol.models.CommonField; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; import io.airbyte.protocol.models.AirbyteStateMessage; import io.airbyte.protocol.models.AirbyteStream; +import 
io.airbyte.protocol.models.CommonField; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.ConfiguredAirbyteStream; import io.airbyte.protocol.models.SyncMode; diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java index 92872bd20b54..d16e2a470b15 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java @@ -46,13 +46,13 @@ import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.integrations.source.relationaldb.StateManager; import io.airbyte.integrations.source.relationaldb.TableInfo; -import io.airbyte.protocol.models.CommonField; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteConnectionStatus; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; import io.airbyte.protocol.models.AirbyteStateMessage; import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.CommonField; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.SyncMode; import io.debezium.engine.ChangeEvent; diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java index e3344e1e8896..75d3d037826a 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java @@ -39,7 +39,6 @@ import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; import io.airbyte.integrations.base.Source; import io.airbyte.integrations.source.relationaldb.models.JdbcState; -import io.airbyte.protocol.models.CommonField; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteConnectionStatus; import io.airbyte.protocol.models.AirbyteConnectionStatus.Status; @@ -48,6 +47,7 @@ import io.airbyte.protocol.models.AirbyteRecordMessage; import io.airbyte.protocol.models.AirbyteStream; import io.airbyte.protocol.models.CatalogHelpers; +import io.airbyte.protocol.models.CommonField; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.ConfiguredAirbyteStream; import io.airbyte.protocol.models.Field; From 111abae92e4d9ebdcd4c16a0050e0e3e51146044 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Thu, 17 Jun 2021 22:46:20 +0300 Subject: [PATCH 10/63] rename generated files in line with their location --- .../jdbc/test/JdbcSourceAcceptanceTest.java | 36 ++-- .../source-relational-db/build.gradle | 2 +- .../AbstractRelationalDbSource.java | 187 ++++++++++-------- .../source/relationaldb/StateManager.java | 36 ++-- .../db_models.yaml} | 12 +- .../source/relationaldb/StateManagerTest.java | 46 ++--- 6 files changed, 174 insertions(+), 145 deletions(-) rename 
airbyte-integrations/connectors/source-relational-db/src/main/resources/{jdbc_models/jdbc_models.yaml => db_models/db_models.yaml} (87%) diff --git a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java index a4aca9236a69..e086700faaf5 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java @@ -43,8 +43,8 @@ import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; -import io.airbyte.integrations.source.relationaldb.models.JdbcState; -import io.airbyte.integrations.source.relationaldb.models.JdbcStreamState; +import io.airbyte.integrations.source.relationaldb.models.DbState; +import io.airbyte.integrations.source.relationaldb.models.DbStreamState; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteConnectionStatus; import io.airbyte.protocol.models.AirbyteConnectionStatus.Status; @@ -551,8 +551,8 @@ void testReadOneTableIncrementallyTwice() throws Exception { airbyteStream.setDestinationSyncMode(DestinationSyncMode.APPEND); }); - final JdbcState state = new JdbcState() - .withStreams(Lists.newArrayList(new JdbcStreamState().withStreamName(streamName).withStreamNamespace(namespace))); + final DbState state = new DbState() + .withStreams(Lists.newArrayList(new DbStreamState().withStreamName(streamName).withStreamNamespace(namespace))); final List actualMessagesFirstSync = MoreIterators .toList(source.read(config, configuredCatalog, Jsons.jsonNode(state))); @@ -591,9 +591,9 @@ void testReadOneTableIncrementallyTwice() throws Exception { expectedMessages.add(new AirbyteMessage() .withType(Type.STATE) .withState(new AirbyteStateMessage() - .withData(Jsons.jsonNode(new JdbcState() + .withData(Jsons.jsonNode(new DbState() .withCdc(false) - .withStreams(Lists.newArrayList(new JdbcStreamState() + .withStreams(Lists.newArrayList(new DbStreamState() .withStreamName(streamName) .withStreamNamespace(namespace) .withCursorField(ImmutableList.of(COL_ID)) @@ -638,8 +638,8 @@ void testReadMultipleTablesIncrementally() throws Exception { airbyteStream.setDestinationSyncMode(DestinationSyncMode.APPEND); }); - final JdbcState state = new JdbcState() - .withStreams(Lists.newArrayList(new JdbcStreamState().withStreamName(streamName).withStreamNamespace(namespace))); + final DbState state = new DbState() + .withStreams(Lists.newArrayList(new DbStreamState().withStreamName(streamName).withStreamNamespace(namespace))); final List actualMessagesFirstSync = MoreIterators .toList(source.read(config, configuredCatalog, Jsons.jsonNode(state))); @@ -665,15 +665,15 @@ void testReadMultipleTablesIncrementally() throws Exception { expectedMessagesFirstSync.add(new AirbyteMessage() .withType(Type.STATE) .withState(new AirbyteStateMessage() - .withData(Jsons.jsonNode(new JdbcState() + .withData(Jsons.jsonNode(new DbState() .withCdc(false) .withStreams(Lists.newArrayList( - new JdbcStreamState() + new DbStreamState() .withStreamName(streamName) .withStreamNamespace(namespace) .withCursorField(ImmutableList.of(COL_ID)) .withCursor("3"), - new 
JdbcStreamState() + new DbStreamState() .withStreamName(streamName2) .withStreamNamespace(namespace) .withCursorField(ImmutableList.of(COL_ID)))))))); @@ -682,15 +682,15 @@ void testReadMultipleTablesIncrementally() throws Exception { expectedMessagesFirstSync.add(new AirbyteMessage() .withType(Type.STATE) .withState(new AirbyteStateMessage() - .withData(Jsons.jsonNode(new JdbcState() + .withData(Jsons.jsonNode(new DbState() .withCdc(false) .withStreams(Lists.newArrayList( - new JdbcStreamState() + new DbStreamState() .withStreamName(streamName) .withStreamNamespace(namespace) .withCursorField(ImmutableList.of(COL_ID)) .withCursor("3"), - new JdbcStreamState() + new DbStreamState() .withStreamName(streamName2) .withStreamNamespace(namespace) .withCursorField(ImmutableList.of(COL_ID)) @@ -738,8 +738,8 @@ private void incrementalCursorCheck( airbyteStream.setCursorField(Lists.newArrayList(cursorField)); airbyteStream.setDestinationSyncMode(DestinationSyncMode.APPEND); - final JdbcState state = new JdbcState() - .withStreams(Lists.newArrayList(new JdbcStreamState() + final DbState state = new DbState() + .withStreams(Lists.newArrayList(new DbStreamState() .withStreamName(airbyteStream.getStream().getName()) .withStreamNamespace(airbyteStream.getStream().getNamespace()) .withCursorField(ImmutableList.of(initialCursorField)) @@ -757,9 +757,9 @@ private void incrementalCursorCheck( expectedMessages.add(new AirbyteMessage() .withType(Type.STATE) .withState(new AirbyteStateMessage() - .withData(Jsons.jsonNode(new JdbcState() + .withData(Jsons.jsonNode(new DbState() .withCdc(false) - .withStreams(Lists.newArrayList(new JdbcStreamState() + .withStreams(Lists.newArrayList(new DbStreamState() .withStreamName(airbyteStream.getStream().getName()) .withStreamNamespace(airbyteStream.getStream().getNamespace()) .withCursorField(ImmutableList.of(cursorField)) diff --git a/airbyte-integrations/connectors/source-relational-db/build.gradle b/airbyte-integrations/connectors/source-relational-db/build.gradle index 1788edd7993b..307eda9ed13d 100644 --- a/airbyte-integrations/connectors/source-relational-db/build.gradle +++ b/airbyte-integrations/connectors/source-relational-db/build.gradle @@ -26,7 +26,7 @@ dependencies { jsonSchema2Pojo { sourceType = SourceType.YAMLSCHEMA - source = files("${sourceSets.main.output.resourcesDir}/jdbc_models") + source = files("${sourceSets.main.output.resourcesDir}/db_models") targetDirectory = new File(project.buildDir, 'generated/src/gen/java/') removeOldOutput = true diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java index 75d3d037826a..2e52b90f4990 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java @@ -38,7 +38,7 @@ import io.airbyte.integrations.BaseConnector; import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; import io.airbyte.integrations.base.Source; -import io.airbyte.integrations.source.relationaldb.models.JdbcState; +import io.airbyte.integrations.source.relationaldb.models.DbState; import io.airbyte.protocol.models.AirbyteCatalog; import 
io.airbyte.protocol.models.AirbyteConnectionStatus; import io.airbyte.protocol.models.AirbyteConnectionStatus.Status; @@ -76,8 +76,74 @@ public abstract class AbstractRelationalDbSource> getCheckOperations(JsonNode config) throws Exception; + + /** + * Map source types and Airbyte types + * + * @param columnType source data type + * @return airbyte data type + */ + protected abstract JsonSchemaPrimitive getType(T columnType); + + /** + * Get list of system namespaces(schemas) in order to exclude them from the discover result list. + * + * @return + */ public abstract Set getExcludedInternalNameSpaces(); + /** + * Discover all available tables in the source database. + * + * @param database source database + * @return list of the source tables + * @throws Exception access to the database might lead to an exceptions. + */ + protected abstract List>> discoverInternal(final K database) + throws Exception; + + /** + * Discover Primary keys for each table and @return a map of namespace.table name to their + * associated list of primary key fields. + * + * @param database source database + * @param tableInfos list of tables + * @return map of namespace.table and primary key fields. + */ + protected abstract Map> discoverPrimaryKeys(K database, + List>> tableInfos); + + /** + * Returns quote symbol of the database + * + * @return quote symbol + */ + protected abstract String getQuoteString(); + @Override public AirbyteConnectionStatus check(JsonNode config) { try (final K database = createDatabaseInternal(config)) { @@ -94,22 +160,6 @@ public AirbyteConnectionStatus check(JsonNode config) { } } - /** - * Map a database implementation-specific configuration to json object that adheres to the - * AbstractJdbcSource config spec. See resources/spec.json. - * - * @param config database implementation-specific configuration. - * @return jdbc spec. - */ - public abstract JsonNode toDatabaseConfig(JsonNode config); - - /** - * Configures a list of operations that can be used to check the connection to the source. - * - * @return list of consumers that run queries for the check command. - */ - public abstract List> getCheckOperations(JsonNode config) throws Exception; - @Override public AirbyteCatalog discover(JsonNode config) throws Exception { try (final K database = createDatabaseInternal(config)) { @@ -126,7 +176,7 @@ public AirbyteCatalog discover(JsonNode config) throws Exception { @Override public AutoCloseableIterator read(JsonNode config, ConfiguredAirbyteCatalog catalog, JsonNode state) throws Exception { final StateManager stateManager = new StateManager( - state == null ? StateManager.emptyState() : Jsons.object(state, JdbcState.class), + state == null ? 
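The hunk above gathers the abstract hooks (toDatabaseConfig, getCheckOperations, getType, discoverInternal, discoverPrimaryKeys, getQuoteString) that a concrete relational source has to supply. As a rough illustration of the first hook only, here is a hypothetical toDatabaseConfig implementation for a Postgres-style connector; the config field names and the JDBC URL format are assumptions for the example and are not part of this patch:

    import com.fasterxml.jackson.databind.JsonNode;
    import com.google.common.collect.ImmutableMap;
    import io.airbyte.commons.json.Jsons;

    public class ToDatabaseConfigSketch {

      // Rewrites a hypothetical user-facing config (host/port/database/username) into the
      // jdbc_url style config the JDBC layer expects. Field names and URL scheme are assumed.
      public static JsonNode toDatabaseConfig(JsonNode config) {
        return Jsons.jsonNode(ImmutableMap.builder()
            .put("username", config.get("username").asText())
            .put("jdbc_url", String.format("jdbc:postgresql://%s:%s/%s",
                config.get("host").asText(),
                config.get("port").asText(),
                config.get("database").asText()))
            .build());
      }

    }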
StateManager.emptyState() : Jsons.object(state, DbState.class), catalog); final Instant emittedAt = Instant.now(); @@ -183,12 +233,12 @@ public List> getFullRefreshIterators(K dat configuredStream -> configuredStream.getSyncMode().equals(SyncMode.FULL_REFRESH)); } - private List> getSelectedIterators(K database, - ConfiguredAirbyteCatalog catalog, - Map>> tableNameToTable, - StateManager stateManager, - Instant emittedAt, - Predicate selector) { + protected List> getSelectedIterators(K database, + ConfiguredAirbyteCatalog catalog, + Map>> tableNameToTable, + StateManager stateManager, + Instant emittedAt, + Predicate selector) { final List> iteratorList = new ArrayList<>(); for (final ConfiguredAirbyteStream airbyteStream : catalog.getStreams()) { if (selector.test(airbyteStream)) { @@ -213,11 +263,11 @@ private List> getSelectedIterators(K datab return iteratorList; } - private AutoCloseableIterator createReadIterator(K database, - ConfiguredAirbyteStream airbyteStream, - TableInfo> table, - StateManager stateManager, - Instant emittedAt) { + protected AutoCloseableIterator createReadIterator(K database, + ConfiguredAirbyteStream airbyteStream, + TableInfo> table, + StateManager stateManager, + Instant emittedAt) { final String streamName = airbyteStream.getStream().getName(); final String namespace = airbyteStream.getStream().getNamespace(); final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(streamName, namespace); @@ -270,12 +320,12 @@ private AutoCloseableIterator createReadIterator(K database, }); } - private AutoCloseableIterator getIncrementalStream(K database, - ConfiguredAirbyteStream airbyteStream, - List selectedDatabaseFields, - TableInfo> table, - String cursor, - Instant emittedAt) { + protected AutoCloseableIterator getIncrementalStream(K database, + ConfiguredAirbyteStream airbyteStream, + List selectedDatabaseFields, + TableInfo> table, + String cursor, + Instant emittedAt) { final String streamName = airbyteStream.getStream().getName(); final String namespace = airbyteStream.getStream().getNamespace(); final String cursorField = IncrementalUtils.getCursorField(airbyteStream); @@ -300,24 +350,22 @@ private AutoCloseableIterator getIncrementalStream(K database, return getMessageIterator(queryIterator, streamName, namespace, emittedAt.toEpochMilli()); } - private AutoCloseableIterator getFullRefreshStream(K database, - String streamName, - String namespace, - List selectedDatabaseFields, - TableInfo> table, - Instant emittedAt) { + protected AutoCloseableIterator getFullRefreshStream(K database, + String streamName, + String namespace, + List selectedDatabaseFields, + TableInfo> table, + Instant emittedAt) { final AutoCloseableIterator queryStream = queryTableFullRefresh(database, selectedDatabaseFields, table.getNameSpace(), table.getName()); return getMessageIterator(queryStream, streamName, namespace, emittedAt.toEpochMilli()); } - protected abstract JsonSchemaPrimitive getType(T columnType); - protected String getFullyQualifiedTableName(String nameSpace, String tableName) { return nameSpace != null ? nameSpace + "." 
+ tableName : tableName; } - private List> getTables(final K database) throws Exception { + protected List> getTables(final K database) throws Exception { final List>> tableInfos = discoverWithoutSystemTables(database); final Map> fullyQualifiedTableNameToPrimaryKeys = discoverPrimaryKeys(database, tableInfos); @@ -342,18 +390,6 @@ private List> getTables(final K database) throws Exception { .collect(Collectors.toList()); } - /** - * Discover Primary keys for each table and @return a map of schema.table name to their associated - * list of primary key fields. - * - * When invoking the conn.getMetaData().getPrimaryKeys() function without a table name, it may fail - * on some databases (for example MySql) but works on others (for instance Postgres). To avoid - * making repeated queries to the DB, we try to get all primary keys without specifying a table - * first, if it doesn't work, we retry one table at a time. - */ - protected abstract Map> discoverPrimaryKeys(K database, - List>> tableInfos); - protected void assertColumnsWithSameNameAreSame(String nameSpace, String tableName, List> columns) { columns.stream() .collect(Collectors.groupingBy(CommonField::getName)) @@ -378,29 +414,11 @@ protected List>> discoverWithoutSystemTables(final K da Collectors.toList())); } - protected abstract List>> discoverInternal(final K database) - throws Exception; - - public AutoCloseableIterator getMessageIterator(AutoCloseableIterator recordIterator, - String streamName, - String namespace, - long emittedAt) { - return AutoCloseableIterators.transform(recordIterator, r -> new AirbyteMessage() - .withType(Type.RECORD) - .withRecord(new AirbyteRecordMessage() - .withStream(streamName) - .withNamespace(namespace) - .withEmittedAt(emittedAt) - .withData(r))); - } - - protected abstract String getQuoteString(); - - private String getIdentifierWithQuoting(String identifier) { + protected String getIdentifierWithQuoting(String identifier) { return getQuoteString() + identifier + getQuoteString(); } - private String enquoteIdentifierList(List identifiers) { + protected String enquoteIdentifierList(List identifiers) { final StringJoiner joiner = new StringJoiner(","); for (String identifier : identifiers) { joiner.add(getIdentifierWithQuoting(identifier)); @@ -408,11 +426,24 @@ private String enquoteIdentifierList(List identifiers) { return joiner.toString(); } - private String getFullTableName(String nameSpace, String tableName) { + protected String getFullTableName(String nameSpace, String tableName) { return (nameSpace == null || nameSpace.isEmpty() ? getIdentifierWithQuoting(tableName) : getIdentifierWithQuoting(nameSpace) + "." 
+ getIdentifierWithQuoting(tableName)); } + public AutoCloseableIterator getMessageIterator(AutoCloseableIterator recordIterator, + String streamName, + String namespace, + long emittedAt) { + return AutoCloseableIterators.transform(recordIterator, r -> new AirbyteMessage() + .withType(Type.RECORD) + .withRecord(new AirbyteRecordMessage() + .withStream(streamName) + .withNamespace(namespace) + .withEmittedAt(emittedAt) + .withData(r))); + } + protected AutoCloseableIterator queryTable(K database, String sqlQuery) { return AutoCloseableIterators.lazyIterator(() -> { try { @@ -449,6 +480,4 @@ private K createDatabaseInternal(JsonNode sourceConfig) throws Exception { return database; } - protected abstract K createDatabase(JsonNode config) throws Exception; - } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateManager.java index 75174ee739d1..f49884e5ba7d 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateManager.java @@ -30,8 +30,8 @@ import com.google.common.collect.Lists; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; -import io.airbyte.integrations.source.relationaldb.models.JdbcState; -import io.airbyte.integrations.source.relationaldb.models.JdbcStreamState; +import io.airbyte.integrations.source.relationaldb.models.DbState; +import io.airbyte.integrations.source.relationaldb.models.DbStreamState; import io.airbyte.protocol.models.AirbyteStateMessage; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.ConfiguredAirbyteStream; @@ -56,11 +56,11 @@ public class StateManager { private Boolean isCdc; private final CdcStateManager cdcStateManager; - public static JdbcState emptyState() { - return new JdbcState(); + public static DbState emptyState() { + return new DbState(); } - public StateManager(JdbcState serialized, ConfiguredAirbyteCatalog catalog) { + public StateManager(DbState serialized, ConfiguredAirbyteCatalog catalog) { this.cdcStateManager = new CdcStateManager(serialized.getCdcState()); this.isCdc = serialized.getCdc(); if (serialized.getCdc() == null) { @@ -71,7 +71,7 @@ public StateManager(JdbcState serialized, ConfiguredAirbyteCatalog catalog) { new ImmutableMap.Builder().putAll(createCursorInfoMap(serialized, catalog)).build(); } - private static Map createCursorInfoMap(JdbcState serialized, ConfiguredAirbyteCatalog catalog) { + private static Map createCursorInfoMap(DbState serialized, ConfiguredAirbyteCatalog catalog) { final Set allStreamNames = catalog.getStreams() .stream() .map(ConfiguredAirbyteStream::getStream) @@ -80,14 +80,14 @@ private static Map createCursorInfoM allStreamNames.addAll(serialized.getStreams().stream().map(StateManager::toAirbyteStreamNameNamespacePair).collect(Collectors.toSet())); final Map localMap = new HashMap<>(); - final Map pairToState = serialized.getStreams() + final Map pairToState = serialized.getStreams() .stream() .collect(Collectors.toMap(StateManager::toAirbyteStreamNameNamespacePair, a -> a)); final Map pairToConfiguredAirbyteStream = catalog.getStreams().stream() 
.collect(Collectors.toMap(AirbyteStreamNameNamespacePair::fromConfiguredAirbyteSteam, s -> s)); for (final AirbyteStreamNameNamespacePair pair : allStreamNames) { - final Optional stateOptional = Optional.ofNullable(pairToState.get(pair)); + final Optional stateOptional = Optional.ofNullable(pairToState.get(pair)); final Optional streamOptional = Optional.ofNullable(pairToConfiguredAirbyteStream.get(pair)); localMap.put(pair, createCursorInfoForStream(pair, stateOptional, streamOptional)); } @@ -95,20 +95,20 @@ private static Map createCursorInfoM return localMap; } - private static AirbyteStreamNameNamespacePair toAirbyteStreamNameNamespacePair(JdbcStreamState state) { + private static AirbyteStreamNameNamespacePair toAirbyteStreamNameNamespacePair(DbStreamState state) { return new AirbyteStreamNameNamespacePair(state.getStreamName(), state.getStreamNamespace()); } @VisibleForTesting @SuppressWarnings("OptionalUsedAsFieldOrParameterType") static CursorInfo createCursorInfoForStream(AirbyteStreamNameNamespacePair pair, - Optional stateOptional, + Optional stateOptional, Optional streamOptional) { final String originalCursorField = stateOptional - .map(JdbcStreamState::getCursorField) + .map(DbStreamState::getCursorField) .flatMap(f -> f.size() > 0 ? Optional.of(f.get(0)) : Optional.empty()) .orElse(null); - final String originalCursor = stateOptional.map(JdbcStreamState::getCursor).orElse(null); + final String originalCursor = stateOptional.map(DbStreamState::getCursor).orElse(null); final String cursor; final String cursorField; @@ -120,10 +120,10 @@ static CursorInfo createCursorInfoForStream(AirbyteStreamNameNamespacePair pair, .flatMap(f -> f.size() > 0 ? Optional.of(f.get(0)) : Optional.empty()) .orElse(null); // if cursor field is set in state. - if (stateOptional.map(JdbcStreamState::getCursorField).isPresent()) { + if (stateOptional.map(DbStreamState::getCursorField).isPresent()) { // if cursor field in catalog and state are the same. - if (stateOptional.map(JdbcStreamState::getCursorField).equals(streamOptional.map(ConfiguredAirbyteStream::getCursorField))) { - cursor = stateOptional.map(JdbcStreamState::getCursor).orElse(null); + if (stateOptional.map(DbStreamState::getCursorField).equals(streamOptional.map(ConfiguredAirbyteStream::getCursorField))) { + cursor = stateOptional.map(DbStreamState::getCursor).orElse(null); LOGGER.info("Found matching cursor in state. Stream: {}. Cursor Field: {} Value: {}", pair, cursorField, cursor); // if cursor field in catalog and state are different. } else { @@ -198,11 +198,11 @@ public AirbyteStateMessage emit() { } private AirbyteStateMessage toState() { - final JdbcState jdbcState = new JdbcState() + final DbState DbState = new DbState() .withCdc(isCdc) .withStreams(pairToCursorInfo.entrySet().stream() .sorted(Entry.comparingByKey()) // sort by stream name then namespace for sanity. - .map(e -> new JdbcStreamState() + .map(e -> new DbStreamState() .withStreamName(e.getKey().getName()) .withStreamNamespace(e.getKey().getNamespace()) .withCursorField(e.getValue().getCursorField() == null ? 
Collections.emptyList() : Lists.newArrayList(e.getValue().getCursorField())) @@ -210,7 +210,7 @@ private AirbyteStateMessage toState() { .collect(Collectors.toList())) .withCdcState(cdcStateManager.getCdcState()); - return new AirbyteStateMessage().withData(Jsons.jsonNode(jdbcState)); + return new AirbyteStateMessage().withData(Jsons.jsonNode(DbState)); } } diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/resources/jdbc_models/jdbc_models.yaml b/airbyte-integrations/connectors/source-relational-db/src/main/resources/db_models/db_models.yaml similarity index 87% rename from airbyte-integrations/connectors/source-relational-db/src/main/resources/jdbc_models/jdbc_models.yaml rename to airbyte-integrations/connectors/source-relational-db/src/main/resources/db_models/db_models.yaml index ac3bee010351..d058b5e306ee 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/resources/jdbc_models/jdbc_models.yaml +++ b/airbyte-integrations/connectors/source-relational-db/src/main/resources/db_models/db_models.yaml @@ -1,14 +1,14 @@ --- "$schema": http://json-schema.org/draft-07/schema# "$id": https://github.com/airbytehq/airbyte/blob/master/airbyte-protocol/models/src/main/resources/airbyte_protocol/airbyte_protocol.yaml -title: JdbcSourceModels +title: SourceModels type: object -description: Jdbc Source Models +description: Source Models properties: state: - "$ref": "#/definitions/JdbcState" + "$ref": "#/definitions/DbState" definitions: - JdbcState: + DbState: type: object properties: cdc: @@ -24,8 +24,8 @@ definitions: description: "State for each stream." type: array items: - "$ref": "#/definitions/JdbcStreamState" - JdbcStreamState: + "$ref": "#/definitions/DbStreamState" + DbStreamState: type: object additionalProperties: false required: diff --git a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateManagerTest.java b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateManagerTest.java index a92d22d1e8d5..5ba1aa0e8080 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateManagerTest.java +++ b/airbyte-integrations/connectors/source-relational-db/src/test/java/io/airbyte/integrations/source/relationaldb/StateManagerTest.java @@ -28,8 +28,8 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; -import io.airbyte.integrations.source.relationaldb.models.JdbcState; -import io.airbyte.integrations.source.relationaldb.models.JdbcStreamState; +import io.airbyte.integrations.source.relationaldb.models.DbState; +import io.airbyte.integrations.source.relationaldb.models.DbStreamState; import io.airbyte.protocol.models.AirbyteStateMessage; import io.airbyte.protocol.models.AirbyteStream; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; @@ -104,8 +104,8 @@ void testCreateCursorInfoStateAndCatalogButNoCursorField() { } @SuppressWarnings("SameParameterValue") - private static Optional getState(String cursorField, String cursor) { - return Optional.of(new JdbcStreamState() + private static Optional getState(String cursorField, String cursor) { + return Optional.of(new DbStreamState() .withStreamName(STREAM_NAME1) .withCursorField(Lists.newArrayList(cursorField)) .withCursor(cursor)); @@ -119,10 +119,10 @@ private static Optional getCatalog(String cursorField) @Test void testGetters() { - final 
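The renamed models keep the same shape as before. The following small sketch, which is not part of the diff, shows how a cursor-based state object is built and serialized in the same way the tests in this patch do; the stream name, namespace and cursor values are invented for the example:

    import com.google.common.collect.Lists;
    import io.airbyte.commons.json.Jsons;
    import io.airbyte.integrations.source.relationaldb.models.DbState;
    import io.airbyte.integrations.source.relationaldb.models.DbStreamState;

    public class DbStateSketch {

      public static void main(String[] args) {
        // One entry per stream, carrying the cursor column and the last seen cursor value.
        final DbState state = new DbState()
            .withCdc(false)
            .withStreams(Lists.newArrayList(new DbStreamState()
                .withStreamName("users")
                .withStreamNamespace("public")
                .withCursorField(Lists.newArrayList("id"))
                .withCursor("42")));

        // The StateManager wraps this JSON payload in an AirbyteStateMessage when it emits state.
        System.out.println(Jsons.jsonNode(state));
      }

    }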
JdbcState state = new JdbcState().withStreams(Lists.newArrayList( - new JdbcStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD1)) + final DbState state = new DbState().withStreams(Lists.newArrayList( + new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD1)) .withCursor(CURSOR), - new JdbcStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE))); + new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE))); final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog() .withStreams(Lists.newArrayList( @@ -158,28 +158,28 @@ void testToState() { new ConfiguredAirbyteStream() .withStream(new AirbyteStream().withName(STREAM_NAME3).withNamespace(NAMESPACE)))); - final StateManager stateManager = new StateManager(new JdbcState(), catalog); + final StateManager stateManager = new StateManager(new DbState(), catalog); final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() - .withData(Jsons.jsonNode(new JdbcState().withStreams(Lists + .withData(Jsons.jsonNode(new DbState().withStreams(Lists .newArrayList( - new JdbcStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD1)) + new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD1)) .withCursor("a"), - new JdbcStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD2)), - new JdbcStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) - .stream().sorted(Comparator.comparing(JdbcStreamState::getStreamName)).collect(Collectors.toList())) + new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD2)), + new DbStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) .withCdc(false))); final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); assertEquals(expectedFirstEmission, actualFirstEmission); final AirbyteStateMessage expectedSecondEmission = new AirbyteStateMessage() - .withData(Jsons.jsonNode(new JdbcState().withStreams(Lists + .withData(Jsons.jsonNode(new DbState().withStreams(Lists .newArrayList( - new JdbcStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD1)) + new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD1)) .withCursor("a"), - new JdbcStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD2)) + new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD2)) .withCursor("b"), - new JdbcStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) - .stream().sorted(Comparator.comparing(JdbcStreamState::getStreamName)).collect(Collectors.toList())) + new DbStreamState().withStreamName(STREAM_NAME3).withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) .withCdc(false))); final AirbyteStateMessage 
actualSecondEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR2, "b"); assertEquals(expectedSecondEmission, actualSecondEmission); @@ -194,15 +194,15 @@ void testToStateNullCursorField() { .withCursorField(Lists.newArrayList(CURSOR_FIELD1)), new ConfiguredAirbyteStream() .withStream(new AirbyteStream().withName(STREAM_NAME2).withNamespace(NAMESPACE)))); - final StateManager stateManager = new StateManager(new JdbcState(), catalog); + final StateManager stateManager = new StateManager(new DbState(), catalog); final AirbyteStateMessage expectedFirstEmission = new AirbyteStateMessage() - .withData(Jsons.jsonNode(new JdbcState().withStreams(Lists + .withData(Jsons.jsonNode(new DbState().withStreams(Lists .newArrayList( - new JdbcStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD1)) + new DbStreamState().withStreamName(STREAM_NAME1).withStreamNamespace(NAMESPACE).withCursorField(Lists.newArrayList(CURSOR_FIELD1)) .withCursor("a"), - new JdbcStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE)) - .stream().sorted(Comparator.comparing(JdbcStreamState::getStreamName)).collect(Collectors.toList())) + new DbStreamState().withStreamName(STREAM_NAME2).withStreamNamespace(NAMESPACE)) + .stream().sorted(Comparator.comparing(DbStreamState::getStreamName)).collect(Collectors.toList())) .withCdc(false))); final AirbyteStateMessage actualFirstEmission = stateManager.updateAndEmit(NAME_NAMESPACE_PAIR1, "a"); From 3bc83b28430141de8e5fd45f148a3ec1b85a1853 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Thu, 17 Jun 2021 22:58:13 +0300 Subject: [PATCH 11/63] bonus renaming --- .../main/java/io/airbyte/db/{jdbc => }/SqlDatabase.java | 2 +- .../src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java | 1 + .../integrations/source/jdbc/AbstractJdbcSource.java | 2 +- .../connectors/source-relational-db/Dockerfile | 4 ++-- .../connectors/source-relational-db/readme.md | 8 ++------ .../source/relationaldb/AbstractRelationalDbSource.java | 6 +++--- .../integrations/source/relationaldb/CdcStateManager.java | 2 +- .../integrations/source/relationaldb/StateManager.java | 2 +- 8 files changed, 12 insertions(+), 15 deletions(-) rename airbyte-db/src/main/java/io/airbyte/db/{jdbc => }/SqlDatabase.java (98%) diff --git a/airbyte-db/src/main/java/io/airbyte/db/jdbc/SqlDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/SqlDatabase.java similarity index 98% rename from airbyte-db/src/main/java/io/airbyte/db/jdbc/SqlDatabase.java rename to airbyte-db/src/main/java/io/airbyte/db/SqlDatabase.java index 1e471441d3e7..a09738d3d734 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/jdbc/SqlDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/SqlDatabase.java @@ -22,7 +22,7 @@ * SOFTWARE. 
*/ -package io.airbyte.db.jdbc; +package io.airbyte.db; import com.fasterxml.jackson.databind.JsonNode; import java.sql.SQLException; diff --git a/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java index e830ae3efffe..c3e71f6ece56 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java @@ -27,6 +27,7 @@ import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.commons.functional.CheckedConsumer; import io.airbyte.commons.functional.CheckedFunction; +import io.airbyte.db.SqlDatabase; import java.sql.Connection; import java.sql.DatabaseMetaData; import java.sql.PreparedStatement; diff --git a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java index e03a25d5853d..7aae58c25414 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java +++ b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java @@ -32,10 +32,10 @@ import io.airbyte.commons.util.AutoCloseableIterator; import io.airbyte.commons.util.AutoCloseableIterators; import io.airbyte.db.Databases; +import io.airbyte.db.SqlDatabase; import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.db.jdbc.JdbcStreamingQueryConfiguration; import io.airbyte.db.jdbc.JdbcUtils; -import io.airbyte.db.jdbc.SqlDatabase; import io.airbyte.integrations.base.Source; import io.airbyte.integrations.source.relationaldb.AbstractRelationalDbSource; import io.airbyte.integrations.source.relationaldb.TableInfo; diff --git a/airbyte-integrations/connectors/source-relational-db/Dockerfile b/airbyte-integrations/connectors/source-relational-db/Dockerfile index bf1d4c215c6d..0fa1991c49e5 100644 --- a/airbyte-integrations/connectors/source-relational-db/Dockerfile +++ b/airbyte-integrations/connectors/source-relational-db/Dockerfile @@ -2,11 +2,11 @@ FROM airbyte/integration-base-java:dev WORKDIR /airbyte -ENV APPLICATION source-jdbc +ENV APPLICATION source-relational-db COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 LABEL io.airbyte.version=0.3.0 -LABEL io.airbyte.name=airbyte/source-jdbc +LABEL io.airbyte.name=airbyte/source-relational-db diff --git a/airbyte-integrations/connectors/source-relational-db/readme.md b/airbyte-integrations/connectors/source-relational-db/readme.md index 30ba2fa6dc6c..c84c4df734ab 100644 --- a/airbyte-integrations/connectors/source-relational-db/readme.md +++ b/airbyte-integrations/connectors/source-relational-db/readme.md @@ -1,7 +1,3 @@ -# JDBC Source +# Relational Database Source -We are not planning to expose this source in the UI yet. It serves as a base upon which we can build all of our other JDBC-compliant sources. - -The reasons we are not exposing this source by itself are: -1. It is not terribly user-friendly (jdbc urls are hard for a human to parse) -1. Each JDBC-compliant db, we need to make sure the appropriate drivers are installed on the image. We don't want to frontload installing all possible drivers, and instead would like to be more methodical. 
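After this move, io.airbyte.db.SqlDatabase is the minimal contract shared by JDBC and future non-JDBC relational sources. A usage sketch follows, assuming an SqlDatabase instance is already available; the SQL, table and column names are examples only and not taken from the patch:

    import com.fasterxml.jackson.databind.JsonNode;
    import io.airbyte.db.SqlDatabase;
    import java.util.stream.Stream;

    public class SqlDatabaseUsageSketch {

      static void printNames(SqlDatabase database) throws Exception {
        // execute() runs a statement that returns no rows.
        database.execute("CREATE TABLE IF NOT EXISTS users(id INTEGER, name VARCHAR(200))");

        // query() streams each row back as a JsonNode; string parameters are bound positionally.
        try (Stream<JsonNode> rows = database.query("SELECT name FROM users WHERE id > ?", "0")) {
          rows.map(row -> row.get("name").asText()).forEach(System.out::println);
        }
      }

    }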
Instead for each JDBC-compliant source, we will extend this one and then install only the necessary JDBC drivers on that source's image. +We are not planning to expose this source in the UI yet. It serves as a base upon which we can build all of our other Relational Database-compliant sources. diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java index 2e52b90f4990..4b38086824a3 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java @@ -34,7 +34,7 @@ import io.airbyte.commons.util.AutoCloseableIterator; import io.airbyte.commons.util.AutoCloseableIterators; import io.airbyte.db.IncrementalUtils; -import io.airbyte.db.jdbc.SqlDatabase; +import io.airbyte.db.SqlDatabase; import io.airbyte.integrations.BaseConnector; import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; import io.airbyte.integrations.base.Source; @@ -329,7 +329,7 @@ protected AutoCloseableIterator getIncrementalStream(K database, final String streamName = airbyteStream.getStream().getName(); final String namespace = airbyteStream.getStream().getNamespace(); final String cursorField = IncrementalUtils.getCursorField(airbyteStream); - final T cursorJdbcType = table.getFields().stream() + final T cursorType = table.getFields().stream() .filter(info -> info.getName().equals(cursorField)) .map(CommonField::getType) .findFirst() @@ -344,7 +344,7 @@ protected AutoCloseableIterator getIncrementalStream(K database, table.getNameSpace(), table.getName(), cursorField, - cursorJdbcType, + cursorType, cursor); return getMessageIterator(queryIterator, streamName, namespace, emittedAt.toEpochMilli()); diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/CdcStateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/CdcStateManager.java index 030d4b7c47bb..f8f401870ab1 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/CdcStateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/CdcStateManager.java @@ -56,7 +56,7 @@ public CdcState getCdcState() { @Override public String toString() { - return "JdbcCdcStateManager{" + + return "CdcStateManager{" + "initialState=" + initialState + ", currentState=" + currentState + '}'; diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateManager.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateManager.java index f49884e5ba7d..45ea8e17c7b8 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateManager.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateManager.java @@ -46,7 +46,7 @@ import org.slf4j.LoggerFactory; /** - * 
Handles the state machine for the state of jdbc source implementations. + * Handles the state machine for the state of source implementations. */ public class StateManager { From 18e86d8b11f35e957e4c49e2c3e0e194ae5295d3 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Fri, 18 Jun 2021 17:44:11 +0300 Subject: [PATCH 12/63] move utility methods specific for jdbc source to a proper module --- .../java/io/airbyte/db/jdbc/JdbcUtils.java | 62 +------ .../source/clickhouse/ClickHouseSource.java | 4 +- .../ClickHouseSourceAcceptanceTest.java | 6 +- .../source/jdbc/AbstractJdbcSource.java | 19 +- .../source/jdbc/JdbcSourceUtils.java | 168 ++++++++++++++++++ .../jdbc/test/JdbcSourceAcceptanceTest.java | 32 ++-- .../OracleJdbcSourceAcceptanceTest.java | 4 +- .../sources/RedshiftSourceAcceptanceTest.java | 4 +- .../AbstractRelationalDbSource.java | 13 ++ 9 files changed, 220 insertions(+), 92 deletions(-) create mode 100644 airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSourceUtils.java diff --git a/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcUtils.java b/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcUtils.java index fec8460395c1..cd153385c236 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcUtils.java +++ b/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcUtils.java @@ -30,7 +30,6 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.protocol.models.JsonSchemaPrimitive; import java.math.BigDecimal; -import java.sql.Connection; import java.sql.Date; import java.sql.JDBCType; import java.sql.PreparedStatement; @@ -42,10 +41,8 @@ import java.text.SimpleDateFormat; import java.time.Instant; import java.util.Collections; -import java.util.List; import java.util.Spliterator; import java.util.Spliterators; -import java.util.StringJoiner; import java.util.function.Consumer; import java.util.function.Function; import java.util.stream.Stream; @@ -54,7 +51,7 @@ public class JdbcUtils { - private static final DateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); // Quoted "Z" to indicate UTC, no timezone offset + public static final DateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); // Quoted "Z" to indicate UTC, no timezone offset /** * Map records returned in a result set. @@ -261,32 +258,6 @@ private static T nullIfInvalid(SQLSupplier valueProducer, Function { @@ -294,35 +265,4 @@ private interface SQLSupplier { } - /** - * Given a database connection and identifier, adds db-specific quoting. - * - * @param connection database connection - * @param identifier identifier to quote - * @return quoted identifier - * @throws SQLException throws if there are any issues fulling the quoting metadata from the db. - */ - public static String enquoteIdentifier(Connection connection, String identifier) throws SQLException { - final String identifierQuoteString = connection.getMetaData().getIdentifierQuoteString(); - - return identifierQuoteString + identifier + identifierQuoteString; - } - - /** - * Given a database connection and identifiers, adds db-specific quoting to each identifier. - * - * @param connection database connection - * @param identifiers identifiers to quote - * @return quoted identifiers - * @throws SQLException throws if there are any issues fulling the quoting metadata from the db. 
- */ - public static String enquoteIdentifierList(Connection connection, List identifiers) throws SQLException { - final StringJoiner joiner = new StringJoiner(","); - for (String col : identifiers) { - String s = JdbcUtils.enquoteIdentifier(connection, col); - joiner.add(s); - } - return joiner.toString(); - } - } diff --git a/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java b/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java index dacc68a2128d..6dbe139aa67f 100644 --- a/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java +++ b/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java @@ -28,11 +28,11 @@ import com.google.common.collect.ImmutableMap; import io.airbyte.commons.json.Jsons; import io.airbyte.db.jdbc.JdbcDatabase; -import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.db.jdbc.NoOpJdbcStreamingQueryConfiguration; import io.airbyte.integrations.base.IntegrationRunner; import io.airbyte.integrations.base.Source; import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; +import io.airbyte.integrations.source.jdbc.JdbcSourceUtils; import io.airbyte.integrations.source.relationaldb.TableInfo; import io.airbyte.protocol.models.CommonField; import java.sql.JDBCType; @@ -61,7 +61,7 @@ protected Map> discoverPrimaryKeys(JdbcDatabase database, List>> tableInfos) { return tableInfos.stream() .collect(Collectors.toMap( - tableInfo -> JdbcUtils + tableInfo -> JdbcSourceUtils .getFullyQualifiedTableName(tableInfo.getNameSpace(), tableInfo.getName()), tableInfo -> { try { diff --git a/airbyte-integrations/connectors/source-clickhouse/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/ClickHouseSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-clickhouse/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/ClickHouseSourceAcceptanceTest.java index c1edc17600f5..245dc9daae86 100644 --- a/airbyte-integrations/connectors/source-clickhouse/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/ClickHouseSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-clickhouse/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/ClickHouseSourceAcceptanceTest.java @@ -31,8 +31,8 @@ import io.airbyte.commons.resources.MoreResources; import io.airbyte.db.Databases; import io.airbyte.db.jdbc.JdbcDatabase; -import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.source.clickhouse.ClickHouseSource; +import io.airbyte.integrations.source.jdbc.JdbcSourceUtils; import io.airbyte.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.integrations.standardtest.source.TestDestinationEnv; import io.airbyte.protocol.models.CatalogHelpers; @@ -126,10 +126,10 @@ protected void setupEnvironment(TestDestinationEnv environment) throws Exception config.get("database").asText()), ClickHouseSource.DRIVER_CLASS); - final String table1 = JdbcUtils.getFullyQualifiedTableName(SCHEMA_NAME, STREAM_NAME); + final String table1 = JdbcSourceUtils.getFullyQualifiedTableName(SCHEMA_NAME, STREAM_NAME); final String createTable1 = String.format("CREATE TABLE IF NOT EXISTS %s (id INTEGER, name VARCHAR(200)) ENGINE = TinyLog \n", table1); - 
final String table2 = JdbcUtils.getFullyQualifiedTableName(SCHEMA_NAME, STREAM_NAME2); + final String table2 = JdbcSourceUtils.getFullyQualifiedTableName(SCHEMA_NAME, STREAM_NAME2); final String createTable2 = String.format("CREATE TABLE IF NOT EXISTS %s (id INTEGER, name VARCHAR(200)) ENGINE = TinyLog \n", table2); database.execute(connection -> { diff --git a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java index 7aae58c25414..e0ee7d023028 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java +++ b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java @@ -168,7 +168,7 @@ public List>> discoverInternal(final JdbcDatabas @Override protected JsonSchemaPrimitive getType(JDBCType columnType) { - return JdbcUtils.getType(columnType); + return JdbcSourceUtils.getType(columnType); } @Override @@ -181,7 +181,7 @@ protected Map> discoverPrimaryKeys(JdbcDatabase database, r -> { final String schemaName = r.getObject(JDBC_COLUMN_SCHEMA_NAME) != null ? r.getString(JDBC_COLUMN_SCHEMA_NAME) : r.getString(JDBC_COLUMN_DATABASE_NAME); - final String streamName = JdbcUtils.getFullyQualifiedTableName(schemaName, r.getString(JDBC_COLUMN_TABLE_NAME)); + final String streamName = JdbcSourceUtils.getFullyQualifiedTableName(schemaName, r.getString(JDBC_COLUMN_TABLE_NAME)); final String primaryKey = r.getString(JDBC_COLUMN_COLUMN_NAME); return new SimpleImmutableEntry<>(streamName, primaryKey); })); @@ -194,9 +194,11 @@ protected Map> discoverPrimaryKeys(JdbcDatabase database, // Get primary keys one table at a time return tableInfos.stream() .collect(Collectors.toMap( - tableInfo -> JdbcUtils.getFullyQualifiedTableName(tableInfo.getNameSpace(), tableInfo.getName()), + tableInfo -> JdbcSourceUtils + .getFullyQualifiedTableName(tableInfo.getNameSpace(), tableInfo.getName()), tableInfo -> { - final String streamName = JdbcUtils.getFullyQualifiedTableName(tableInfo.getNameSpace(), tableInfo.getName()); + final String streamName = JdbcSourceUtils + .getFullyQualifiedTableName(tableInfo.getNameSpace(), tableInfo.getName()); try { final Map> primaryKeys = aggregatePrimateKeys(database.bufferedResultSetQuery( conn -> conn.getMetaData().getPrimaryKeys(getCatalog(database), tableInfo.getNameSpace(), tableInfo.getName()), @@ -229,12 +231,13 @@ public AutoCloseableIterator queryTableIncremental(JdbcDatabase databa connection -> { LOGGER.info("Preparing query for table: {}", tableName); final String sql = String.format("SELECT %s FROM %s WHERE %s > ?", - JdbcUtils.enquoteIdentifierList(connection, columnNames), - JdbcUtils.getFullyQualifiedTableNameWithQuoting(connection, schemaName, tableName), - JdbcUtils.enquoteIdentifier(connection, cursorField)); + JdbcSourceUtils.enquoteIdentifierList(connection, columnNames), + JdbcSourceUtils + .getFullyQualifiedTableNameWithQuoting(connection, schemaName, tableName), + JdbcSourceUtils.enquoteIdentifier(connection, cursorField)); final PreparedStatement preparedStatement = connection.prepareStatement(sql); - JdbcUtils.setStatementField(preparedStatement, 1, cursorFieldType, cursor); + JdbcSourceUtils.setStatementField(preparedStatement, 1, cursorFieldType, cursor); LOGGER.info("Executing query for table: {}", tableName); return preparedStatement; }, diff --git 
a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSourceUtils.java b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSourceUtils.java new file mode 100644 index 000000000000..741d95e07b97 --- /dev/null +++ b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSourceUtils.java @@ -0,0 +1,168 @@ +/* + * MIT License + * + * Copyright (c) 2020 Airbyte + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package io.airbyte.integrations.source.jdbc; + +import io.airbyte.db.jdbc.JdbcUtils; +import io.airbyte.protocol.models.JsonSchemaPrimitive; +import java.math.BigDecimal; +import java.sql.Connection; +import java.sql.Date; +import java.sql.JDBCType; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.sql.Timestamp; +import java.text.ParseException; +import java.util.List; +import java.util.StringJoiner; +import javax.xml.bind.DatatypeConverter; + +public class JdbcSourceUtils { + + public static void setStatementField(PreparedStatement preparedStatement, + int parameterIndex, + JDBCType cursorFieldType, + String value) + throws SQLException { + switch (cursorFieldType) { + // parse time, and timestamp the same way. this seems to not cause an problems and allows us + // to treat them all as ISO8601. if this causes any problems down the line, we can adjust. + // Parsing TIME as a TIMESTAMP might potentially break for ClickHouse cause it doesn't expect TIME + // value in the following format + case TIME, TIMESTAMP -> { + try { + preparedStatement.setTimestamp(parameterIndex, Timestamp + .from(JdbcUtils.DATE_FORMAT.parse(value).toInstant())); + } catch (ParseException e) { + throw new RuntimeException(e); + } + } + + case DATE -> { + try { + Timestamp from = Timestamp.from(JdbcUtils.DATE_FORMAT.parse(value).toInstant()); + preparedStatement.setDate(parameterIndex, new Date(from.getTime())); + } catch (ParseException e) { + throw new RuntimeException(e); + } + } + // todo (cgardens) - currently we do not support bit because it requires special handling in the + // prepared statement. + // see + // https://www.postgresql-archive.org/Problems-with-BIT-datatype-and-preparedStatment-td5733533.html. 
+ // case BIT -> preparedStatement.setString(parameterIndex, value); + case BOOLEAN -> preparedStatement.setBoolean(parameterIndex, Boolean.parseBoolean(value)); + case TINYINT, SMALLINT -> preparedStatement.setShort(parameterIndex, Short.parseShort(value)); + case INTEGER -> preparedStatement.setInt(parameterIndex, Integer.parseInt(value)); + case BIGINT -> preparedStatement.setLong(parameterIndex, Long.parseLong(value)); + case FLOAT, DOUBLE -> preparedStatement.setDouble(parameterIndex, Double.parseDouble(value)); + case REAL -> preparedStatement.setFloat(parameterIndex, Float.parseFloat(value)); + case NUMERIC, DECIMAL -> preparedStatement.setBigDecimal(parameterIndex, new BigDecimal(value)); + case CHAR, NCHAR, NVARCHAR, VARCHAR, LONGVARCHAR -> preparedStatement.setString(parameterIndex, value); + case BINARY, BLOB -> preparedStatement.setBytes(parameterIndex, DatatypeConverter.parseHexBinary(value)); + // since cursor are expected to be comparable, handle cursor typing strictly and error on + // unrecognized types + default -> throw new IllegalArgumentException(String.format("%s is not supported.", cursorFieldType)); + } + } + + /** + * Given a database connection and identifiers, adds db-specific quoting to each identifier. + * + * @param connection database connection + * @param identifiers identifiers to quote + * @return quoted identifiers + * @throws SQLException throws if there are any issues fulling the quoting metadata from the db. + */ + public static String enquoteIdentifierList(Connection connection, List identifiers) throws SQLException { + final StringJoiner joiner = new StringJoiner(","); + for (String col : identifiers) { + String s = enquoteIdentifier(connection, col); + joiner.add(s); + } + return joiner.toString(); + } + + /** + * Given a database connection and identifier, adds db-specific quoting. + * + * @param connection database connection + * @param identifier identifier to quote + * @return quoted identifier + * @throws SQLException throws if there are any issues fulling the quoting metadata from the db. + */ + public static String enquoteIdentifier(Connection connection, String identifier) throws SQLException { + final String identifierQuoteString = connection.getMetaData().getIdentifierQuoteString(); + + return identifierQuoteString + identifier + identifierQuoteString; + } + + /** + * Create a fully qualified table name (including schema). e.g. public.my_table + * + * @param schemaName name of schema, if exists (CAN BE NULL) + * @param tableName name of the table + * @return fully qualified table name + */ + public static String getFullyQualifiedTableName(String schemaName, String tableName) { + return schemaName != null ? schemaName + "." + tableName : tableName; + } + + /** + * Create a fully qualified table name (including schema) with db-specific quoted syntax. e.g. + * "public"."my_table" + * + * @param connection connection to jdbc database (gives access to proper quotes) + * @param schemaName name of schema, if exists (CAN BE NULL) + * @param tableName name of the table + * @return fully qualified table name, using db-specific quoted syntax + * @throws SQLException throws if fails to pull correct quote character. + */ + public static String getFullyQualifiedTableNameWithQuoting(Connection connection, String schemaName, String tableName) throws SQLException { + final String quotedTableName = enquoteIdentifier(connection, tableName); + return schemaName != null ? enquoteIdentifier(connection, schemaName) + "." 
+ quotedTableName : quotedTableName; + } + + @SuppressWarnings("DuplicateBranchesInSwitch") + public static JsonSchemaPrimitive getType(JDBCType jdbcType) { + return switch (jdbcType) { + case BIT, BOOLEAN -> JsonSchemaPrimitive.BOOLEAN; + case TINYINT, SMALLINT -> JsonSchemaPrimitive.NUMBER; + case INTEGER -> JsonSchemaPrimitive.NUMBER; + case BIGINT -> JsonSchemaPrimitive.NUMBER; + case FLOAT, DOUBLE -> JsonSchemaPrimitive.NUMBER; + case REAL -> JsonSchemaPrimitive.NUMBER; + case NUMERIC, DECIMAL -> JsonSchemaPrimitive.NUMBER; + case CHAR, NCHAR, NVARCHAR, VARCHAR, LONGVARCHAR -> JsonSchemaPrimitive.STRING; + case DATE -> JsonSchemaPrimitive.STRING; + case TIME -> JsonSchemaPrimitive.STRING; + case TIMESTAMP -> JsonSchemaPrimitive.STRING; + case BLOB, BINARY, VARBINARY, LONGVARBINARY -> JsonSchemaPrimitive.STRING; + // since column types aren't necessarily meaningful to Airbyte, liberally convert all unrecgonised + // types to String + default -> JsonSchemaPrimitive.STRING; + }; + } + +} diff --git a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java index e086700faaf5..62a652ac07ef 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java @@ -41,8 +41,8 @@ import io.airbyte.commons.util.MoreIterators; import io.airbyte.db.Databases; import io.airbyte.db.jdbc.JdbcDatabase; -import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; +import io.airbyte.integrations.source.jdbc.JdbcSourceUtils; import io.airbyte.integrations.source.relationaldb.models.DbState; import io.airbyte.integrations.source.relationaldb.models.DbStreamState; import io.airbyte.protocol.models.AirbyteCatalog; @@ -302,16 +302,16 @@ void testDiscoverWithMultipleSchemas() throws Exception { database.execute(connection -> { connection.createStatement().execute( String.format("CREATE TABLE %s(id VARCHAR(200), name VARCHAR(200))", - JdbcUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); + JdbcSourceUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); connection.createStatement() .execute(String.format("INSERT INTO %s(id, name) VALUES ('1','picard')", - JdbcUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); + JdbcSourceUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); connection.createStatement() .execute(String.format("INSERT INTO %s(id, name) VALUES ('2', 'crusher')", - JdbcUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); + JdbcSourceUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); connection.createStatement() .execute(String.format("INSERT INTO %s(id, name) VALUES ('3', 'vash')", - JdbcUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); + JdbcSourceUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); }); final AirbyteCatalog actual = source.discover(config); @@ -840,22 +840,26 @@ private ConfiguredAirbyteStream createTableWithSpaces() throws SQLException { connection.createStatement() .execute( createTableQuery(getFullyQualifiedTableName( - JdbcUtils.enquoteIdentifier(connection, tableNameWithSpaces)), - "id INTEGER, " + 
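The helpers moved into this class compose naturally when a statement is built by hand, which is what the incremental read path does. A sketch under the assumption of an open java.sql.Connection and illustrative schema, table and cursor values (a later commit in this series renames the class to SourceJdbcUtils):

    import io.airbyte.integrations.source.jdbc.JdbcSourceUtils;
    import java.sql.Connection;
    import java.sql.JDBCType;
    import java.sql.PreparedStatement;
    import java.sql.SQLException;
    import java.util.List;

    public class IncrementalQuerySketch {

      // Builds the same kind of statement queryTableIncremental prepares: select the requested
      // columns and keep only rows whose cursor column is greater than the last saved cursor.
      static PreparedStatement prepare(Connection connection) throws SQLException {
        final String sql = String.format("SELECT %s FROM %s WHERE %s > ?",
            // Quotes each identifier with the driver-reported quote string, e.g. "id","name".
            JdbcSourceUtils.enquoteIdentifierList(connection, List.of("id", "name")),
            // Produces e.g. "public"."users"; the schema part is omitted when it is null.
            JdbcSourceUtils.getFullyQualifiedTableNameWithQuoting(connection, "public", "users"),
            JdbcSourceUtils.enquoteIdentifier(connection, "id"));

        final PreparedStatement statement = connection.prepareStatement(sql);
        // The cursor arrives as a String and is converted according to the column's JDBC type.
        JdbcSourceUtils.setStatementField(statement, 1, JDBCType.INTEGER, "42");
        return statement;
      }

    }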
JdbcUtils.enquoteIdentifier(connection, COL_LAST_NAME_WITH_SPACE) + JdbcSourceUtils.enquoteIdentifier(connection, tableNameWithSpaces)), + "id INTEGER, " + JdbcSourceUtils + .enquoteIdentifier(connection, COL_LAST_NAME_WITH_SPACE) + " VARCHAR(200)", "")); connection.createStatement() .execute(String.format("INSERT INTO %s(id, %s) VALUES (1,'picard')", - getFullyQualifiedTableName(JdbcUtils.enquoteIdentifier(connection, tableNameWithSpaces)), - JdbcUtils.enquoteIdentifier(connection, COL_LAST_NAME_WITH_SPACE))); + getFullyQualifiedTableName( + JdbcSourceUtils.enquoteIdentifier(connection, tableNameWithSpaces)), + JdbcSourceUtils.enquoteIdentifier(connection, COL_LAST_NAME_WITH_SPACE))); connection.createStatement() .execute(String.format("INSERT INTO %s(id, %s) VALUES (2, 'crusher')", - getFullyQualifiedTableName(JdbcUtils.enquoteIdentifier(connection, tableNameWithSpaces)), - JdbcUtils.enquoteIdentifier(connection, COL_LAST_NAME_WITH_SPACE))); + getFullyQualifiedTableName( + JdbcSourceUtils.enquoteIdentifier(connection, tableNameWithSpaces)), + JdbcSourceUtils.enquoteIdentifier(connection, COL_LAST_NAME_WITH_SPACE))); connection.createStatement() .execute(String.format("INSERT INTO %s(id, %s) VALUES (3, 'vash')", - getFullyQualifiedTableName(JdbcUtils.enquoteIdentifier(connection, tableNameWithSpaces)), - JdbcUtils.enquoteIdentifier(connection, COL_LAST_NAME_WITH_SPACE))); + getFullyQualifiedTableName( + JdbcSourceUtils.enquoteIdentifier(connection, tableNameWithSpaces)), + JdbcSourceUtils.enquoteIdentifier(connection, COL_LAST_NAME_WITH_SPACE))); }); return CatalogHelpers.createConfiguredAirbyteStream( @@ -866,7 +870,7 @@ private ConfiguredAirbyteStream createTableWithSpaces() throws SQLException { } public String getFullyQualifiedTableName(String tableName) { - return JdbcUtils.getFullyQualifiedTableName(getDefaultSchemaName(), tableName); + return JdbcSourceUtils.getFullyQualifiedTableName(getDefaultSchemaName(), tableName); } public void createSchemas() throws SQLException { diff --git a/airbyte-integrations/connectors/source-oracle/src/test/java/io/airbyte/integrations/source/oracle/OracleJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-oracle/src/test/java/io/airbyte/integrations/source/oracle/OracleJdbcSourceAcceptanceTest.java index 5bc6a13abce5..597718314204 100644 --- a/airbyte-integrations/connectors/source-oracle/src/test/java/io/airbyte/integrations/source/oracle/OracleJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-oracle/src/test/java/io/airbyte/integrations/source/oracle/OracleJdbcSourceAcceptanceTest.java @@ -28,8 +28,8 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import io.airbyte.commons.json.Jsons; -import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; +import io.airbyte.integrations.source.jdbc.JdbcSourceUtils; import io.airbyte.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; import java.math.BigDecimal; import java.sql.Connection; @@ -118,7 +118,7 @@ void cleanUpTables() throws SQLException { ResultSet resultSet = conn.createStatement().executeQuery(String.format("SELECT TABLE_NAME FROM ALL_TABLES WHERE OWNER = '%s'", schemaName)); while (resultSet.next()) { String tableName = resultSet.getString("TABLE_NAME"); - String tableNameProcessed = tableName.contains(" ") ? JdbcUtils + String tableNameProcessed = tableName.contains(" ") ? 
JdbcSourceUtils .enquoteIdentifier(conn, tableName) : tableName; conn.createStatement().executeQuery(String.format("DROP TABLE %s.%s", schemaName, tableNameProcessed)); } diff --git a/airbyte-integrations/connectors/source-redshift/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/RedshiftSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-redshift/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/RedshiftSourceAcceptanceTest.java index 1b70cbd471d0..9645ef0672f1 100644 --- a/airbyte-integrations/connectors/source-redshift/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/RedshiftSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-redshift/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/RedshiftSourceAcceptanceTest.java @@ -31,7 +31,7 @@ import io.airbyte.commons.string.Strings; import io.airbyte.db.Databases; import io.airbyte.db.jdbc.JdbcDatabase; -import io.airbyte.db.jdbc.JdbcUtils; +import io.airbyte.integrations.source.jdbc.JdbcSourceUtils; import io.airbyte.integrations.source.redshift.RedshiftSource; import io.airbyte.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.integrations.standardtest.source.TestDestinationEnv; @@ -78,7 +78,7 @@ protected void setupEnvironment(TestDestinationEnv environment) throws Exception }); streamName = "customer"; - final String fqTableName = JdbcUtils.getFullyQualifiedTableName(schemaName, streamName); + final String fqTableName = JdbcSourceUtils.getFullyQualifiedTableName(schemaName, streamName); String createTestTable = String.format("CREATE TABLE IF NOT EXISTS %s (c_custkey INTEGER, c_name VARCHAR(16), c_nation VARCHAR(16));\n", fqTableName); database.execute(connection -> { diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java index 4b38086824a3..65043fe875c0 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java @@ -465,6 +465,19 @@ public AutoCloseableIterator queryTableFullRefresh(K database, getFullTableName(schemaName, tableName))); } + /** + * Read incremental data from a table. Incremental read should returns only records where cursor + * column value is bigger than cursor. 
+ * + * @param database source database + * @param columnNames interested column names + * @param schemaName table namespace + * @param tableName target table + * @param cursorField cursor field name + * @param cursorFieldType cursor field type + * @param cursor cursor value + * @return iterator with read data + */ public abstract AutoCloseableIterator queryTableIncremental(K database, List columnNames, String schemaName, From 67f1e6af5caf0d0a865d612eca4d45203dad318e Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Tue, 22 Jun 2021 22:31:12 +0300 Subject: [PATCH 13/63] internal review update --- .../source/clickhouse/ClickHouseSource.java | 4 +-- .../ClickHouseSourceAcceptanceTest.java | 6 ++-- .../connectors/source-jdbc/build.gradle | 1 - .../source/jdbc/AbstractJdbcSource.java | 18 ++++++------ ...cSourceUtils.java => SourceJdbcUtils.java} | 2 +- .../jdbc/test/JdbcSourceAcceptanceTest.java | 28 +++++++++---------- .../OracleJdbcSourceAcceptanceTest.java | 4 +-- .../sources/RedshiftSourceAcceptanceTest.java | 4 +-- 8 files changed, 34 insertions(+), 33 deletions(-) rename airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/{JdbcSourceUtils.java => SourceJdbcUtils.java} (99%) diff --git a/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java b/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java index 6dbe139aa67f..f89969edf806 100644 --- a/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java +++ b/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java @@ -32,7 +32,7 @@ import io.airbyte.integrations.base.IntegrationRunner; import io.airbyte.integrations.base.Source; import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; -import io.airbyte.integrations.source.jdbc.JdbcSourceUtils; +import io.airbyte.integrations.source.jdbc.SourceJdbcUtils; import io.airbyte.integrations.source.relationaldb.TableInfo; import io.airbyte.protocol.models.CommonField; import java.sql.JDBCType; @@ -61,7 +61,7 @@ protected Map> discoverPrimaryKeys(JdbcDatabase database, List>> tableInfos) { return tableInfos.stream() .collect(Collectors.toMap( - tableInfo -> JdbcSourceUtils + tableInfo -> SourceJdbcUtils .getFullyQualifiedTableName(tableInfo.getNameSpace(), tableInfo.getName()), tableInfo -> { try { diff --git a/airbyte-integrations/connectors/source-clickhouse/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/ClickHouseSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-clickhouse/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/ClickHouseSourceAcceptanceTest.java index 245dc9daae86..41694d3573db 100644 --- a/airbyte-integrations/connectors/source-clickhouse/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/ClickHouseSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-clickhouse/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/ClickHouseSourceAcceptanceTest.java @@ -32,7 +32,7 @@ import io.airbyte.db.Databases; import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.integrations.source.clickhouse.ClickHouseSource; -import io.airbyte.integrations.source.jdbc.JdbcSourceUtils; +import 
io.airbyte.integrations.source.jdbc.SourceJdbcUtils; import io.airbyte.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.integrations.standardtest.source.TestDestinationEnv; import io.airbyte.protocol.models.CatalogHelpers; @@ -126,10 +126,10 @@ protected void setupEnvironment(TestDestinationEnv environment) throws Exception config.get("database").asText()), ClickHouseSource.DRIVER_CLASS); - final String table1 = JdbcSourceUtils.getFullyQualifiedTableName(SCHEMA_NAME, STREAM_NAME); + final String table1 = SourceJdbcUtils.getFullyQualifiedTableName(SCHEMA_NAME, STREAM_NAME); final String createTable1 = String.format("CREATE TABLE IF NOT EXISTS %s (id INTEGER, name VARCHAR(200)) ENGINE = TinyLog \n", table1); - final String table2 = JdbcSourceUtils.getFullyQualifiedTableName(SCHEMA_NAME, STREAM_NAME2); + final String table2 = SourceJdbcUtils.getFullyQualifiedTableName(SCHEMA_NAME, STREAM_NAME2); final String createTable2 = String.format("CREATE TABLE IF NOT EXISTS %s (id INTEGER, name VARCHAR(200)) ENGINE = TinyLog \n", table2); database.execute(connection -> { diff --git a/airbyte-integrations/connectors/source-jdbc/build.gradle b/airbyte-integrations/connectors/source-jdbc/build.gradle index 7e384bf074f8..955274a099d2 100644 --- a/airbyte-integrations/connectors/source-jdbc/build.gradle +++ b/airbyte-integrations/connectors/source-jdbc/build.gradle @@ -36,7 +36,6 @@ dependencies { testFixturesImplementation project(':airbyte-protocol:models') testFixturesImplementation project(':airbyte-db') testFixturesImplementation project(':airbyte-integrations:bases:base-java') -// testFixturesImplementation project(':airbyte-integrations:connectors:source-jdbc') // todo (cgardens) - the java-test-fixtures plugin doesn't by default extend from test. // we cannot make it depend on the dependencies of source-jdbc:test, because source-jdbc:test diff --git a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java index e0ee7d023028..bab7a06a434b 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java +++ b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java @@ -168,12 +168,14 @@ public List>> discoverInternal(final JdbcDatabas @Override protected JsonSchemaPrimitive getType(JDBCType columnType) { - return JdbcSourceUtils.getType(columnType); + return SourceJdbcUtils.getType(columnType); } @Override protected Map> discoverPrimaryKeys(JdbcDatabase database, List>> tableInfos) { + LOGGER.info("Discover primary keys for tables: " + tableInfos.stream().map(tab -> tab.getName()).collect( + Collectors.toSet())); try { // Get all primary keys without specifying a table name final Map> tablePrimaryKeys = aggregatePrimateKeys(database.bufferedResultSetQuery( @@ -181,7 +183,7 @@ protected Map> discoverPrimaryKeys(JdbcDatabase database, r -> { final String schemaName = r.getObject(JDBC_COLUMN_SCHEMA_NAME) != null ? 
r.getString(JDBC_COLUMN_SCHEMA_NAME) : r.getString(JDBC_COLUMN_DATABASE_NAME); - final String streamName = JdbcSourceUtils.getFullyQualifiedTableName(schemaName, r.getString(JDBC_COLUMN_TABLE_NAME)); + final String streamName = SourceJdbcUtils.getFullyQualifiedTableName(schemaName, r.getString(JDBC_COLUMN_TABLE_NAME)); final String primaryKey = r.getString(JDBC_COLUMN_COLUMN_NAME); return new SimpleImmutableEntry<>(streamName, primaryKey); })); @@ -194,10 +196,10 @@ protected Map> discoverPrimaryKeys(JdbcDatabase database, // Get primary keys one table at a time return tableInfos.stream() .collect(Collectors.toMap( - tableInfo -> JdbcSourceUtils + tableInfo -> SourceJdbcUtils .getFullyQualifiedTableName(tableInfo.getNameSpace(), tableInfo.getName()), tableInfo -> { - final String streamName = JdbcSourceUtils + final String streamName = SourceJdbcUtils .getFullyQualifiedTableName(tableInfo.getNameSpace(), tableInfo.getName()); try { final Map> primaryKeys = aggregatePrimateKeys(database.bufferedResultSetQuery( @@ -231,13 +233,13 @@ public AutoCloseableIterator queryTableIncremental(JdbcDatabase databa connection -> { LOGGER.info("Preparing query for table: {}", tableName); final String sql = String.format("SELECT %s FROM %s WHERE %s > ?", - JdbcSourceUtils.enquoteIdentifierList(connection, columnNames), - JdbcSourceUtils + SourceJdbcUtils.enquoteIdentifierList(connection, columnNames), + SourceJdbcUtils .getFullyQualifiedTableNameWithQuoting(connection, schemaName, tableName), - JdbcSourceUtils.enquoteIdentifier(connection, cursorField)); + SourceJdbcUtils.enquoteIdentifier(connection, cursorField)); final PreparedStatement preparedStatement = connection.prepareStatement(sql); - JdbcSourceUtils.setStatementField(preparedStatement, 1, cursorFieldType, cursor); + SourceJdbcUtils.setStatementField(preparedStatement, 1, cursorFieldType, cursor); LOGGER.info("Executing query for table: {}", tableName); return preparedStatement; }, diff --git a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSourceUtils.java b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/SourceJdbcUtils.java similarity index 99% rename from airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSourceUtils.java rename to airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/SourceJdbcUtils.java index 741d95e07b97..4db786f8f717 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/JdbcSourceUtils.java +++ b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/SourceJdbcUtils.java @@ -38,7 +38,7 @@ import java.util.StringJoiner; import javax.xml.bind.DatatypeConverter; -public class JdbcSourceUtils { +public class SourceJdbcUtils { public static void setStatementField(PreparedStatement preparedStatement, int parameterIndex, diff --git a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java index 62a652ac07ef..d1f5e4d10a1a 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java +++ 
b/airbyte-integrations/connectors/source-jdbc/src/testFixtures/java/io/airbyte/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java @@ -42,7 +42,7 @@ import io.airbyte.db.Databases; import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; -import io.airbyte.integrations.source.jdbc.JdbcSourceUtils; +import io.airbyte.integrations.source.jdbc.SourceJdbcUtils; import io.airbyte.integrations.source.relationaldb.models.DbState; import io.airbyte.integrations.source.relationaldb.models.DbStreamState; import io.airbyte.protocol.models.AirbyteCatalog; @@ -302,16 +302,16 @@ void testDiscoverWithMultipleSchemas() throws Exception { database.execute(connection -> { connection.createStatement().execute( String.format("CREATE TABLE %s(id VARCHAR(200), name VARCHAR(200))", - JdbcSourceUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); + SourceJdbcUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); connection.createStatement() .execute(String.format("INSERT INTO %s(id, name) VALUES ('1','picard')", - JdbcSourceUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); + SourceJdbcUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); connection.createStatement() .execute(String.format("INSERT INTO %s(id, name) VALUES ('2', 'crusher')", - JdbcSourceUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); + SourceJdbcUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); connection.createStatement() .execute(String.format("INSERT INTO %s(id, name) VALUES ('3', 'vash')", - JdbcSourceUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); + SourceJdbcUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); }); final AirbyteCatalog actual = source.discover(config); @@ -840,26 +840,26 @@ private ConfiguredAirbyteStream createTableWithSpaces() throws SQLException { connection.createStatement() .execute( createTableQuery(getFullyQualifiedTableName( - JdbcSourceUtils.enquoteIdentifier(connection, tableNameWithSpaces)), - "id INTEGER, " + JdbcSourceUtils + SourceJdbcUtils.enquoteIdentifier(connection, tableNameWithSpaces)), + "id INTEGER, " + SourceJdbcUtils .enquoteIdentifier(connection, COL_LAST_NAME_WITH_SPACE) + " VARCHAR(200)", "")); connection.createStatement() .execute(String.format("INSERT INTO %s(id, %s) VALUES (1,'picard')", getFullyQualifiedTableName( - JdbcSourceUtils.enquoteIdentifier(connection, tableNameWithSpaces)), - JdbcSourceUtils.enquoteIdentifier(connection, COL_LAST_NAME_WITH_SPACE))); + SourceJdbcUtils.enquoteIdentifier(connection, tableNameWithSpaces)), + SourceJdbcUtils.enquoteIdentifier(connection, COL_LAST_NAME_WITH_SPACE))); connection.createStatement() .execute(String.format("INSERT INTO %s(id, %s) VALUES (2, 'crusher')", getFullyQualifiedTableName( - JdbcSourceUtils.enquoteIdentifier(connection, tableNameWithSpaces)), - JdbcSourceUtils.enquoteIdentifier(connection, COL_LAST_NAME_WITH_SPACE))); + SourceJdbcUtils.enquoteIdentifier(connection, tableNameWithSpaces)), + SourceJdbcUtils.enquoteIdentifier(connection, COL_LAST_NAME_WITH_SPACE))); connection.createStatement() .execute(String.format("INSERT INTO %s(id, %s) VALUES (3, 'vash')", getFullyQualifiedTableName( - JdbcSourceUtils.enquoteIdentifier(connection, tableNameWithSpaces)), - JdbcSourceUtils.enquoteIdentifier(connection, COL_LAST_NAME_WITH_SPACE))); + SourceJdbcUtils.enquoteIdentifier(connection, tableNameWithSpaces)), + SourceJdbcUtils.enquoteIdentifier(connection, COL_LAST_NAME_WITH_SPACE))); }); return 
CatalogHelpers.createConfiguredAirbyteStream( @@ -870,7 +870,7 @@ private ConfiguredAirbyteStream createTableWithSpaces() throws SQLException { } public String getFullyQualifiedTableName(String tableName) { - return JdbcSourceUtils.getFullyQualifiedTableName(getDefaultSchemaName(), tableName); + return SourceJdbcUtils.getFullyQualifiedTableName(getDefaultSchemaName(), tableName); } public void createSchemas() throws SQLException { diff --git a/airbyte-integrations/connectors/source-oracle/src/test/java/io/airbyte/integrations/source/oracle/OracleJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-oracle/src/test/java/io/airbyte/integrations/source/oracle/OracleJdbcSourceAcceptanceTest.java index 597718314204..29687ee67640 100644 --- a/airbyte-integrations/connectors/source-oracle/src/test/java/io/airbyte/integrations/source/oracle/OracleJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-oracle/src/test/java/io/airbyte/integrations/source/oracle/OracleJdbcSourceAcceptanceTest.java @@ -29,7 +29,7 @@ import com.google.common.collect.ImmutableSet; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; -import io.airbyte.integrations.source.jdbc.JdbcSourceUtils; +import io.airbyte.integrations.source.jdbc.SourceJdbcUtils; import io.airbyte.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; import java.math.BigDecimal; import java.sql.Connection; @@ -118,7 +118,7 @@ void cleanUpTables() throws SQLException { ResultSet resultSet = conn.createStatement().executeQuery(String.format("SELECT TABLE_NAME FROM ALL_TABLES WHERE OWNER = '%s'", schemaName)); while (resultSet.next()) { String tableName = resultSet.getString("TABLE_NAME"); - String tableNameProcessed = tableName.contains(" ") ? JdbcSourceUtils + String tableNameProcessed = tableName.contains(" ") ? 
SourceJdbcUtils .enquoteIdentifier(conn, tableName) : tableName; conn.createStatement().executeQuery(String.format("DROP TABLE %s.%s", schemaName, tableNameProcessed)); } diff --git a/airbyte-integrations/connectors/source-redshift/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/RedshiftSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-redshift/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/RedshiftSourceAcceptanceTest.java index 9645ef0672f1..8d2e74f171c7 100644 --- a/airbyte-integrations/connectors/source-redshift/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/RedshiftSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-redshift/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/RedshiftSourceAcceptanceTest.java @@ -31,7 +31,7 @@ import io.airbyte.commons.string.Strings; import io.airbyte.db.Databases; import io.airbyte.db.jdbc.JdbcDatabase; -import io.airbyte.integrations.source.jdbc.JdbcSourceUtils; +import io.airbyte.integrations.source.jdbc.SourceJdbcUtils; import io.airbyte.integrations.source.redshift.RedshiftSource; import io.airbyte.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.integrations.standardtest.source.TestDestinationEnv; @@ -78,7 +78,7 @@ protected void setupEnvironment(TestDestinationEnv environment) throws Exception }); streamName = "customer"; - final String fqTableName = JdbcSourceUtils.getFullyQualifiedTableName(schemaName, streamName); + final String fqTableName = SourceJdbcUtils.getFullyQualifiedTableName(schemaName, streamName); String createTestTable = String.format("CREATE TABLE IF NOT EXISTS %s (c_custkey INTEGER, c_name VARCHAR(16), c_nation VARCHAR(16));\n", fqTableName); database.execute(connection -> { From 42eb2c5e35cf334bb8d1cabd753a2fc642c5af79 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Wed, 23 Jun 2021 12:07:56 +0300 Subject: [PATCH 14/63] BigQueryDatabase impl without row transformation --- .../airbyte_cdk/models/airbyte_protocol.py | 120 +++++++------- airbyte-db/build.gradle | 8 + .../main/java/io/airbyte/db/SqlDatabase.java | 5 +- .../airbyte/db/bigquery/BigQueryDatabase.java | 150 ++++++++++++++++++ .../io/airbyte/db/bigquery/BigQueryUtils.java | 36 +++++ 5 files changed, 250 insertions(+), 69 deletions(-) create mode 100644 airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java create mode 100644 airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java diff --git a/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py b/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py index da45f5cebd14..cb7e2f70bd65 100644 --- a/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py +++ b/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py @@ -1,27 +1,3 @@ -# -# MIT License -# -# Copyright (c) 2020 Airbyte -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -# - # generated by datamodel-codegen: # filename: airbyte_protocol.yaml @@ -34,12 +10,12 @@ class Type(Enum): - RECORD = "RECORD" - STATE = "STATE" - LOG = "LOG" - SPEC = "SPEC" - CONNECTION_STATUS = "CONNECTION_STATUS" - CATALOG = "CATALOG" + RECORD = 'RECORD' + STATE = 'STATE' + LOG = 'LOG' + SPEC = 'SPEC' + CONNECTION_STATUS = 'CONNECTION_STATUS' + CATALOG = 'CATALOG' class AirbyteRecordMessage(BaseModel): @@ -47,41 +23,43 @@ class Config: extra = Extra.allow stream: str = Field(..., description="the name of this record's stream") - data: Dict[str, Any] = Field(..., description="the record data") + data: Dict[str, Any] = Field(..., description='the record data') emitted_at: int = Field( ..., - description="when the data was emitted from the source. epoch in millisecond.", + description='when the data was emitted from the source. epoch in millisecond.', + ) + namespace: Optional[str] = Field( + None, description="the namespace of this record's stream" ) - namespace: Optional[str] = Field(None, description="the namespace of this record's stream") class AirbyteStateMessage(BaseModel): class Config: extra = Extra.allow - data: Dict[str, Any] = Field(..., description="the state data") + data: Dict[str, Any] = Field(..., description='the state data') class Level(Enum): - FATAL = "FATAL" - ERROR = "ERROR" - WARN = "WARN" - INFO = "INFO" - DEBUG = "DEBUG" - TRACE = "TRACE" + FATAL = 'FATAL' + ERROR = 'ERROR' + WARN = 'WARN' + INFO = 'INFO' + DEBUG = 'DEBUG' + TRACE = 'TRACE' class AirbyteLogMessage(BaseModel): class Config: extra = Extra.allow - level: Level = Field(..., description="the type of logging") - message: str = Field(..., description="the log message") + level: Level = Field(..., description='the type of logging') + message: str = Field(..., description='the log message') class Status(Enum): - SUCCEEDED = "SUCCEEDED" - FAILED = "FAILED" + SUCCEEDED = 'SUCCEEDED' + FAILED = 'FAILED' class AirbyteConnectionStatus(BaseModel): @@ -93,14 +71,14 @@ class Config: class SyncMode(Enum): - full_refresh = "full_refresh" - incremental = "incremental" + full_refresh = 'full_refresh' + incremental = 'incremental' class DestinationSyncMode(Enum): - append = "append" - overwrite = "overwrite" - append_dedup = "append_dedup" + append = 'append' + overwrite = 'overwrite' + append_dedup = 'append_dedup' class ConnectorSpecification(BaseModel): @@ -111,13 +89,19 @@ class Config: changelogUrl: Optional[AnyUrl] = None connectionSpecification: Dict[str, Any] = Field( ..., - description="ConnectorDefinition specific blob. Must be a valid JSON string.", + description='ConnectorDefinition specific blob. Must be a valid JSON string.', + ) + supportsIncremental: Optional[bool] = Field( + None, description='If the connector supports incremental mode or not.' + ) + supportsNormalization: Optional[bool] = Field( + False, description='If the connector supports normalization or not.' + ) + supportsDBT: Optional[bool] = Field( + False, description='If the connector supports DBT or not.' 
) - supportsIncremental: Optional[bool] = Field(None, description="If the connector supports incremental mode or not.") - supportsNormalization: Optional[bool] = Field(False, description="If the connector supports normalization or not.") - supportsDBT: Optional[bool] = Field(False, description="If the connector supports DBT or not.") supported_destination_sync_modes: Optional[List[DestinationSyncMode]] = Field( - None, description="List of destination sync modes supported by the connector" + None, description='List of destination sync modes supported by the connector' ) @@ -126,23 +110,25 @@ class Config: extra = Extra.allow name: str = Field(..., description="Stream's name.") - json_schema: Dict[str, Any] = Field(..., description="Stream schema using Json Schema specs.") + json_schema: Dict[str, Any] = Field( + ..., description='Stream schema using Json Schema specs.' + ) supported_sync_modes: Optional[List[SyncMode]] = None source_defined_cursor: Optional[bool] = Field( None, - description="If the source defines the cursor field, then any other cursor field inputs will be ignored. If it does not, either the user_provided one is used, or the default one is used as a backup.", + description='If the source defines the cursor field, then any other cursor field inputs will be ignored. If it does not, either the user_provided one is used, or the default one is used as a backup.', ) default_cursor_field: Optional[List[str]] = Field( None, - description="Path to the field that will be used to determine if a record is new or modified since the last sync. If not provided by the source, the end user will have to specify the comparable themselves.", + description='Path to the field that will be used to determine if a record is new or modified since the last sync. If not provided by the source, the end user will have to specify the comparable themselves.', ) source_defined_primary_key: Optional[List[List[str]]] = Field( None, - description="If the source defines the primary key, paths to the fields that will be used as a primary key. If not provided by the source, the end user will have to specify the primary key themselves.", + description='If the source defines the primary key, paths to the fields that will be used as a primary key. If not provided by the source, the end user will have to specify the primary key themselves.', ) namespace: Optional[str] = Field( None, - description="Optional Source-defined namespace. Currently only used by JDBC destinations to determine what schema to write to. Airbyte streams from the same sources should have the same namespace.", + description='Optional Source-defined namespace. Currently only used by JDBC destinations to determine what schema to write to. Airbyte streams from the same sources should have the same namespace.', ) @@ -154,12 +140,12 @@ class Config: sync_mode: SyncMode cursor_field: Optional[List[str]] = Field( None, - description="Path to the field that will be used to determine if a record is new or modified since the last sync. This field is REQUIRED if `sync_mode` is `incremental`. Otherwise it is ignored.", + description='Path to the field that will be used to determine if a record is new or modified since the last sync. This field is REQUIRED if `sync_mode` is `incremental`. Otherwise it is ignored.', ) destination_sync_mode: DestinationSyncMode primary_key: Optional[List[List[str]]] = Field( None, - description="Paths to the fields that will be used as primary key. This field is REQUIRED if `destination_sync_mode` is `*_dedup`. 
Otherwise it is ignored.", + description='Paths to the fields that will be used as primary key. This field is REQUIRED if `destination_sync_mode` is `*_dedup`. Otherwise it is ignored.', ) @@ -181,21 +167,23 @@ class AirbyteMessage(BaseModel): class Config: extra = Extra.allow - type: Type = Field(..., description="Message type") + type: Type = Field(..., description='Message type') log: Optional[AirbyteLogMessage] = Field( None, - description="log message: any kind of logging you want the platform to know about.", + description='log message: any kind of logging you want the platform to know about.', ) spec: Optional[ConnectorSpecification] = None connectionStatus: Optional[AirbyteConnectionStatus] = None catalog: Optional[AirbyteCatalog] = Field( None, - description="log message: any kind of logging you want the platform to know about.", + description='log message: any kind of logging you want the platform to know about.', + ) + record: Optional[AirbyteRecordMessage] = Field( + None, description='record message: the record' ) - record: Optional[AirbyteRecordMessage] = Field(None, description="record message: the record") state: Optional[AirbyteStateMessage] = Field( None, - description="schema message: the state. Must be the last message produced. The platform uses this information", + description='schema message: the state. Must be the last message produced. The platform uses this information', ) diff --git a/airbyte-db/build.gradle b/airbyte-db/build.gradle index 0205b835d1a2..81266bb73b09 100644 --- a/airbyte-db/build.gradle +++ b/airbyte-db/build.gradle @@ -14,4 +14,12 @@ dependencies { testImplementation 'org.apache.commons:commons-lang3:3.11' testImplementation "org.testcontainers:postgresql:1.15.1" + + // Big Query + implementation platform('com.google.cloud:libraries-bom:20.6.0') + compile 'com.google.cloud:google-cloud-bigquery:1.133.1' + + // Lombok + implementation 'org.projectlombok:lombok:1.18.20' + annotationProcessor('org.projectlombok:lombok:1.18.20') } diff --git a/airbyte-db/src/main/java/io/airbyte/db/SqlDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/SqlDatabase.java index a09738d3d734..f51ace86236f 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/SqlDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/SqlDatabase.java @@ -25,7 +25,6 @@ package io.airbyte.db; import com.fasterxml.jackson.databind.JsonNode; -import java.sql.SQLException; import java.util.stream.Stream; public abstract class SqlDatabase implements AutoCloseable { @@ -33,9 +32,9 @@ public abstract class SqlDatabase implements AutoCloseable { private JsonNode sourceConfig; private JsonNode databaseConfig; - public abstract void execute(String sql) throws SQLException; + public abstract void execute(String sql) throws Exception; - public abstract Stream query(String sql, String... params) throws SQLException; + public abstract Stream query(String sql, String... 
params) throws Exception; public JsonNode getSourceConfig() { return sourceConfig; diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java new file mode 100644 index 000000000000..536897385987 --- /dev/null +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java @@ -0,0 +1,150 @@ +/* + * MIT License + * + * Copyright (c) 2020 Airbyte + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package io.airbyte.db.bigquery; + +import static java.util.Objects.isNull; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.auth.oauth2.ServiceAccountCredentials; +import com.google.cloud.bigquery.BigQuery; +import com.google.cloud.bigquery.BigQueryOptions; +import com.google.cloud.bigquery.Job; +import com.google.cloud.bigquery.JobId; +import com.google.cloud.bigquery.JobInfo; +import com.google.cloud.bigquery.QueryJobConfiguration; +import com.google.cloud.bigquery.QueryParameterValue; +import com.google.cloud.bigquery.StandardSQLTypeName; +import com.google.common.base.Charsets; +import com.google.common.collect.Streams; +import io.airbyte.db.SqlDatabase; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.UUID; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class BigQueryDatabase extends SqlDatabase { + + private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryDatabase.class); + + private BigQuery bigQuery; + private String databaseId; + + public BigQueryDatabase(String projectId, String jsonCreds, String databaseId) { + this.databaseId = databaseId; + try { + BigQueryOptions.Builder bigQueryBuilder = BigQueryOptions.newBuilder(); + ServiceAccountCredentials credentials = null; + if (jsonCreds != null && !jsonCreds.isEmpty()) { + credentials = ServiceAccountCredentials + .fromStream(new ByteArrayInputStream(jsonCreds.getBytes(Charsets.UTF_8))); + } + bigQuery = bigQueryBuilder + .setProjectId(projectId) + .setCredentials(!isNull(credentials) ? 
credentials : ServiceAccountCredentials.getApplicationDefault()) + .build() + .getService(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void execute(String sql) throws SQLException { + final ImmutablePair result = executeQuery(bigQuery, getQueryConfig(sql, Collections.emptyList())); + if (result.getLeft() == null) { + throw new SQLException("BigQuery request failed with error: " + result.getRight() + ". SQL: " + sql); + } + LOGGER.info("BigQuery successfully executed SQL: " + sql); + } + + @Override + public Stream query(String sql, String... params) throws Exception { + List parameterValueList; + if (params == null) + parameterValueList = Collections.emptyList(); + else + parameterValueList = Arrays.stream(params).map(param -> QueryParameterValue.newBuilder().setValue(param).setType( + StandardSQLTypeName.STRING).build()).collect(Collectors.toList()); + + final ImmutablePair result = executeQuery(bigQuery, getQueryConfig(sql, parameterValueList)); + + if (result.getLeft() != null) + return Streams.stream(result.getLeft().getQueryResults().iterateAll()).map(BigQueryUtils::rowToJson); + else + throw new Exception("Failed to execute query " + sql + (params != null ? " with params " + Arrays + .toString(params) : "") + ". Error: " + result.getRight()); + } + + @Override + public void close() throws Exception { + /** + * BigQuery does not require an explicit connection close; the connection is released automatically. + */ + } + + public QueryJobConfiguration getQueryConfig(String sql, List params) { + return QueryJobConfiguration + .newBuilder(String.format(sql, this.databaseId)) + .setUseLegacySql(false) + .setPositionalParameters(params) + .build(); + } + + public ImmutablePair executeQuery(BigQuery bigquery, QueryJobConfiguration queryConfig) { + final JobId jobId = JobId.of(UUID.randomUUID().toString()); + final Job queryJob = bigquery.create(JobInfo.newBuilder(queryConfig).setJobId(jobId).build()); + return executeQuery(queryJob); + } + + private ImmutablePair executeQuery(Job queryJob) { + final Job completedJob = waitForQuery(queryJob); + if (completedJob == null) { + throw new RuntimeException("Job no longer exists"); + } else if (completedJob.getStatus().getError() != null) { + // You can also look at queryJob.getStatus().getExecutionErrors() for all + // errors, not just the latest one.
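As an aside, a minimal usage sketch for the new BigQueryDatabase API introduced in this commit; it is illustrative only and not part of the patch, and the project id, dataset, table name and credentials path below are placeholder assumptions:

import io.airbyte.db.bigquery.BigQueryDatabase;
import java.nio.file.Files;
import java.nio.file.Path;

public class BigQueryDatabaseUsageSketch {

  public static void main(String[] args) throws Exception {
    // Placeholder values; a real caller would read these from the connector configuration.
    String serviceAccountJson = Files.readString(Path.of("/tmp/service-account.json"));
    try (BigQueryDatabase database = new BigQueryDatabase("my-gcp-project", serviceAccountJson, "my_dataset")) {
      // getQueryConfig(...) substitutes the configured dataset id for %s and binds "picard" as a positional parameter.
      database.query("SELECT id, name FROM %s.users WHERE name = ?", "picard")
          .forEach(row -> System.out.println(row));
    }
  }

}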
+ return ImmutablePair.of(null, (completedJob.getStatus().getError().toString())); + } + + return ImmutablePair.of(completedJob, null); + } + + private Job waitForQuery(Job queryJob) { + try { + return queryJob.waitFor(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + +} diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java new file mode 100644 index 000000000000..be371a5dccb0 --- /dev/null +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java @@ -0,0 +1,36 @@ +/* + * MIT License + * + * Copyright (c) 2020 Airbyte + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package io.airbyte.db.bigquery; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.cloud.bigquery.FieldValueList; + +public class BigQueryUtils { + + public static JsonNode rowToJson(FieldValueList rowValues) { + return null; + } + +} From 9326d354e12a6c1a6cc47b1e1bc0bb9e3c85d506 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Wed, 23 Jun 2021 12:13:36 +0300 Subject: [PATCH 15/63] add Static method for BigQueryDatabase instancing --- airbyte-db/src/main/java/io/airbyte/db/Databases.java | 5 +++++ .../main/java/io/airbyte/db/bigquery/BigQueryDatabase.java | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/airbyte-db/src/main/java/io/airbyte/db/Databases.java b/airbyte-db/src/main/java/io/airbyte/db/Databases.java index 01c15a9474b4..10c8541303e4 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/Databases.java +++ b/airbyte-db/src/main/java/io/airbyte/db/Databases.java @@ -25,6 +25,7 @@ package io.airbyte.db; import io.airbyte.commons.lang.Exceptions; +import io.airbyte.db.bigquery.BigQueryDatabase; import io.airbyte.db.jdbc.DefaultJdbcDatabase; import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.db.jdbc.JdbcStreamingQueryConfiguration; @@ -145,4 +146,8 @@ private static BasicDataSource createBasicDataSource(final String username, return connectionPool; } + public static BigQueryDatabase createBigQueryDatabase(final String projectId, final String jsonCreds, final String databaseId) { + return new BigQueryDatabase(projectId, jsonCreds, databaseId); + } + } diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java index 536897385987..6fc382f9876b 100644 --- 
a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java @@ -56,8 +56,8 @@ public class BigQueryDatabase extends SqlDatabase { private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryDatabase.class); - private BigQuery bigQuery; - private String databaseId; + private final BigQuery bigQuery; + private final String databaseId; public BigQueryDatabase(String projectId, String jsonCreds, String databaseId) { this.databaseId = databaseId; From c7d2effc6eee5fec7676a16592dc07a6bacd992e Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Wed, 23 Jun 2021 13:07:35 +0300 Subject: [PATCH 16/63] remove data type parameter limitation + rename class parameters --- .../AbstractRelationalDbSource.java | 82 +++++++++---------- 1 file changed, 40 insertions(+), 42 deletions(-) diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java index 65043fe875c0..2078bc12db1c 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java @@ -53,8 +53,6 @@ import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaPrimitive; import io.airbyte.protocol.models.SyncMode; -import java.sql.SQLException; -import java.sql.SQLType; import java.time.Instant; import java.util.ArrayList; import java.util.Collection; @@ -72,7 +70,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public abstract class AbstractRelationalDbSource extends BaseConnector implements Source { +public abstract class AbstractRelationalDbSource extends BaseConnector implements Source { private static final Logger LOGGER = LoggerFactory.getLogger(AbstractRelationalDbSource.class); @@ -92,14 +90,14 @@ public abstract class AbstractRelationalDbSource> getCheckOperations(JsonNode config) throws Exception; + public abstract List> getCheckOperations(JsonNode config) throws Exception; /** * Map source types and Airbyte types @@ -107,7 +105,7 @@ public abstract class AbstractRelationalDbSource>> discoverInternal(final K database) + protected abstract List>> discoverInternal(final Database database) throws Exception; /** @@ -134,8 +132,8 @@ protected abstract List>> discoverInternal(final K data * @param tableInfos list of tables * @return map of namespace.table and primary key fields. 
*/ - protected abstract Map> discoverPrimaryKeys(K database, - List>> tableInfos); + protected abstract Map> discoverPrimaryKeys(Database database, + List>> tableInfos); /** * Returns quote symbol of the database @@ -146,8 +144,8 @@ protected abstract Map> discoverPrimaryKeys(K database, @Override public AirbyteConnectionStatus check(JsonNode config) { - try (final K database = createDatabaseInternal(config)) { - for (CheckedConsumer checkOperation : getCheckOperations(config)) { + try (final Database database = createDatabaseInternal(config)) { + for (CheckedConsumer checkOperation : getCheckOperations(config)) { checkOperation.accept(database); } @@ -162,7 +160,7 @@ public AirbyteConnectionStatus check(JsonNode config) { @Override public AirbyteCatalog discover(JsonNode config) throws Exception { - try (final K database = createDatabaseInternal(config)) { + try (final Database database = createDatabaseInternal(config)) { List streams = getTables(database).stream() .map(tableInfo -> CatalogHelpers .createAirbyteStream(tableInfo.getName(), tableInfo.getNameSpace(), tableInfo.getFields()) @@ -180,9 +178,9 @@ public AutoCloseableIterator read(JsonNode config, ConfiguredAir catalog); final Instant emittedAt = Instant.now(); - final K database = createDatabaseInternal(config); + final Database database = createDatabaseInternal(config); - final Map>> fullyQualifiedTableNameToInfo = + final Map>> fullyQualifiedTableNameToInfo = discoverWithoutSystemTables(database) .stream() .collect(Collectors.toMap(t -> String.format("%s.%s", t.getNameSpace(), t.getName()), Function @@ -205,9 +203,9 @@ public AutoCloseableIterator read(JsonNode config, ConfiguredAir }); } - public List> getIncrementalIterators(K database, + public List> getIncrementalIterators(Database database, ConfiguredAirbyteCatalog catalog, - Map>> tableNameToTable, + Map>> tableNameToTable, StateManager stateManager, Instant emittedAt) { return getSelectedIterators( @@ -219,9 +217,9 @@ public List> getIncrementalIterators(K dat configuredStream -> configuredStream.getSyncMode().equals(SyncMode.INCREMENTAL)); } - public List> getFullRefreshIterators(K database, + public List> getFullRefreshIterators(Database database, ConfiguredAirbyteCatalog catalog, - Map>> tableNameToTable, + Map>> tableNameToTable, StateManager stateManager, Instant emittedAt) { return getSelectedIterators( @@ -233,9 +231,9 @@ public List> getFullRefreshIterators(K dat configuredStream -> configuredStream.getSyncMode().equals(SyncMode.FULL_REFRESH)); } - protected List> getSelectedIterators(K database, + protected List> getSelectedIterators(Database database, ConfiguredAirbyteCatalog catalog, - Map>> tableNameToTable, + Map>> tableNameToTable, StateManager stateManager, Instant emittedAt, Predicate selector) { @@ -249,7 +247,7 @@ protected List> getSelectedIterators(K dat continue; } - final TableInfo> table = tableNameToTable.get(fullyQualifiedTableName); + final TableInfo> table = tableNameToTable.get(fullyQualifiedTableName); final AutoCloseableIterator tableReadIterator = createReadIterator( database, airbyteStream, @@ -263,9 +261,9 @@ protected List> getSelectedIterators(K dat return iteratorList; } - protected AutoCloseableIterator createReadIterator(K database, + protected AutoCloseableIterator createReadIterator(Database database, ConfiguredAirbyteStream airbyteStream, - TableInfo> table, + TableInfo> table, StateManager stateManager, Instant emittedAt) { final String streamName = airbyteStream.getStream().getName(); @@ -320,16 +318,16 @@ protected 
AutoCloseableIterator createReadIterator(K database, }); } - protected AutoCloseableIterator getIncrementalStream(K database, + protected AutoCloseableIterator getIncrementalStream(Database database, ConfiguredAirbyteStream airbyteStream, List selectedDatabaseFields, - TableInfo> table, + TableInfo> table, String cursor, Instant emittedAt) { final String streamName = airbyteStream.getStream().getName(); final String namespace = airbyteStream.getStream().getNamespace(); final String cursorField = IncrementalUtils.getCursorField(airbyteStream); - final T cursorType = table.getFields().stream() + final DataType cursorType = table.getFields().stream() .filter(info -> info.getName().equals(cursorField)) .map(CommonField::getType) .findFirst() @@ -350,11 +348,11 @@ protected AutoCloseableIterator getIncrementalStream(K database, return getMessageIterator(queryIterator, streamName, namespace, emittedAt.toEpochMilli()); } - protected AutoCloseableIterator getFullRefreshStream(K database, + protected AutoCloseableIterator getFullRefreshStream(Database database, String streamName, String namespace, List selectedDatabaseFields, - TableInfo> table, + TableInfo> table, Instant emittedAt) { final AutoCloseableIterator queryStream = queryTableFullRefresh(database, selectedDatabaseFields, table.getNameSpace(), table.getName()); @@ -365,8 +363,8 @@ protected String getFullyQualifiedTableName(String nameSpace, String tableName) return nameSpace != null ? nameSpace + "." + tableName : tableName; } - protected List> getTables(final K database) throws Exception { - final List>> tableInfos = discoverWithoutSystemTables(database); + protected List> getTables(final Database database) throws Exception { + final List>> tableInfos = discoverWithoutSystemTables(database); final Map> fullyQualifiedTableNameToPrimaryKeys = discoverPrimaryKeys(database, tableInfos); return tableInfos.stream() @@ -390,12 +388,12 @@ protected List> getTables(final K database) throws Exception { .collect(Collectors.toList()); } - protected void assertColumnsWithSameNameAreSame(String nameSpace, String tableName, List> columns) { + protected void assertColumnsWithSameNameAreSame(String nameSpace, String tableName, List> columns) { columns.stream() - .collect(Collectors.groupingBy(CommonField::getName)) + .collect(Collectors.groupingBy(CommonField::getName)) .values() .forEach(columnsWithSameName -> { - final CommonField comparisonColumn = columnsWithSameName.get(0); + final CommonField comparisonColumn = columnsWithSameName.get(0); columnsWithSameName.forEach(column -> { if (!column.equals(comparisonColumn)) { throw new RuntimeException( @@ -406,9 +404,9 @@ protected void assertColumnsWithSameNameAreSame(String nameSpace, String tableNa }); } - protected List>> discoverWithoutSystemTables(final K database) throws Exception { + protected List>> discoverWithoutSystemTables(final Database database) throws Exception { Set systemNameSpaces = getExcludedInternalNameSpaces(); - List>> discoveredTables = discoverInternal(database); + List>> discoveredTables = discoverInternal(database); return (systemNameSpaces == null || systemNameSpaces.isEmpty() ? 
discoveredTables : discoveredTables.stream().filter(table -> !systemNameSpaces.contains(table.getNameSpace())).collect( Collectors.toList())); @@ -444,18 +442,18 @@ public AutoCloseableIterator getMessageIterator(AutoCloseableIte .withData(r))); } - protected AutoCloseableIterator queryTable(K database, String sqlQuery) { + protected AutoCloseableIterator queryTable(Database database, String sqlQuery) { return AutoCloseableIterators.lazyIterator(() -> { try { final Stream stream = database.query(sqlQuery); return AutoCloseableIterators.fromStream(stream); - } catch (SQLException e) { + } catch (Exception e) { throw new RuntimeException(e); } }); } - public AutoCloseableIterator queryTableFullRefresh(K database, + public AutoCloseableIterator queryTableFullRefresh(Database database, List columnNames, String schemaName, String tableName) { @@ -478,16 +476,16 @@ public AutoCloseableIterator queryTableFullRefresh(K database, * @param cursor cursor value * @return iterator with read data */ - public abstract AutoCloseableIterator queryTableIncremental(K database, + public abstract AutoCloseableIterator queryTableIncremental(Database database, List columnNames, String schemaName, String tableName, String cursorField, - T cursorFieldType, + DataType cursorFieldType, String cursor); - private K createDatabaseInternal(JsonNode sourceConfig) throws Exception { - K database = createDatabase(sourceConfig); + private Database createDatabaseInternal(JsonNode sourceConfig) throws Exception { + Database database = createDatabase(sourceConfig); database.setSourceConfig(sourceConfig); database.setDatabaseConfig(toDatabaseConfig(sourceConfig)); return database; From 06f5d137e0cec2425f3bc108615ce23efa365715 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Wed, 23 Jun 2021 22:29:44 +0300 Subject: [PATCH 17/63] Move DataTypeUtils from jdbs to common + impl basic types BigQueryUtils --- .../java/io/airbyte/db/DataTypeSupplier.java | 34 +++++++++ .../java/io/airbyte/db/DataTypeUtils.java | 63 +++++++++++++++ .../airbyte/db/bigquery/BigQueryDatabase.java | 9 ++- .../io/airbyte/db/bigquery/BigQueryUtils.java | 76 ++++++++++++++++++- .../java/io/airbyte/db/jdbc/JdbcUtils.java | 61 ++++----------- .../jdbc/SqlOperationsUtilsTest.java | 7 +- .../source/jdbc/SourceJdbcUtils.java | 6 +- 7 files changed, 198 insertions(+), 58 deletions(-) create mode 100644 airbyte-db/src/main/java/io/airbyte/db/DataTypeSupplier.java create mode 100644 airbyte-db/src/main/java/io/airbyte/db/DataTypeUtils.java diff --git a/airbyte-db/src/main/java/io/airbyte/db/DataTypeSupplier.java b/airbyte-db/src/main/java/io/airbyte/db/DataTypeSupplier.java new file mode 100644 index 000000000000..18062b8e4ccd --- /dev/null +++ b/airbyte-db/src/main/java/io/airbyte/db/DataTypeSupplier.java @@ -0,0 +1,34 @@ +/* + * MIT License + * + * Copyright (c) 2020 Airbyte + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package io.airbyte.db; + +import java.sql.SQLException; + +@FunctionalInterface +public interface DataTypeSupplier { + + DataType apply() throws SQLException; + +} diff --git a/airbyte-db/src/main/java/io/airbyte/db/DataTypeUtils.java b/airbyte-db/src/main/java/io/airbyte/db/DataTypeUtils.java new file mode 100644 index 000000000000..0ec38e828a18 --- /dev/null +++ b/airbyte-db/src/main/java/io/airbyte/db/DataTypeUtils.java @@ -0,0 +1,63 @@ +/* + * MIT License + * + * Copyright (c) 2020 Airbyte + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package io.airbyte.db; + +import java.sql.Date; +import java.sql.SQLException; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.time.Instant; +import java.util.function.Function; + +public class DataTypeUtils { + + public static final DateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); // Quoted "Z" to indicate UTC, no timezone offset + + public static T nullIfInvalid(DataTypeSupplier valueProducer) { + return nullIfInvalid(valueProducer, ignored -> true); + } + + public static T nullIfInvalid(DataTypeSupplier valueProducer, Function isValidFn) { + // Some edge case values (e.g: Infinity, NaN) have no java or JSON equivalent, and will throw an + // exception when parsed. We want to parse those + // values as null. + // This method reduces error handling boilerplate. + try { + T value = valueProducer.apply(); + return isValidFn.apply(value) ? 
value : null; + } catch (SQLException e) { + return null; + } + } + + public static String toISO8601String(long epochMillis) { + return DATE_FORMAT.format(Date.from(Instant.ofEpochMilli(epochMillis))); + } + + public static String toISO8601String(java.util.Date date) { + return DATE_FORMAT.format(date); + } + +} diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java index 6fc382f9876b..5c83b7e470d4 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java @@ -30,6 +30,7 @@ import com.google.auth.oauth2.ServiceAccountCredentials; import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.BigQueryOptions; +import com.google.cloud.bigquery.FieldList; import com.google.cloud.bigquery.Job; import com.google.cloud.bigquery.JobId; import com.google.cloud.bigquery.JobInfo; @@ -98,9 +99,11 @@ public Stream query(String sql, String... params) throws Exception { final ImmutablePair result = executeQuery(bigQuery, getQueryConfig(sql, parameterValueList)); - if (result.getLeft() != null) - return Streams.stream(result.getLeft().getQueryResults().iterateAll()).map(BigQueryUtils::rowToJson); - else + if (result.getLeft() != null) { + FieldList fieldList = result.getLeft().getQueryResults().getSchema().getFields(); + return Streams.stream(result.getLeft().getQueryResults().iterateAll()) + .map(fieldValues -> BigQueryUtils.rowToJson(fieldValues, fieldList)); + } else throw new Exception("Failed to execute query " + sql + (params != null ? " with params " + Arrays .toString(params) : "") + ". Error: " + result.getRight()); } diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java index be371a5dccb0..9f4dace424dc 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java @@ -24,13 +24,85 @@ package io.airbyte.db.bigquery; +import static io.airbyte.db.DataTypeUtils.nullIfInvalid; + import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.cloud.bigquery.Field; +import com.google.cloud.bigquery.FieldList; +import com.google.cloud.bigquery.FieldValue; import com.google.cloud.bigquery.FieldValueList; +import com.google.cloud.bigquery.LegacySQLTypeName; +import io.airbyte.commons.json.Jsons; +import io.airbyte.db.DataTypeUtils; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Collections; +import java.util.Date; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class BigQueryUtils { - public static JsonNode rowToJson(FieldValueList rowValues) { - return null; + private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryUtils.class); + + public static final String BIG_QUERY_DATE_FORMAT = "yyyy-MM-dd"; + + public static JsonNode rowToJson(FieldValueList rowValues, FieldList fieldList) { + ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); + fieldList.forEach(field -> setJsonField(field, rowValues.get(field.getName()), jsonNode)); + return jsonNode; + } + + private static void setJsonField(Field field, FieldValue fieldValue, ObjectNode node) { + LegacySQLTypeName fieldType = field.getType(); + String fieldName = field.getName(); + switch (fieldType.getStandardType()) 
{ + case BOOL -> node.put(fieldName, fieldValue.getBooleanValue()); + case INT64 -> node.put(fieldName, fieldValue.getLongValue()); + case FLOAT64 -> node.put(fieldName, fieldValue.getDoubleValue()); + case NUMERIC -> node.put(fieldName, fieldValue.getNumericValue()); + case BIGNUMERIC -> node.put(fieldName, nullIfInvalid(fieldValue::getNumericValue)); + case STRING -> node.put(fieldName, fieldValue.getStringValue()); + case BYTES -> node.put(fieldName, fieldValue.getBytesValue()); + case STRUCT -> node.put(fieldName, ""); // @TODO impl + case ARRAY -> node.put(fieldName, ""); // @TODO impl + case TIMESTAMP -> node.put(fieldName, ""); // @TODO impl + case DATE -> node.put(fieldName, DataTypeUtils.toISO8601String(getDateValue(fieldValue))); + case TIME -> node.put(fieldName, ""); // @TODO impl + case DATETIME -> node.put(fieldName, ""); // @TODO impl + case GEOGRAPHY -> node.put(fieldName, ""); // @TODO impl + } + } + + public static Date getDateValue(FieldValue fieldValue) { + Date parsedValue = null; + String value = fieldValue.getStringValue(); + try { + parsedValue = new SimpleDateFormat(BIG_QUERY_DATE_FORMAT).parse(value); + } catch (ParseException e) { + LOGGER.error("Failed to parse date value: " + value + ". Null is returned."); + } + return parsedValue; } + /* + * final int columnTypeInt = r.getMetaData().getColumnType(i); final String columnName = + * r.getMetaData().getColumnName(i); final JDBCType columnType = safeGetJdbcType(columnTypeInt); + * + * // https://www.cis.upenn.edu/~bcpierce/courses/629/jdkdocs/guide/jdbc/getstart/mapping.doc.html + * switch (columnType) { case BIT, BOOLEAN -> o.put(columnName, r.getBoolean(i)); case TINYINT, + * SMALLINT -> o.put(columnName, r.getShort(i)); case INTEGER -> putInteger(o, columnName, r, i); + * case BIGINT -> o.put(columnName, nullIfInvalid(() -> r.getLong(i))); case FLOAT, DOUBLE -> + * o.put(columnName, nullIfInvalid(() -> r.getDouble(i), Double::isFinite)); case REAL -> + * o.put(columnName, nullIfInvalid(() -> r.getFloat(i), Float::isFinite)); case NUMERIC, DECIMAL -> + * o.put(columnName, nullIfInvalid(() -> r.getBigDecimal(i))); case CHAR, VARCHAR, LONGVARCHAR -> + * o.put(columnName, r.getString(i)); case DATE -> o.put(columnName, toISO8601String(r.getDate(i))); + * case TIME -> o.put(columnName, toISO8601String(r.getTime(i))); case TIMESTAMP -> { // + * https://www.cis.upenn.edu/~bcpierce/courses/629/jdkdocs/guide/jdbc/getstart/mapping.doc.html + * final Timestamp t = r.getTimestamp(i); java.util.Date d = new java.util.Date(t.getTime() + + * (t.getNanos() / 1000000)); o.put(columnName, toISO8601String(d)); } case BLOB, BINARY, VARBINARY, + * LONGVARBINARY -> o.put(columnName, r.getBytes(i)); default -> o.put(columnName, r.getString(i)); + * } + */ } diff --git a/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcUtils.java b/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcUtils.java index cd153385c236..4c2c37587b9b 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcUtils.java +++ b/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcUtils.java @@ -28,6 +28,7 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import io.airbyte.commons.functional.CheckedFunction; import io.airbyte.commons.json.Jsons; +import io.airbyte.db.DataTypeUtils; import io.airbyte.protocol.models.JsonSchemaPrimitive; import java.math.BigDecimal; import java.sql.Date; @@ -36,23 +37,17 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Timestamp; -import java.text.DateFormat; import java.text.ParseException; -import
java.text.SimpleDateFormat; -import java.time.Instant; import java.util.Collections; import java.util.Spliterator; import java.util.Spliterators; import java.util.function.Consumer; -import java.util.function.Function; import java.util.stream.Stream; import java.util.stream.StreamSupport; import javax.xml.bind.DatatypeConverter; public class JdbcUtils { - public static final DateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); // Quoted "Z" to indicate UTC, no timezone offset - /** * Map records returned in a result set. * @@ -130,18 +125,19 @@ private static void setJsonField(ResultSet r, int i, ObjectNode o) throws SQLExc case BIT, BOOLEAN -> o.put(columnName, r.getBoolean(i)); case TINYINT, SMALLINT -> o.put(columnName, r.getShort(i)); case INTEGER -> putInteger(o, columnName, r, i); - case BIGINT -> o.put(columnName, nullIfInvalid(() -> r.getLong(i))); - case FLOAT, DOUBLE -> o.put(columnName, nullIfInvalid(() -> r.getDouble(i), Double::isFinite)); - case REAL -> o.put(columnName, nullIfInvalid(() -> r.getFloat(i), Float::isFinite)); - case NUMERIC, DECIMAL -> o.put(columnName, nullIfInvalid(() -> r.getBigDecimal(i))); + case BIGINT -> o.put(columnName, DataTypeUtils.nullIfInvalid(() -> r.getLong(i))); + case FLOAT, DOUBLE -> o.put(columnName, DataTypeUtils + .nullIfInvalid(() -> r.getDouble(i), Double::isFinite)); + case REAL -> o.put(columnName, DataTypeUtils.nullIfInvalid(() -> r.getFloat(i), Float::isFinite)); + case NUMERIC, DECIMAL -> o.put(columnName, DataTypeUtils.nullIfInvalid(() -> r.getBigDecimal(i))); case CHAR, VARCHAR, LONGVARCHAR -> o.put(columnName, r.getString(i)); - case DATE -> o.put(columnName, toISO8601String(r.getDate(i))); - case TIME -> o.put(columnName, toISO8601String(r.getTime(i))); + case DATE -> o.put(columnName, DataTypeUtils.toISO8601String(r.getDate(i))); + case TIME -> o.put(columnName, DataTypeUtils.toISO8601String(r.getTime(i))); case TIMESTAMP -> { // https://www.cis.upenn.edu/~bcpierce/courses/629/jdkdocs/guide/jdbc/getstart/mapping.doc.html final Timestamp t = r.getTimestamp(i); java.util.Date d = new java.util.Date(t.getTime() + (t.getNanos() / 1000000)); - o.put(columnName, toISO8601String(d)); + o.put(columnName, DataTypeUtils.toISO8601String(d)); } case BLOB, BINARY, VARBINARY, LONGVARBINARY -> o.put(columnName, r.getBytes(i)); default -> o.put(columnName, r.getString(i)); @@ -157,20 +153,12 @@ private static void putInteger(ObjectNode node, String columnName, ResultSet res try { node.put(columnName, resultSet.getInt(index)); } catch (SQLException e) { - node.put(columnName, nullIfInvalid(() -> resultSet.getLong(index))); + node.put(columnName, DataTypeUtils.nullIfInvalid(() -> resultSet.getLong(index))); } } // todo (cgardens) - move generic date helpers to commons. 
- public static String toISO8601String(long epochMillis) { - return DATE_FORMAT.format(Date.from(Instant.ofEpochMilli(epochMillis))); - } - - public static String toISO8601String(java.util.Date date) { - return DATE_FORMAT.format(date); - } - public static void setStatementField(PreparedStatement preparedStatement, int parameterIndex, JDBCType cursorFieldType, @@ -183,7 +171,8 @@ public static void setStatementField(PreparedStatement preparedStatement, // value in the following format case TIME, TIMESTAMP -> { try { - preparedStatement.setTimestamp(parameterIndex, Timestamp.from(DATE_FORMAT.parse(value).toInstant())); + preparedStatement.setTimestamp(parameterIndex, Timestamp.from( + DataTypeUtils.DATE_FORMAT.parse(value).toInstant())); } catch (ParseException e) { throw new RuntimeException(e); } @@ -191,7 +180,7 @@ public static void setStatementField(PreparedStatement preparedStatement, case DATE -> { try { - Timestamp from = Timestamp.from(DATE_FORMAT.parse(value).toInstant()); + Timestamp from = Timestamp.from(DataTypeUtils.DATE_FORMAT.parse(value).toInstant()); preparedStatement.setDate(parameterIndex, new Date(from.getTime())); } catch (ParseException e) { throw new RuntimeException(e); @@ -241,28 +230,4 @@ public static JsonSchemaPrimitive getType(JDBCType jdbcType) { }; } - private static T nullIfInvalid(SQLSupplier valueProducer) { - return nullIfInvalid(valueProducer, ignored -> true); - } - - private static T nullIfInvalid(SQLSupplier valueProducer, Function isValidFn) { - // Some edge case values (e.g: Infinity, NaN) have no java or JSON equivalent, and will throw an - // exception when parsed. We want to parse those - // values as null. - // This method reduces error handling boilerplate. - try { - T value = valueProducer.apply(); - return isValidFn.apply(value) ? 
value : null; - } catch (SQLException e) { - return null; - } - } - - @FunctionalInterface - private interface SQLSupplier { - - O apply() throws SQLException; - - } - } diff --git a/airbyte-integrations/connectors/destination-jdbc/src/test/java/io/airbyte/integrations/destination/jdbc/SqlOperationsUtilsTest.java b/airbyte-integrations/connectors/destination-jdbc/src/test/java/io/airbyte/integrations/destination/jdbc/SqlOperationsUtilsTest.java index 2822f29733ac..07e5350e8d92 100644 --- a/airbyte-integrations/connectors/destination-jdbc/src/test/java/io/airbyte/integrations/destination/jdbc/SqlOperationsUtilsTest.java +++ b/airbyte-integrations/connectors/destination-jdbc/src/test/java/io/airbyte/integrations/destination/jdbc/SqlOperationsUtilsTest.java @@ -33,6 +33,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import io.airbyte.commons.json.Jsons; +import io.airbyte.db.DataTypeUtils; import io.airbyte.db.Databases; import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.db.jdbc.JdbcUtils; @@ -111,12 +112,14 @@ void testInsertRawRecordsInSingleQuery() throws SQLException { Jsons.jsonNode(ImmutableMap.builder() .put(JavaBaseConstants.COLUMN_NAME_AB_ID, RECORD1_UUID) .put(JavaBaseConstants.COLUMN_NAME_DATA, records.get(0).getData()) - .put(JavaBaseConstants.COLUMN_NAME_EMITTED_AT, JdbcUtils.toISO8601String(records.get(0).getEmittedAt())) + .put(JavaBaseConstants.COLUMN_NAME_EMITTED_AT, DataTypeUtils + .toISO8601String(records.get(0).getEmittedAt())) .build()), Jsons.jsonNode(ImmutableMap.builder() .put(JavaBaseConstants.COLUMN_NAME_AB_ID, RECORD2_UUID) .put(JavaBaseConstants.COLUMN_NAME_DATA, records.get(1).getData()) - .put(JavaBaseConstants.COLUMN_NAME_EMITTED_AT, JdbcUtils.toISO8601String(records.get(1).getEmittedAt())) + .put(JavaBaseConstants.COLUMN_NAME_EMITTED_AT, DataTypeUtils + .toISO8601String(records.get(1).getEmittedAt())) .build())); actualRecords.forEach( diff --git a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/SourceJdbcUtils.java b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/SourceJdbcUtils.java index 4db786f8f717..97165b6fe894 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/SourceJdbcUtils.java +++ b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/SourceJdbcUtils.java @@ -24,7 +24,7 @@ package io.airbyte.integrations.source.jdbc; -import io.airbyte.db.jdbc.JdbcUtils; +import io.airbyte.db.DataTypeUtils; import io.airbyte.protocol.models.JsonSchemaPrimitive; import java.math.BigDecimal; import java.sql.Connection; @@ -53,7 +53,7 @@ public static void setStatementField(PreparedStatement preparedStatement, case TIME, TIMESTAMP -> { try { preparedStatement.setTimestamp(parameterIndex, Timestamp - .from(JdbcUtils.DATE_FORMAT.parse(value).toInstant())); + .from(DataTypeUtils.DATE_FORMAT.parse(value).toInstant())); } catch (ParseException e) { throw new RuntimeException(e); } @@ -61,7 +61,7 @@ public static void setStatementField(PreparedStatement preparedStatement, case DATE -> { try { - Timestamp from = Timestamp.from(JdbcUtils.DATE_FORMAT.parse(value).toInstant()); + Timestamp from = Timestamp.from(DataTypeUtils.DATE_FORMAT.parse(value).toInstant()); preparedStatement.setDate(parameterIndex, new Date(from.getTime())); } catch (ParseException e) { throw new RuntimeException(e); From 75e7c99d69f08bf234d44758ab510b9a19742ac7 Mon Sep 
17 00:00:00 2001 From: Andrii Leonets Date: Wed, 23 Jun 2021 23:49:04 +0300 Subject: [PATCH 18/63] make DB2 in line with new relational abstract classes --- airbyte-integrations/connectors/source-db2/build.gradle | 1 + .../io.airbyte.integrations.source.db2/Db2Source.java | 4 ++-- .../Db2JdbcSourceAcceptanceTest.java | 9 ++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/airbyte-integrations/connectors/source-db2/build.gradle b/airbyte-integrations/connectors/source-db2/build.gradle index 7c713f1e8e7f..0778b1d32636 100644 --- a/airbyte-integrations/connectors/source-db2/build.gradle +++ b/airbyte-integrations/connectors/source-db2/build.gradle @@ -12,6 +12,7 @@ dependencies { implementation project(':airbyte-db') implementation project(':airbyte-integrations:bases:base-java') implementation project(':airbyte-integrations:connectors:source-jdbc') + implementation project(':airbyte-integrations:connectors:source-relational-db') implementation project(':airbyte-protocol:models') implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs) diff --git a/airbyte-integrations/connectors/source-db2/src/main/java/io.airbyte.integrations.source.db2/Db2Source.java b/airbyte-integrations/connectors/source-db2/src/main/java/io.airbyte.integrations.source.db2/Db2Source.java index a12e9a24fb3d..65d228417d85 100644 --- a/airbyte-integrations/connectors/source-db2/src/main/java/io.airbyte.integrations.source.db2/Db2Source.java +++ b/airbyte-integrations/connectors/source-db2/src/main/java/io.airbyte.integrations.source.db2/Db2Source.java @@ -51,7 +51,7 @@ public static void main(String[] args) throws Exception { } @Override - public JsonNode toJdbcConfig(JsonNode config) { + public JsonNode toDatabaseConfig(JsonNode config) { return Jsons.jsonNode(ImmutableMap.builder() .put("jdbc_url", String.format("jdbc:db2://%s:%s/%s", config.get("host").asText(), @@ -63,7 +63,7 @@ public JsonNode toJdbcConfig(JsonNode config) { } @Override - public Set getExcludedInternalSchemas() { + public Set getExcludedInternalNameSpaces() { return Set.of( "NULLID", "SYSCAT", "SQLJ", "SYSFUN", "SYSIBM", "SYSIBMADM", "SYSIBMINTERNAL", "SYSIBMTS", "SYSPROC", "SYSPUBLIC", "SYSSTAT", "SYSTOOLS"); diff --git a/airbyte-integrations/connectors/source-db2/src/test/java/io.airbyte.integrations.source.db2/Db2JdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-db2/src/test/java/io.airbyte.integrations.source.db2/Db2JdbcSourceAcceptanceTest.java index ea34e5aa7562..3f212b21ef72 100644 --- a/airbyte-integrations/connectors/source-db2/src/test/java/io.airbyte.integrations.source.db2/Db2JdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-db2/src/test/java/io.airbyte.integrations.source.db2/Db2JdbcSourceAcceptanceTest.java @@ -28,8 +28,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import io.airbyte.commons.json.Jsons; -import io.airbyte.db.jdbc.JdbcUtils; -import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; +import io.airbyte.integrations.source.jdbc.SourceJdbcUtils; import io.airbyte.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; import java.util.Collections; import java.util.Set; @@ -104,13 +103,13 @@ public void clean() throws Exception { } super.database.execute(connection -> connection.createStatement().execute(String .format("DROP TABLE IF EXISTS %s.%s", SCHEMA_NAME, - JdbcUtils.enquoteIdentifier(connection, TABLE_NAME_WITH_SPACES)))); + 
SourceJdbcUtils.enquoteIdentifier(connection, TABLE_NAME_WITH_SPACES)))); super.database.execute(connection -> connection.createStatement().execute(String .format("DROP TABLE IF EXISTS %s.%s", SCHEMA_NAME, - JdbcUtils.enquoteIdentifier(connection, TABLE_NAME_WITH_SPACES + 2)))); + SourceJdbcUtils.enquoteIdentifier(connection, TABLE_NAME_WITH_SPACES + 2)))); super.database.execute(connection -> connection.createStatement().execute(String .format("DROP TABLE IF EXISTS %s.%s", SCHEMA_NAME2, - JdbcUtils.enquoteIdentifier(connection, TABLE_NAME)))); + SourceJdbcUtils.enquoteIdentifier(connection, TABLE_NAME)))); super.tearDown(); } From 3dc5383180043a47137bd8d6c2284feec278a432 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Thu, 24 Jun 2021 00:00:59 +0300 Subject: [PATCH 19/63] add missing import --- .../Db2JdbcSourceAcceptanceTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/airbyte-integrations/connectors/source-db2/src/test/java/io.airbyte.integrations.source.db2/Db2JdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-db2/src/test/java/io.airbyte.integrations.source.db2/Db2JdbcSourceAcceptanceTest.java index 3f212b21ef72..d017470c763b 100644 --- a/airbyte-integrations/connectors/source-db2/src/test/java/io.airbyte.integrations.source.db2/Db2JdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-db2/src/test/java/io.airbyte.integrations.source.db2/Db2JdbcSourceAcceptanceTest.java @@ -28,6 +28,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.integrations.source.jdbc.SourceJdbcUtils; import io.airbyte.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; import java.util.Collections; From fd60d1459272a704a933544675404c702f257c20 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Tue, 29 Jun 2021 23:49:33 +0300 Subject: [PATCH 20/63] cover all biqquery classes + add type transformation method from StandardSQLTypeName to JsonSchemaPrimitive --- .../io/airbyte/db/bigquery/BigQueryUtils.java | 42 +++++++------------ 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java index 9f4dace424dc..a132405d09d4 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java @@ -25,6 +25,7 @@ package io.airbyte.db.bigquery; import static io.airbyte.db.DataTypeUtils.nullIfInvalid; +import static io.airbyte.db.DataTypeUtils.toISO8601String; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; @@ -33,8 +34,9 @@ import com.google.cloud.bigquery.FieldValue; import com.google.cloud.bigquery.FieldValueList; import com.google.cloud.bigquery.LegacySQLTypeName; +import com.google.cloud.bigquery.StandardSQLTypeName; import io.airbyte.commons.json.Jsons; -import io.airbyte.db.DataTypeUtils; +import io.airbyte.protocol.models.JsonSchemaPrimitive; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Collections; @@ -65,13 +67,8 @@ private static void setJsonField(Field field, FieldValue fieldValue, ObjectNode case BIGNUMERIC -> node.put(fieldName, nullIfInvalid(fieldValue::getNumericValue)); case STRING -> node.put(fieldName, fieldValue.getStringValue()); case BYTES -> node.put(fieldName, 
fieldValue.getBytesValue()); - case STRUCT -> node.put(fieldName, ""); // @TODO impl - case ARRAY -> node.put(fieldName, ""); // @TODO impl - case TIMESTAMP -> node.put(fieldName, ""); // @TODO impl - case DATE -> node.put(fieldName, DataTypeUtils.toISO8601String(getDateValue(fieldValue))); - case TIME -> node.put(fieldName, ""); // @TODO impl - case DATETIME -> node.put(fieldName, ""); // @TODO impl - case GEOGRAPHY -> node.put(fieldName, ""); // @TODO impl + case TIMESTAMP, DATE, TIME, DATETIME -> node.put(fieldName, toISO8601String(fieldValue.getTimestampValue())); + default -> node.put(fieldName, fieldValue.getStringValue()); } } @@ -85,24 +82,15 @@ public static Date getDateValue(FieldValue fieldValue) { } return parsedValue; } - /* - * final int columnTypeInt = r.getMetaData().getColumnType(i); final String columnName = - * r.getMetaData().getColumnName(i); final JDBCType columnType = safeGetJdbcType(columnTypeInt); - * - * // https://www.cis.upenn.edu/~bcpierce/courses/629/jdkdocs/guide/jdbc/getstart/mapping.doc.html - * switch (columnType) { case BIT, BOOLEAN -> o.put(columnName, r.getBoolean(i)); case TINYINT, - * SMALLINT -> o.put(columnName, r.getShort(i)); case INTEGER -> putInteger(o, columnName, r, i); - * case BIGINT -> o.put(columnName, nullIfInvalid(() -> r.getLong(i))); case FLOAT, DOUBLE -> - * o.put(columnName, nullIfInvalid(() -> r.getDouble(i), Double::isFinite)); case REAL -> - * o.put(columnName, nullIfInvalid(() -> r.getFloat(i), Float::isFinite)); case NUMERIC, DECIMAL -> - * o.put(columnName, nullIfInvalid(() -> r.getBigDecimal(i))); case CHAR, VARCHAR, LONGVARCHAR -> - * o.put(columnName, r.getString(i)); case DATE -> o.put(columnName, toISO8601String(r.getDate(i))); - * case TIME -> o.put(columnName, toISO8601String(r.getTime(i))); case TIMESTAMP -> { // - * https://www.cis.upenn.edu/~bcpierce/courses/629/jdkdocs/guide/jdbc/getstart/mapping.doc.html - * final Timestamp t = r.getTimestamp(i); java.util.Date d = new java.util.Date(t.getTime() + - * (t.getNanos() / 1000000)); o.put(columnName, toISO8601String(d)); } case BLOB, BINARY, VARBINARY, - * LONGVARBINARY -> o.put(columnName, r.getBytes(i)); default -> o.put(columnName, r.getString(i)); - * } - */ + + public static JsonSchemaPrimitive getType(StandardSQLTypeName bigQueryType) { + return + switch (bigQueryType) { + case BOOL -> JsonSchemaPrimitive.BOOLEAN; + case INT64, FLOAT64, NUMERIC, BIGNUMERIC -> JsonSchemaPrimitive.NUMBER; + case STRING, BYTES, TIMESTAMP, DATE, TIME, DATETIME -> JsonSchemaPrimitive.STRING; + default -> JsonSchemaPrimitive.STRING; + }; + } } From 54213137468cac55659464b2c7646c85a6adaa88 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Thu, 1 Jul 2021 01:49:36 +0300 Subject: [PATCH 21/63] close unused connections --- .../main/java/io/airbyte/db/jdbc/DefaultJdbcDatabase.java | 5 ++++- .../source/oracle/OracleJdbcSourceAcceptanceTest.java | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/airbyte-db/src/main/java/io/airbyte/db/jdbc/DefaultJdbcDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/jdbc/DefaultJdbcDatabase.java index 234362fd3b0a..134d8f38cb5d 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/jdbc/DefaultJdbcDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/jdbc/DefaultJdbcDatabase.java @@ -89,7 +89,10 @@ public Stream resultSetQuery(CheckedFunction Date: Thu, 1 Jul 2021 14:02:40 +0300 Subject: [PATCH 22/63] add table list extract method --- .../io/airbyte/db/bigquery/BigQueryDatabase.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) 
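A minimal usage sketch for the dataset-listing helper the hunk below introduces (illustration only, not part of the committed change; it assumes the three-argument BigQueryDatabase constructor present at this point in the series, and the project id, credentials JSON and dataset name are placeholders):

import com.google.cloud.bigquery.Table;
import io.airbyte.db.bigquery.BigQueryDatabase;
import java.util.List;

public class ListDatasetTablesSketch {

  public static void main(String[] args) throws Exception {
    // Placeholders: a real GCP project id, the service-account key JSON and a dataset name are required.
    BigQueryDatabase database = new BigQueryDatabase("my-project", "<service account json>", "my_dataset");
    // getNameSpaceTables(...) is the method added by the diff below; it returns the tables of one dataset.
    List<Table> tables = database.getNameSpaceTables("my_dataset");
    // Print the plain table names of the dataset.
    tables.forEach(table -> System.out.println(table.getTableId().getTable()));
  }
}
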
diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java index 5c83b7e470d4..ae41e0dccfe4 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java @@ -27,6 +27,7 @@ import static java.util.Objects.isNull; import com.fasterxml.jackson.databind.JsonNode; +import com.google.api.gax.paging.Page; import com.google.auth.oauth2.ServiceAccountCredentials; import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.BigQueryOptions; @@ -37,15 +38,19 @@ import com.google.cloud.bigquery.QueryJobConfiguration; import com.google.cloud.bigquery.QueryParameterValue; import com.google.cloud.bigquery.StandardSQLTypeName; +import com.google.cloud.bigquery.Table; import com.google.common.base.Charsets; import com.google.common.collect.Streams; import io.airbyte.db.SqlDatabase; import java.io.ByteArrayInputStream; import java.io.IOException; import java.sql.SQLException; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.UUID; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -129,6 +134,14 @@ public ImmutablePair executeQuery(BigQuery bigquery, QueryJobConfig return executeQuery(queryJob); } + public List getNameSpaceTables(String nameSpace) { + Page
tables = bigQuery.listTables(nameSpace); + List
tableList = new ArrayList<>(); + tables.iterateAll().forEach(tableList::add); + return tableList; + } + + private ImmutablePair executeQuery(Job queryJob) { final Job completedJob = waitForQuery(queryJob); if (completedJob == null) { From 4fb2f249d4b13c7928c02ecdbc75e8aa1ca895e3 Mon Sep 17 00:00:00 2001 From: heade Date: Thu, 1 Jul 2021 14:29:29 +0300 Subject: [PATCH 23/63] bigquery source connector --- .../connectors/source-bigquery/Dockerfile | 13 ++ .../connectors/source-bigquery/build.gradle | 31 +++++ .../source/bigquery/BigQuerySource.java | 123 ++++++++++++++++++ .../src/main/resources/spec.json | 67 ++++++++++ .../BigQuerySourceAcceptanceTest.java | 88 +++++++++++++ .../source/bigquery/BigQuerySourceTests.java | 51 ++++++++ .../BigqueryJdbcSourceAcceptanceTest.java | 88 +++++++++++++ 7 files changed, 461 insertions(+) create mode 100644 airbyte-integrations/connectors/source-bigquery/Dockerfile create mode 100644 airbyte-integrations/connectors/source-bigquery/build.gradle create mode 100644 airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java create mode 100644 airbyte-integrations/connectors/source-bigquery/src/main/resources/spec.json create mode 100644 airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceAcceptanceTest.java create mode 100644 airbyte-integrations/connectors/source-bigquery/src/test/java/io/airbyte/integrations/source/bigquery/BigQuerySourceTests.java create mode 100644 airbyte-integrations/connectors/source-bigquery/src/test/java/io/airbyte/integrations/source/bigquery/BigqueryJdbcSourceAcceptanceTest.java diff --git a/airbyte-integrations/connectors/source-bigquery/Dockerfile b/airbyte-integrations/connectors/source-bigquery/Dockerfile new file mode 100644 index 000000000000..cfdaa65dde59 --- /dev/null +++ b/airbyte-integrations/connectors/source-bigquery/Dockerfile @@ -0,0 +1,13 @@ +FROM airbyte/integration-base-java:dev + +WORKDIR /airbyte + +ENV APPLICATION source-bigquery + +COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar + +RUN tar xf ${APPLICATION}.tar --strip-components=1 + +# Airbyte's build system uses these labels to know what to name and tag the docker images produced by this Dockerfile. +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-bigquery \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-bigquery/build.gradle b/airbyte-integrations/connectors/source-bigquery/build.gradle new file mode 100644 index 000000000000..0214905cf428 --- /dev/null +++ b/airbyte-integrations/connectors/source-bigquery/build.gradle @@ -0,0 +1,31 @@ +plugins { + id 'application' + id 'airbyte-docker' + id 'airbyte-integration-test-java' +} + +application { + mainClass = 'io.airbyte.integrations.source.bigquery.BigQuerySource' +} + +dependencies { + implementation 'com.google.cloud:google-cloud-bigquery:1.122.2' + implementation 'org.apache.commons:commons-lang3:3.11' + implementation project(':airbyte-db') + implementation project(':airbyte-integrations:bases:base-java') + implementation project(':airbyte-protocol:models') + implementation project(':airbyte-integrations:connectors:source-jdbc') + implementation project(':airbyte-integrations:connectors:source-relational-db') + + //TODO Add jdbc driver import here. 
Ex: implementation 'com.microsoft.sqlserver:mssql-jdbc:8.4.1.jre14' + + testImplementation testFixtures(project(':airbyte-integrations:connectors:source-jdbc')) + + testImplementation 'org.apache.commons:commons-lang3:3.11' + + integrationTestJavaImplementation project(':airbyte-integrations:connectors:source-bigquery') + integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-source-test') + + implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs) + integrationTestJavaImplementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs) +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java new file mode 100644 index 000000000000..dcd1e7b4545f --- /dev/null +++ b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java @@ -0,0 +1,123 @@ +/* + * MIT License + * + * Copyright (c) 2020 Airbyte + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package io.airbyte.integrations.source.bigquery; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.cloud.bigquery.StandardSQLTypeName; +import com.google.cloud.bigquery.Table; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.functional.CheckedConsumer; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.util.AutoCloseableIterator; +import io.airbyte.db.Databases; +import io.airbyte.db.bigquery.BigQueryDatabase; +import io.airbyte.db.bigquery.BigQueryUtils; +import io.airbyte.integrations.base.IntegrationRunner; +import io.airbyte.integrations.base.Source; +import io.airbyte.integrations.source.jdbc.SourceJdbcUtils; +import io.airbyte.integrations.source.relationaldb.AbstractRelationalDbSource; +import io.airbyte.integrations.source.relationaldb.TableInfo; +import io.airbyte.protocol.models.CommonField; +import io.airbyte.protocol.models.JsonSchemaPrimitive; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; + +public class BigQuerySource extends AbstractRelationalDbSource implements Source { + + private static final Logger LOGGER = LoggerFactory.getLogger(BigQuerySource.class); + + // TODO insert your driver name. 
Ex: "com.microsoft.sqlserver.jdbc.SQLServerDriver" + static final String DRIVER_CLASS = "driver_name_here"; + static final String CONFIG_DATASET_ID = "dataset_id"; + static final String CONFIG_PROJECT_ID = "project_id"; + static final String CONFIG_DATASET_LOCATION = "dataset_location"; + static final String CONFIG_CREDS = "credentials_json"; + private String quote = ""; + private JsonNode dbConfig; + + public BigQuerySource() { + } + + @Override + public JsonNode toDatabaseConfig(JsonNode config) { + return Jsons.jsonNode(ImmutableMap.builder() + .put(CONFIG_PROJECT_ID, config.get(CONFIG_PROJECT_ID).asText()) + .put(CONFIG_CREDS, config.get(CONFIG_CREDS).asText()) + .put(CONFIG_DATASET_ID, config.get(CONFIG_DATASET_ID).asText()) + .build()); + } + + @Override + protected BigQueryDatabase createDatabase(JsonNode config) throws Exception { + dbConfig = Jsons.clone(config); + return Databases.createBigQueryDatabase(config.get(CONFIG_PROJECT_ID).asText(), config.get(CONFIG_CREDS).asText(), config.get(CONFIG_DATASET_ID).asText()); + } + + @Override + public List> getCheckOperations(JsonNode config) throws Exception { + return new ArrayList<>(); + } + + @Override + protected JsonSchemaPrimitive getType(StandardSQLTypeName columnType) { + return BigQueryUtils.getType(columnType); + } + + @Override + public Set getExcludedInternalNameSpaces() { + return Collections.emptySet(); + } + + @Override + protected List>> discoverInternal(BigQueryDatabase database) throws Exception { + List
nameSpaceTables = database.getNameSpaceTables(dbConfig.get(CONFIG_PROJECT_ID).asText()); + + return null; + } + + @Override + protected Map> discoverPrimaryKeys(BigQueryDatabase database, List>> tableInfos) { + return null; + } + + @Override + protected String getQuoteString() { + return quote; + } + + @Override + public AutoCloseableIterator queryTableIncremental(BigQueryDatabase database, List columnNames, String schemaName, String tableName, String cursorField, StandardSQLTypeName cursorFieldType, String cursor) { + return null; + } + + public static void main(String[] args) throws Exception { + final Source source = new BigQuerySource(); + LOGGER.info("starting source: {}", BigQuerySource.class); + new IntegrationRunner(source).run(args); + LOGGER.info("completed source: {}", BigQuerySource.class); + } +} diff --git a/airbyte-integrations/connectors/source-bigquery/src/main/resources/spec.json b/airbyte-integrations/connectors/source-bigquery/src/main/resources/spec.json new file mode 100644 index 000000000000..23b20e245d6d --- /dev/null +++ b/airbyte-integrations/connectors/source-bigquery/src/main/resources/spec.json @@ -0,0 +1,67 @@ +{ + "documentationUrl": "https://docs.airbyte.io/integrations/destinations/bigquery", + "supportsIncremental": true, + "supportsNormalization": true, + "supportsDBT": true, + "supported_destination_sync_modes": ["overwrite", "append", "append_dedup"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "BigQuery Destination Spec", + "type": "object", + "required": ["project_id", "dataset_id"], + "additionalProperties": false, + "properties": { + "project_id": { + "type": "string", + "description": "The GCP project ID for the project containing the target BigQuery dataset.", + "title": "Project ID" + }, + "dataset_id": { + "type": "string", + "description": "Default BigQuery Dataset ID tables are replicated to if the source does not specify a namespace.", + "title": "Default Dataset ID" + }, + "dataset_location": { + "type": "string", + "description": "The location of the dataset. Warning: Changes made after creation will not be applied.", + "title": "Dataset Location", + "default": "US", + "enum": [ + "US", + "EU", + "us-central1", + "us-west-1", + "us-west-2", + "us-west-3", + "us-west-4", + "us-east1", + "us-east4", + "northamerica-northeast1", + "southamerica-east1", + "europe-north1", + "europe-west1", + "europe-west2", + "europe-west3", + "europe-west4", + "europe-west6", + "europe-central2", + "asia-east1", + "asia-east2", + "asia-southeast2", + "asia-south1", + "asia-northeast1", + "asia-northeast2", + "asia-southeast1", + "asia-northeast3", + "australia-southeast1" + ] + }, + "credentials_json": { + "type": "string", + "description": "The contents of the JSON service account key. Check out the docs if you need help generating this key. 
Default credentials will be used if this field is left empty.", + "title": "Credentials JSON", + "airbyte_secret": true + } + } + } +} diff --git a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceAcceptanceTest.java b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceAcceptanceTest.java new file mode 100644 index 000000000000..3f11291c89ba --- /dev/null +++ b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceAcceptanceTest.java @@ -0,0 +1,88 @@ +/* + * MIT License + * + * Copyright (c) 2020 Airbyte + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package io.airbyte.integrations.source.bigquery; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.resources.MoreResources; +import io.airbyte.integrations.standardtest.source.SourceAcceptanceTest; +import io.airbyte.integrations.standardtest.source.TestDestinationEnv; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConnectorSpecification; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; + +public class BigQuerySourceAcceptanceTest extends SourceAcceptanceTest { + + private JsonNode config; + + @Override + protected void setupEnvironment(TestDestinationEnv testEnv) { + // TODO create new container. Ex: "new OracleContainer("epiclabs/docker-oracle-xe-11g");" + // TODO make container started. Ex: "container.start();" + // TODO init JsonNode config + // TODO crete airbyte Database object "Databases.createJdbcDatabase(...)" + // TODO insert test data to DB. Ex: "database.execute(connection-> ...)" + // TODO close Database. Ex: "database.close();" + } + + @Override + protected void tearDown(TestDestinationEnv testEnv) { + // TODO close container that was initialized in setup() method. 
Ex: "container.close();" + } + + @Override + protected String getImageName() { + return "airbyte/source-bigquery:dev"; + } + + @Override + protected ConnectorSpecification getSpec() throws Exception { + return Jsons.deserialize(MoreResources.readResource("spec.json"), ConnectorSpecification.class); + } + + @Override + protected JsonNode getConfig() { + return config; + } + + @Override + protected ConfiguredAirbyteCatalog getConfiguredCatalog() { + // TODO Return the ConfiguredAirbyteCatalog with ConfiguredAirbyteStream objects + return null; + } + + @Override + protected List getRegexTests() { + return Collections.emptyList(); + } + + @Override + protected JsonNode getState() { + return Jsons.jsonNode(new HashMap<>()); + } + +} diff --git a/airbyte-integrations/connectors/source-bigquery/src/test/java/io/airbyte/integrations/source/bigquery/BigQuerySourceTests.java b/airbyte-integrations/connectors/source-bigquery/src/test/java/io/airbyte/integrations/source/bigquery/BigQuerySourceTests.java new file mode 100644 index 000000000000..4196237094cc --- /dev/null +++ b/airbyte-integrations/connectors/source-bigquery/src/test/java/io/airbyte/integrations/source/bigquery/BigQuerySourceTests.java @@ -0,0 +1,51 @@ +/* + * MIT License + * + * Copyright (c) 2020 Airbyte + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package io.airbyte.integrations.source.bigquery; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.db.Database; +import org.junit.jupiter.api.Test; + +public class BigQuerySourceTests { + + private JsonNode config; + private Database database; + + @Test + public void testSettingTimezones() throws Exception { + // TODO init your container. Ex: "new + // org.testcontainers.containers.MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2019-latest").acceptLicense();" + // TODO start the container. Ex: "container.start();" + // TODO prepare DB config. Ex: "config = getConfig(container, dbName, + // "serverTimezone=Europe/London");" + // TODO create DB, grant all privileges, etc. + // TODO check connection status. Ex: "AirbyteConnectionStatus check = new + // ScaffoldJavaJdbcGenericSource().check(config);" + // TODO assert connection status. Ex: "assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, + // check.getStatus());" + // TODO cleanup used resources and close used container. 
Ex: "container.close();" + } + +} diff --git a/airbyte-integrations/connectors/source-bigquery/src/test/java/io/airbyte/integrations/source/bigquery/BigqueryJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-bigquery/src/test/java/io/airbyte/integrations/source/bigquery/BigqueryJdbcSourceAcceptanceTest.java new file mode 100644 index 000000000000..fa88e9668df1 --- /dev/null +++ b/airbyte-integrations/connectors/source-bigquery/src/test/java/io/airbyte/integrations/source/bigquery/BigqueryJdbcSourceAcceptanceTest.java @@ -0,0 +1,88 @@ +///* +// * MIT License +// * +// * Copyright (c) 2020 Airbyte +// * +// * Permission is hereby granted, free of charge, to any person obtaining a copy +// * of this software and associated documentation files (the "Software"), to deal +// * in the Software without restriction, including without limitation the rights +// * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// * copies of the Software, and to permit persons to whom the Software is +// * furnished to do so, subject to the following conditions: +// * +// * The above copyright notice and this permission notice shall be included in all +// * copies or substantial portions of the Software. +// * +// * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// * SOFTWARE. +// */ +// +//package io.airbyte.integrations.source.bigquery; +// +//import com.fasterxml.jackson.databind.JsonNode; +//import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; +//import io.airbyte.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; +//import org.junit.jupiter.api.AfterAll; +//import org.junit.jupiter.api.AfterEach; +//import org.junit.jupiter.api.BeforeAll; +//import org.junit.jupiter.api.BeforeEach; +//import org.slf4j.Logger; +//import org.slf4j.LoggerFactory; +// +//class BigqueryJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { +// +// private static final Logger LOGGER = LoggerFactory.getLogger(BigqueryJdbcSourceAcceptanceTest.class); +// +// // TODO declare a test container for DB. EX: org.testcontainers.containers.OracleContainer +// +// @BeforeAll +// static void init() { +// // Oracle returns uppercase values +// // TODO init test container. Ex: "new OracleContainer("epiclabs/docker-oracle-xe-11g")" +// // TODO start container. Ex: "container.start();" +// } +// +// @BeforeEach +// public void setup() throws Exception { +// // TODO init config. 
Ex: "config = Jsons.jsonNode(ImmutableMap.builder().put("host", +// // host).put("port", port)....build()); +// super.setup(); +// } +// +// @AfterEach +// public void tearDown() { +// // TODO clean used resources +// } +// +// @Override +// public AbstractJdbcSource getSource() { +// return new AbstractJdbcSource(); +// } +// +// @Override +// public boolean supportsSchemas() { +// // TODO check if your db supports it and update method accordingly +// return false; +// } +// +// @Override +// public JsonNode getConfig() { +// return config; +// } +// +// @Override +// public String getDriverClass() { +// return BigQuerySource.DRIVER_CLASS; +// } +// +// @AfterAll +// static void cleanUp() { +// // TODO close the container. Ex: "container.close();" +// } +// +//} From f4d6aa068695b62d6d50a4e6f255fcb9edfcc590 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Thu, 1 Jul 2021 16:13:40 +0300 Subject: [PATCH 24/63] return all tables for a whole project instead of a dataset --- .../io/airbyte/db/bigquery/BigQueryDatabase.java | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java index ae41e0dccfe4..af6b31558464 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java @@ -48,9 +48,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.HashSet; import java.util.List; -import java.util.Set; import java.util.UUID; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -134,10 +132,15 @@ public ImmutablePair executeQuery(BigQuery bigquery, QueryJobConfig return executeQuery(queryJob); } - public List
getNameSpaceTables(String nameSpace) { - Page
tables = bigQuery.listTables(nameSpace); + public List
getProjectTables(String projectId) { List
tableList = new ArrayList<>(); - tables.iterateAll().forEach(tableList::add); + bigQuery.listDatasets(projectId) + .iterateAll() + .forEach(dataset -> + bigQuery.listTables(dataset.getDatasetId()) + .iterateAll() + .forEach(tableList::add) + ); return tableList; } From 7f76db98823fc176560d1e2915c041a99e4b96b6 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Thu, 1 Jul 2021 16:59:48 +0300 Subject: [PATCH 25/63] impl incremental fetch --- .../airbyte/db/bigquery/BigQueryDatabase.java | 15 ++++++++---- .../io/airbyte/db/bigquery/BigQueryUtils.java | 17 +++++++++----- .../source/bigquery/BigQuerySource.java | 23 ++++++++++++++++--- 3 files changed, 41 insertions(+), 14 deletions(-) diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java index af6b31558464..4bcfe736c7f8 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java @@ -27,7 +27,6 @@ import static java.util.Objects.isNull; import com.fasterxml.jackson.databind.JsonNode; -import com.google.api.gax.paging.Page; import com.google.auth.oauth2.ServiceAccountCredentials; import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.BigQueryOptions; @@ -91,6 +90,10 @@ public void execute(String sql) throws SQLException { LOGGER.info("BigQuery successfully finished execution SQL: " + sql); } + public Stream query(String sql, QueryParameterValue... params) throws Exception { + return query(sql, (params == null ? Collections.emptyList() : Arrays.asList(params))); + } + @Override public Stream query(String sql, String... params) throws Exception { List parameterValueList; @@ -100,15 +103,18 @@ public Stream query(String sql, String... params) throws Exception { parameterValueList = Arrays.stream(params).map(param -> QueryParameterValue.newBuilder().setValue(param).setType( StandardSQLTypeName.STRING).build()).collect(Collectors.toList()); - final ImmutablePair result = executeQuery(bigQuery, getQueryConfig(sql, parameterValueList)); + return query(sql, parameterValueList); + } + + public Stream query(String sql, List params) throws Exception { + final ImmutablePair result = executeQuery(bigQuery, getQueryConfig(sql, params)); if (result.getLeft() != null) { FieldList fieldList = result.getLeft().getQueryResults().getSchema().getFields(); return Streams.stream(result.getLeft().getQueryResults().iterateAll()) .map(fieldValues -> BigQueryUtils.rowToJson(fieldValues, fieldList)); } else - throw new Exception("Failed to execute query " + sql + (params != null ? " with params " + Arrays - .toString(params) : "") + ". Error: " + result.getRight()); + throw new Exception("Failed to execute query " + sql + (params != null && !params.isEmpty() ? " with params " + params : "") + ". Error: " + result.getRight()); } @Override @@ -144,7 +150,6 @@ public List
getProjectTables(String projectId) { return tableList; } - private ImmutablePair executeQuery(Job queryJob) { final Job completedJob = waitForQuery(queryJob); if (completedJob == null) { diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java index a132405d09d4..dd53bef856de 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java @@ -34,6 +34,7 @@ import com.google.cloud.bigquery.FieldValue; import com.google.cloud.bigquery.FieldValueList; import com.google.cloud.bigquery.LegacySQLTypeName; +import com.google.cloud.bigquery.QueryParameterValue; import com.google.cloud.bigquery.StandardSQLTypeName; import io.airbyte.commons.json.Jsons; import io.airbyte.protocol.models.JsonSchemaPrimitive; @@ -84,13 +85,17 @@ public static Date getDateValue(FieldValue fieldValue) { } public static JsonSchemaPrimitive getType(StandardSQLTypeName bigQueryType) { - return - switch (bigQueryType) { - case BOOL -> JsonSchemaPrimitive.BOOLEAN; - case INT64, FLOAT64, NUMERIC, BIGNUMERIC -> JsonSchemaPrimitive.NUMBER; - case STRING, BYTES, TIMESTAMP, DATE, TIME, DATETIME -> JsonSchemaPrimitive.STRING; - default -> JsonSchemaPrimitive.STRING; + return switch (bigQueryType) { + case BOOL -> JsonSchemaPrimitive.BOOLEAN; + case INT64, FLOAT64, NUMERIC, BIGNUMERIC -> JsonSchemaPrimitive.NUMBER; + case STRING, BYTES, TIMESTAMP, DATE, TIME, DATETIME -> JsonSchemaPrimitive.STRING; + default -> JsonSchemaPrimitive.STRING; }; } + // @TODO probably we need a reverse value transformation. especially for time and date types. + public static QueryParameterValue getQueryParameter(StandardSQLTypeName paramType, String paramValue) { + return QueryParameterValue.newBuilder().setType(paramType).setValue(paramValue).build(); + } + } diff --git a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java index dcd1e7b4545f..9e81c994ca4f 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java +++ b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java @@ -25,22 +25,24 @@ package io.airbyte.integrations.source.bigquery; import com.fasterxml.jackson.databind.JsonNode; +import com.google.cloud.bigquery.QueryParameterValue; import com.google.cloud.bigquery.StandardSQLTypeName; import com.google.cloud.bigquery.Table; import com.google.common.collect.ImmutableMap; import io.airbyte.commons.functional.CheckedConsumer; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.util.AutoCloseableIterator; +import io.airbyte.commons.util.AutoCloseableIterators; import io.airbyte.db.Databases; import io.airbyte.db.bigquery.BigQueryDatabase; import io.airbyte.db.bigquery.BigQueryUtils; import io.airbyte.integrations.base.IntegrationRunner; import io.airbyte.integrations.base.Source; -import io.airbyte.integrations.source.jdbc.SourceJdbcUtils; import io.airbyte.integrations.source.relationaldb.AbstractRelationalDbSource; import io.airbyte.integrations.source.relationaldb.TableInfo; import io.airbyte.protocol.models.CommonField; import io.airbyte.protocol.models.JsonSchemaPrimitive; +import java.util.stream.Stream; import 
org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -94,7 +96,7 @@ public Set getExcludedInternalNameSpaces() { @Override protected List>> discoverInternal(BigQueryDatabase database) throws Exception { - List
nameSpaceTables = database.getNameSpaceTables(dbConfig.get(CONFIG_PROJECT_ID).asText()); + List
nameSpaceTables = database.getProjectTables(dbConfig.get(CONFIG_PROJECT_ID).asText()); return null; } @@ -111,7 +113,22 @@ protected String getQuoteString() { @Override public AutoCloseableIterator queryTableIncremental(BigQueryDatabase database, List columnNames, String schemaName, String tableName, String cursorField, StandardSQLTypeName cursorFieldType, String cursor) { - return null; + return queryTableWithParams(database, String.format("SELECT %s FROM %s WHERE %s >= @cursor", + enquoteIdentifierList(columnNames), + getFullTableName(schemaName, tableName), + cursorField), + BigQueryUtils.getQueryParameter(cursorFieldType, cursor)); + } + + private AutoCloseableIterator queryTableWithParams(BigQueryDatabase database, String sqlQuery, QueryParameterValue... params) { + return AutoCloseableIterators.lazyIterator(() -> { + try { + final Stream stream = database.query(sqlQuery, params); + return AutoCloseableIterators.fromStream(stream); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); } public static void main(String[] args) throws Exception { From 0495b357c6875cfeef93e67afc0a354f8f7e6466 Mon Sep 17 00:00:00 2001 From: heade Date: Fri, 2 Jul 2021 21:03:37 +0300 Subject: [PATCH 26/63] bigquery source connector --- .../source/bigquery/BigQuerySource.java | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java index 9e81c994ca4f..c89ca93aff42 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java +++ b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java @@ -25,9 +25,7 @@ package io.airbyte.integrations.source.bigquery; import com.fasterxml.jackson.databind.JsonNode; -import com.google.cloud.bigquery.QueryParameterValue; -import com.google.cloud.bigquery.StandardSQLTypeName; -import com.google.cloud.bigquery.Table; +import com.google.cloud.bigquery.*; import com.google.common.collect.ImmutableMap; import io.airbyte.commons.functional.CheckedConsumer; import io.airbyte.commons.json.Jsons; @@ -42,6 +40,9 @@ import io.airbyte.integrations.source.relationaldb.TableInfo; import io.airbyte.protocol.models.CommonField; import io.airbyte.protocol.models.JsonSchemaPrimitive; + +import java.sql.JDBCType; +import java.util.stream.Collectors; import java.util.stream.Stream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,8 +53,6 @@ public class BigQuerySource extends AbstractRelationalDbSource> getCheckOperations(JsonNode config) throws Exception { - return new ArrayList<>(); + return Collections.emptyList(); } @Override @@ -96,14 +95,24 @@ public Set getExcludedInternalNameSpaces() { @Override protected List>> discoverInternal(BigQueryDatabase database) throws Exception { - List
nameSpaceTables = database.getProjectTables(dbConfig.get(CONFIG_PROJECT_ID).asText()); - - return null; + String projectId = dbConfig.get(CONFIG_PROJECT_ID).asText(); + List
tables = database.getProjectTables(projectId); + List>> result = new ArrayList<>(); + tables.stream() + .map(table -> Objects.requireNonNull(table.getDefinition().getSchema()).getFields()) + .forEach(fields -> fields.stream().map(field -> TableInfo.>builder() + .nameSpace(projectId) + .name(field.getName()) + .fields(fields.stream().map(f -> { + StandardSQLTypeName standardType = f.getType().getStandardType(); + return new CommonField<>(f.getName(), standardType); + }).collect(Collectors.toList())).build()).forEach(result::add)); + return result; } @Override protected Map> discoverPrimaryKeys(BigQueryDatabase database, List>> tableInfos) { - return null; + return Collections.emptyMap(); } @Override From d764c16dff505084c2f2acd71f995e1f0dc9798b Mon Sep 17 00:00:00 2001 From: heade Date: Fri, 2 Jul 2021 21:06:51 +0300 Subject: [PATCH 27/63] bigquery source connector --- .../src/main/resources/spec.json | 39 +------------------ 1 file changed, 2 insertions(+), 37 deletions(-) diff --git a/airbyte-integrations/connectors/source-bigquery/src/main/resources/spec.json b/airbyte-integrations/connectors/source-bigquery/src/main/resources/spec.json index 23b20e245d6d..33bee15e6b43 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/source-bigquery/src/main/resources/spec.json @@ -1,5 +1,5 @@ { - "documentationUrl": "https://docs.airbyte.io/integrations/destinations/bigquery", + "documentationUrl": "https://docs.airbyte.io/integrations/source/bigquery", "supportsIncremental": true, "supportsNormalization": true, "supportsDBT": true, @@ -21,44 +21,9 @@ "description": "Default BigQuery Dataset ID tables are replicated to if the source does not specify a namespace.", "title": "Default Dataset ID" }, - "dataset_location": { - "type": "string", - "description": "The location of the dataset. Warning: Changes made after creation will not be applied.", - "title": "Dataset Location", - "default": "US", - "enum": [ - "US", - "EU", - "us-central1", - "us-west-1", - "us-west-2", - "us-west-3", - "us-west-4", - "us-east1", - "us-east4", - "northamerica-northeast1", - "southamerica-east1", - "europe-north1", - "europe-west1", - "europe-west2", - "europe-west3", - "europe-west4", - "europe-west6", - "europe-central2", - "asia-east1", - "asia-east2", - "asia-southeast2", - "asia-south1", - "asia-northeast1", - "asia-northeast2", - "asia-southeast1", - "asia-northeast3", - "australia-southeast1" - ] - }, "credentials_json": { "type": "string", - "description": "The contents of the JSON service account key. Check out the docs if you need help generating this key. Default credentials will be used if this field is left empty.", + "description": "The contents of the JSON service account key. 
Check out the docs if you need help generating this key.", "title": "Credentials JSON", "airbyte_secret": true } From 33a447ef7283ff9f7c10e8e0397f9d7249305d35 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Mon, 5 Jul 2021 12:39:09 +0300 Subject: [PATCH 28/63] remove unnecessary databaseid --- airbyte-db/src/main/java/io/airbyte/db/Databases.java | 4 ++-- .../java/io/airbyte/db/bigquery/BigQueryDatabase.java | 6 ++---- .../integrations/source/bigquery/BigQuerySource.java | 8 ++++---- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/airbyte-db/src/main/java/io/airbyte/db/Databases.java b/airbyte-db/src/main/java/io/airbyte/db/Databases.java index 10c8541303e4..ab88c3ab314c 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/Databases.java +++ b/airbyte-db/src/main/java/io/airbyte/db/Databases.java @@ -146,8 +146,8 @@ private static BasicDataSource createBasicDataSource(final String username, return connectionPool; } - public static BigQueryDatabase createBigQueryDatabase(final String projectId, final String jsonCreds, final String databaseId) { - return new BigQueryDatabase(projectId, jsonCreds, databaseId); + public static BigQueryDatabase createBigQueryDatabase(final String projectId, final String jsonCreds) { + return new BigQueryDatabase(projectId, jsonCreds); } } diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java index 4bcfe736c7f8..e01154f81977 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java @@ -60,10 +60,8 @@ public class BigQueryDatabase extends SqlDatabase { private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryDatabase.class); private final BigQuery bigQuery; - private final String databaseId; - public BigQueryDatabase(String projectId, String jsonCreds, String databaseId) { - this.databaseId = databaseId; + public BigQueryDatabase(String projectId, String jsonCreds) { try { BigQueryOptions.Builder bigQueryBuilder = BigQueryOptions.newBuilder(); ServiceAccountCredentials credentials = null; @@ -126,7 +124,7 @@ public void close() throws Exception { public QueryJobConfiguration getQueryConfig(String sql, List params) { return QueryJobConfiguration - .newBuilder(String.format(sql, this.databaseId)) + .newBuilder(sql) .setUseLegacySql(false) .setPositionalParameters(params) .build(); diff --git a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java index c89ca93aff42..61fd34adae64 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java +++ b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java @@ -73,13 +73,13 @@ public JsonNode toDatabaseConfig(JsonNode config) { } @Override - protected BigQueryDatabase createDatabase(JsonNode config) throws Exception { + protected BigQueryDatabase createDatabase(JsonNode config) { dbConfig = Jsons.clone(config); - return Databases.createBigQueryDatabase(config.get(CONFIG_PROJECT_ID).asText(), config.get(CONFIG_CREDS).asText(), config.get(CONFIG_DATASET_ID).asText()); + return Databases.createBigQueryDatabase(config.get(CONFIG_PROJECT_ID).asText(), 
config.get(CONFIG_CREDS).asText()); } @Override - public List> getCheckOperations(JsonNode config) throws Exception { + public List> getCheckOperations(JsonNode config) { return Collections.emptyList(); } @@ -94,7 +94,7 @@ public Set getExcludedInternalNameSpaces() { } @Override - protected List>> discoverInternal(BigQueryDatabase database) throws Exception { + protected List>> discoverInternal(BigQueryDatabase database) { String projectId = dbConfig.get(CONFIG_PROJECT_ID).asText(); List
tables = database.getProjectTables(projectId); List>> result = new ArrayList<>(); From e114a18e6cd414d950dd07c5d96e9cbaa146e986 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Mon, 5 Jul 2021 15:37:38 +0300 Subject: [PATCH 29/63] add primitive type filtering --- .../io/airbyte/db/bigquery/BigQueryUtils.java | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java index dd53bef856de..6864801fecb8 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java @@ -32,6 +32,7 @@ import com.google.cloud.bigquery.Field; import com.google.cloud.bigquery.FieldList; import com.google.cloud.bigquery.FieldValue; +import com.google.cloud.bigquery.FieldValue.Attribute; import com.google.cloud.bigquery.FieldValueList; import com.google.cloud.bigquery.LegacySQLTypeName; import com.google.cloud.bigquery.QueryParameterValue; @@ -58,18 +59,21 @@ public static JsonNode rowToJson(FieldValueList rowValues, FieldList fieldList) } private static void setJsonField(Field field, FieldValue fieldValue, ObjectNode node) { - LegacySQLTypeName fieldType = field.getType(); - String fieldName = field.getName(); - switch (fieldType.getStandardType()) { - case BOOL -> node.put(fieldName, fieldValue.getBooleanValue()); - case INT64 -> node.put(fieldName, fieldValue.getLongValue()); - case FLOAT64 -> node.put(fieldName, fieldValue.getDoubleValue()); - case NUMERIC -> node.put(fieldName, fieldValue.getNumericValue()); - case BIGNUMERIC -> node.put(fieldName, nullIfInvalid(fieldValue::getNumericValue)); - case STRING -> node.put(fieldName, fieldValue.getStringValue()); - case BYTES -> node.put(fieldName, fieldValue.getBytesValue()); - case TIMESTAMP, DATE, TIME, DATETIME -> node.put(fieldName, toISO8601String(fieldValue.getTimestampValue())); - default -> node.put(fieldName, fieldValue.getStringValue()); + if (fieldValue.getAttribute().equals(Attribute.PRIMITIVE)) { + LegacySQLTypeName fieldType = field.getType(); + String fieldName = field.getName(); + switch (fieldType.getStandardType()) { + case BOOL -> node.put(fieldName, fieldValue.getBooleanValue()); + case INT64 -> node.put(fieldName, fieldValue.getLongValue()); + case FLOAT64 -> node.put(fieldName, fieldValue.getDoubleValue()); + case NUMERIC -> node.put(fieldName, fieldValue.getNumericValue()); + case BIGNUMERIC -> node.put(fieldName, nullIfInvalid(fieldValue::getNumericValue)); + case STRING -> node.put(fieldName, fieldValue.getStringValue()); + case BYTES -> node.put(fieldName, fieldValue.getBytesValue()); + case TIMESTAMP, DATE, TIME, DATETIME -> node + .put(fieldName, toISO8601String(fieldValue.getTimestampValue())); + default -> node.put(fieldName, fieldValue.getStringValue()); + } } } From 74f83507467a949b70d458d65acf65f5f85ec924 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Wed, 7 Jul 2021 00:33:37 +0300 Subject: [PATCH 30/63] add temporary workaround for test database. 
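This patch introduces TempBigQueryJoolDatabaseImpl, a stop-gap that exposes BigQuery behind the jOOQ-style Database interface so the standard comprehensive-test harness can issue raw SQL during setup. The sketch below is editorial and not part of the patch; the project id, credentials and table literals are placeholders, and only classes added in this patch series are assumed to exist.

    // Minimal usage sketch, assuming TempBigQueryJoolDatabaseImpl from this patch.
    import io.airbyte.db.bigquery.TempBigQueryJoolDatabaseImpl;

    public class TempDatabaseUsageSketch {

      public static void main(String[] args) throws Exception {
        String projectId = "my-gcp-project";        // placeholder
        String credentialsJson = "{...}";           // placeholder service account key

        try (TempBigQueryJoolDatabaseImpl database =
            new TempBigQueryJoolDatabaseImpl(projectId, credentialsJson)) {
          // query(...) hands a DSL context to the lambda; the fake context simply
          // forwards the SQL string to BigQueryDatabase.execute(sql) and returns null,
          // which is enough for the CREATE TABLE / INSERT statements used in test setup.
          database.query(ctx -> ctx.fetch(
              "CREATE TABLE airbyte_tests.id_and_name(id INT64, name STRING)"));
          database.query(ctx -> ctx.fetch(
              "INSERT INTO airbyte_tests.id_and_name (id, name) VALUES (1, 'picard')"));
        }
      }
    }

The wrapper only exists because SourceComprehensiveTest is typed against the jOOQ Database; it is expected to be removed as part of issue #4547.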
--- .../airbyte/db/bigquery/BigQueryDatabase.java | 25 ++++-- .../TempBigQueryJoolDatabaseImpl.java | 63 ++++++++++++++ .../BigQuerySourceComprehensiveTest.java | 85 +++++++++++++++++++ 3 files changed, 168 insertions(+), 5 deletions(-) create mode 100644 airbyte-db/src/main/java/io/airbyte/db/bigquery/TempBigQueryJoolDatabaseImpl.java create mode 100644 airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java index e01154f81977..8dcc7dce58d9 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java @@ -112,7 +112,8 @@ public Stream query(String sql, List params) thro return Streams.stream(result.getLeft().getQueryResults().iterateAll()) .map(fieldValues -> BigQueryUtils.rowToJson(fieldValues, fieldList)); } else - throw new Exception("Failed to execute query " + sql + (params != null && !params.isEmpty() ? " with params " + params : "") + ". Error: " + result.getRight()); + throw new Exception( + "Failed to execute query " + sql + (params != null && !params.isEmpty() ? " with params " + params : "") + ". Error: " + result.getRight()); } @Override @@ -140,14 +141,28 @@ public List
getProjectTables(String projectId) { List<Table>
tableList = new ArrayList<>(); bigQuery.listDatasets(projectId) .iterateAll() - .forEach(dataset -> - bigQuery.listTables(dataset.getDatasetId()) + .forEach(dataset -> bigQuery.listTables(dataset.getDatasetId()) .iterateAll() - .forEach(tableList::add) - ); + .forEach(tableList::add)); return tableList; } + public BigQuery getBigQuery() { + return bigQuery; + } + + public void cleanDataSet(String dataSetId) { + // allows deletion of a dataset that has contents + final BigQuery.DatasetDeleteOption option = BigQuery.DatasetDeleteOption.deleteContents(); + + final boolean success = bigQuery.delete(dataSetId, option); + if (success) { + LOGGER.info("BQ Dataset " + dataSetId + " deleted..."); + } else { + LOGGER.info("BQ Dataset cleanup for " + dataSetId + " failed!"); + } + } + private ImmutablePair executeQuery(Job queryJob) { final Job completedJob = waitForQuery(queryJob); if (completedJob == null) { diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/TempBigQueryJoolDatabaseImpl.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/TempBigQueryJoolDatabaseImpl.java new file mode 100644 index 000000000000..eaf19be1ebe1 --- /dev/null +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/TempBigQueryJoolDatabaseImpl.java @@ -0,0 +1,63 @@ +package io.airbyte.db.bigquery; + +import io.airbyte.db.ContextQueryFunction; +import io.airbyte.db.Database; +import io.airbyte.db.Databases; +import java.sql.SQLException; +import org.jooq.Record; +import org.jooq.Result; +import org.jooq.SQLDialect; +import org.jooq.exception.DataAccessException; +import org.jooq.impl.DefaultDSLContext; + +/** + * This class is a temporary and will be removed as part of the issue #4547 + */ +public class TempBigQueryJoolDatabaseImpl extends Database { + + private final BigQueryDatabase realDatabase; + + public TempBigQueryJoolDatabaseImpl(final String projectId, final String jsonCreds) { + super(null, null); + realDatabase = Databases.createBigQueryDatabase(projectId, jsonCreds); + } + + @Override + public T query(ContextQueryFunction transform) throws SQLException { + return transform.query(new FakeDefaultDSLContext(realDatabase)); + } + + @Override + public T transaction(ContextQueryFunction transform) throws SQLException { + return transform.query(new FakeDefaultDSLContext(realDatabase)); + } + + @Override + public void close() throws Exception { + realDatabase.close(); + } + + public BigQueryDatabase getRealDatabase() { + return realDatabase; + } + + private static class FakeDefaultDSLContext extends DefaultDSLContext { + + private final BigQueryDatabase database; + + public FakeDefaultDSLContext(BigQueryDatabase database) { + super((SQLDialect)null); + this.database = database; + } + + @Override + public Result fetch(String sql) throws DataAccessException { + try { + database.execute(sql); + } catch (SQLException e) { + throw new DataAccessException(e.getMessage()); + } + return null; + } + } +} diff --git a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java new file mode 100644 index 000000000000..6ba34f5d3bb7 --- /dev/null +++ b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java @@ -0,0 +1,85 @@ +package io.airbyte.integrations.source.bigquery; + +import 
com.fasterxml.jackson.databind.JsonNode; +import com.google.cloud.bigquery.Dataset; +import com.google.cloud.bigquery.DatasetInfo; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.string.Strings; +import io.airbyte.db.Database; +import io.airbyte.db.bigquery.TempBigQueryJoolDatabaseImpl; +import io.airbyte.integrations.standardtest.source.SourceComprehensiveTest; +import io.airbyte.integrations.standardtest.source.TestDestinationEnv; +import java.nio.file.Files; +import java.nio.file.Path; + +public class BigQuerySourceComprehensiveTest extends SourceComprehensiveTest { + + private static final Path CREDENTIALS_PATH = Path.of("secrets/credentials.json"); + + private static final String CONFIG_DATASET_ID = "dataset_id"; + private static final String CONFIG_PROJECT_ID = "project_id"; + private static final String CONFIG_DATASET_LOCATION = "dataset_location"; + private static final String CONFIG_CREDS = "credentials_json"; + + private TempBigQueryJoolDatabaseImpl database; + private Dataset dataset; + private boolean tornDown; + private JsonNode config; + + @Override + protected String getImageName() { + return "airbyte/source-bigquery:dev"; + } + + @Override + protected JsonNode getConfig() throws Exception { + return config; + } + + @Override + protected void tearDown(TestDestinationEnv testEnv) throws Exception { + + } + + @Override + protected Database setupDatabase() throws Exception { + if (!Files.exists(CREDENTIALS_PATH)) { + throw new IllegalStateException( + "Must provide path to a big query credentials file. By default {module-root}/" + CREDENTIALS_PATH + + ". Override by setting setting path with the CREDENTIALS_PATH constant."); + } + + final String credentialsJsonString = new String(Files.readAllBytes(CREDENTIALS_PATH)); + + final JsonNode credentialsJson = Jsons.deserialize(credentialsJsonString); + final String projectId = credentialsJson.get(CONFIG_PROJECT_ID).asText(); +// final String datasetLocation = "US"; + + final String datasetId = Strings.addRandomSuffix("airbyte_tests", "_", 8); + + config = Jsons.jsonNode(ImmutableMap.builder() + .put(CONFIG_PROJECT_ID, projectId) + .put(CONFIG_CREDS, credentialsJsonString) + .put(CONFIG_DATASET_ID, datasetId) +// .put(CONFIG_DATASET_LOCATION, datasetLocation) + .build()); + + database = new TempBigQueryJoolDatabaseImpl(config.get(CONFIG_PROJECT_ID).asText(), credentialsJsonString); + + final DatasetInfo datasetInfo = + DatasetInfo.newBuilder(config.get(CONFIG_DATASET_ID).asText()).setLocation(config.get(CONFIG_DATASET_LOCATION).asText()).build(); + dataset = database.getRealDatabase().getBigQuery().create(datasetInfo); + return database; + } + + @Override + protected void initTests() { + + } + + @Override + protected String getNameSpace() { + return dataset.getFriendlyName(); + } +} From 2a5703eddda531e2b3946463af60898ab8e7b842 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Wed, 7 Jul 2021 13:00:33 +0300 Subject: [PATCH 31/63] add dataset location --- .../source/bigquery/BigQuerySourceComprehensiveTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java index 6ba34f5d3bb7..3be898071fe5 100644 --- 
a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java +++ b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java @@ -54,7 +54,7 @@ protected Database setupDatabase() throws Exception { final JsonNode credentialsJson = Jsons.deserialize(credentialsJsonString); final String projectId = credentialsJson.get(CONFIG_PROJECT_ID).asText(); -// final String datasetLocation = "US"; + final String datasetLocation = "US"; final String datasetId = Strings.addRandomSuffix("airbyte_tests", "_", 8); @@ -62,7 +62,7 @@ protected Database setupDatabase() throws Exception { .put(CONFIG_PROJECT_ID, projectId) .put(CONFIG_CREDS, credentialsJsonString) .put(CONFIG_DATASET_ID, datasetId) -// .put(CONFIG_DATASET_LOCATION, datasetLocation) + .put(CONFIG_DATASET_LOCATION, datasetLocation) .build()); database = new TempBigQueryJoolDatabaseImpl(config.get(CONFIG_PROJECT_ID).asText(), credentialsJsonString); From ae5f05968020a9ec4c0e2af216e249c4b3af6cc7 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Wed, 7 Jul 2021 16:50:46 +0300 Subject: [PATCH 32/63] fix table info retrieving --- .../src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java index 8dcc7dce58d9..05feac307bfb 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java @@ -143,7 +143,8 @@ public List
getProjectTables(String projectId) { .iterateAll() .forEach(dataset -> bigQuery.listTables(dataset.getDatasetId()) .iterateAll() - .forEach(tableList::add)); + .forEach(table -> tableList.add(bigQuery.getTable(table.getTableId()))) + ); return tableList; } From 904f054540c77031908c5cce2b2bd34c62bb54a3 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Thu, 8 Jul 2021 15:47:24 +0300 Subject: [PATCH 33/63] handle dataset config --- .../airbyte/db/bigquery/BigQueryDatabase.java | 25 +++++++++++++++++++ .../source/bigquery/BigQuerySource.java | 22 +++++++++++----- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java index 05feac307bfb..fc293485393e 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java @@ -137,6 +137,11 @@ public ImmutablePair executeQuery(BigQuery bigquery, QueryJobConfig return executeQuery(queryJob); } + /** + * Returns full information about all tables from entire project + * @param projectId BigQuery project id + * @return List of BigQuery tables + */ public List
getProjectTables(String projectId) { List<Table>
tableList = new ArrayList<>(); bigQuery.listDatasets(projectId) @@ -148,6 +153,26 @@ public List<Table>
getProjectTables(String projectId) { return tableList; } + /** + * Returns full information about all tables from specific Dataset + * @param projectId BigQuery project id + * @param datasetId BigQuery dataset id + * @return List of BigQuery tables + */ + public List<Table>
getDatasetTables(String projectId, String datasetId) { + List<Table>
tableList = new ArrayList<>(); + bigQuery.listDatasets(projectId) + .iterateAll() + .forEach(dataset -> bigQuery.listTables(dataset.getDatasetId()) + .iterateAll() + .forEach(table -> { + if (table.getTableId().getDataset().equalsIgnoreCase(datasetId)) + tableList.add(bigQuery.getTable(table.getTableId())); + }) + ); + return tableList; + } + public BigQuery getBigQuery() { return bigQuery; } diff --git a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java index 61fd34adae64..c122e4f50607 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java +++ b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java @@ -32,6 +32,7 @@ import io.airbyte.commons.util.AutoCloseableIterator; import io.airbyte.commons.util.AutoCloseableIterators; import io.airbyte.db.Databases; +import io.airbyte.db.SqlDatabase; import io.airbyte.db.bigquery.BigQueryDatabase; import io.airbyte.db.bigquery.BigQueryUtils; import io.airbyte.integrations.base.IntegrationRunner; @@ -53,11 +54,12 @@ public class BigQuerySource extends AbstractRelationalDbSource getExcludedInternalNameSpaces() { @Override protected List>> discoverInternal(BigQueryDatabase database) { String projectId = dbConfig.get(CONFIG_PROJECT_ID).asText(); - List
tables = database.getProjectTables(projectId); + List<Table>
tables = (isDatasetConfigured(database) ? database.getDatasetTables(projectId, getConfigDatasetId(database)) : database.getProjectTables(projectId)); List>> result = new ArrayList<>(); tables.stream() .map(table -> Objects.requireNonNull(table.getDefinition().getSchema()).getFields()) @@ -140,6 +142,14 @@ private AutoCloseableIterator queryTableWithParams(BigQueryDatabase da }); } + private boolean isDatasetConfigured(SqlDatabase database) { + return database.getSourceConfig().hasNonNull(CONFIG_DATASET_ID); + } + + private String getConfigDatasetId(SqlDatabase database) { + return (isDatasetConfigured(database) ? database.getSourceConfig().get(CONFIG_DATASET_ID).asText() : null); + } + public static void main(String[] args) throws Exception { final Source source = new BigQuerySource(); LOGGER.info("starting source: {}", BigQuerySource.class); From 094fa827131d0c9ad9fe0250e11921fb6a496cbc Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Thu, 8 Jul 2021 16:10:12 +0300 Subject: [PATCH 34/63] Add working comprehensive test without data cases --- .../BigQuerySourceComprehensiveTest.java | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java index 3be898071fe5..3217b62f8d00 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java +++ b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java @@ -1,5 +1,10 @@ package io.airbyte.integrations.source.bigquery; +import static io.airbyte.integrations.source.bigquery.BigQuerySource.CONFIG_CREDS; +import static io.airbyte.integrations.source.bigquery.BigQuerySource.CONFIG_DATASET_ID; +import static io.airbyte.integrations.source.bigquery.BigQuerySource.CONFIG_DATASET_LOCATION; +import static io.airbyte.integrations.source.bigquery.BigQuerySource.CONFIG_PROJECT_ID; + import com.fasterxml.jackson.databind.JsonNode; import com.google.cloud.bigquery.Dataset; import com.google.cloud.bigquery.DatasetInfo; @@ -12,19 +17,17 @@ import io.airbyte.integrations.standardtest.source.TestDestinationEnv; import java.nio.file.Files; import java.nio.file.Path; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.TestInstance.Lifecycle; +@TestInstance(Lifecycle.PER_CLASS) public class BigQuerySourceComprehensiveTest extends SourceComprehensiveTest { private static final Path CREDENTIALS_PATH = Path.of("secrets/credentials.json"); - private static final String CONFIG_DATASET_ID = "dataset_id"; - private static final String CONFIG_PROJECT_ID = "project_id"; - private static final String CONFIG_DATASET_LOCATION = "dataset_location"; - private static final String CONFIG_CREDS = "credentials_json"; - private TempBigQueryJoolDatabaseImpl database; private Dataset dataset; - private boolean tornDown; private JsonNode config; @Override @@ -56,7 +59,7 @@ protected Database setupDatabase() throws Exception { final String projectId = credentialsJson.get(CONFIG_PROJECT_ID).asText(); final String datasetLocation = "US"; - final String datasetId = Strings.addRandomSuffix("airbyte_tests", "_", 8); 
+ final String datasetId = Strings.addRandomSuffix("airbyte_tests_compr", "_", 8); config = Jsons.jsonNode(ImmutableMap.builder() .put(CONFIG_PROJECT_ID, projectId) @@ -70,6 +73,7 @@ protected Database setupDatabase() throws Exception { final DatasetInfo datasetInfo = DatasetInfo.newBuilder(config.get(CONFIG_DATASET_ID).asText()).setLocation(config.get(CONFIG_DATASET_LOCATION).asText()).build(); dataset = database.getRealDatabase().getBigQuery().create(datasetInfo); + return database; } @@ -80,6 +84,11 @@ protected void initTests() { @Override protected String getNameSpace() { - return dataset.getFriendlyName(); + return dataset.getDatasetId().getDataset(); + } + + @AfterAll + public void cleanTestInstance() { + database.getRealDatabase().cleanDataSet(getNameSpace()); } } From 32bd99970b8d602537978f5ae9a185e374fce616 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Fri, 9 Jul 2021 14:02:35 +0300 Subject: [PATCH 35/63] minor changes in the source processing --- .../io/airbyte/db/bigquery/BigQueryUtils.java | 26 +++++++++++-------- .../source/bigquery/BigQuerySource.java | 2 +- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java index 6864801fecb8..143d2242580a 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java @@ -62,18 +62,22 @@ private static void setJsonField(Field field, FieldValue fieldValue, ObjectNode if (fieldValue.getAttribute().equals(Attribute.PRIMITIVE)) { LegacySQLTypeName fieldType = field.getType(); String fieldName = field.getName(); - switch (fieldType.getStandardType()) { - case BOOL -> node.put(fieldName, fieldValue.getBooleanValue()); - case INT64 -> node.put(fieldName, fieldValue.getLongValue()); - case FLOAT64 -> node.put(fieldName, fieldValue.getDoubleValue()); - case NUMERIC -> node.put(fieldName, fieldValue.getNumericValue()); - case BIGNUMERIC -> node.put(fieldName, nullIfInvalid(fieldValue::getNumericValue)); - case STRING -> node.put(fieldName, fieldValue.getStringValue()); - case BYTES -> node.put(fieldName, fieldValue.getBytesValue()); - case TIMESTAMP, DATE, TIME, DATETIME -> node - .put(fieldName, toISO8601String(fieldValue.getTimestampValue())); - default -> node.put(fieldName, fieldValue.getStringValue()); + if (fieldValue.isNull()) { + node.put(fieldName, (String) null); } + else + switch (fieldType.getStandardType()) { + case BOOL -> node.put(fieldName, fieldValue.getBooleanValue()); + case INT64 -> node.put(fieldName, fieldValue.getLongValue()); + case FLOAT64 -> node.put(fieldName, fieldValue.getDoubleValue()); + case NUMERIC -> node.put(fieldName, fieldValue.getNumericValue()); + case BIGNUMERIC -> node.put(fieldName, nullIfInvalid(fieldValue::getNumericValue)); + case STRING -> node.put(fieldName, fieldValue.getStringValue()); + case BYTES -> node.put(fieldName, fieldValue.getBytesValue()); + case TIMESTAMP, DATE, TIME, DATETIME -> node + .put(fieldName, toISO8601String(fieldValue.getTimestampValue())); + default -> node.put(fieldName, fieldValue.getStringValue()); + } } } diff --git a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java index c122e4f50607..d3d596fd8e38 100644 --- 
a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java +++ b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java @@ -59,7 +59,7 @@ public class BigQuerySource extends AbstractRelationalDbSource Date: Fri, 9 Jul 2021 15:40:25 +0300 Subject: [PATCH 36/63] acceptance tests; discover method fix --- .../source/bigquery/BigQuerySource.java | 21 +++--- .../BigQuerySourceAcceptanceTest.java | 71 +++++++++++++++---- 2 files changed, 71 insertions(+), 21 deletions(-) diff --git a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java index c122e4f50607..b8871b813db8 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java +++ b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java @@ -98,17 +98,20 @@ public Set getExcludedInternalNameSpaces() { @Override protected List>> discoverInternal(BigQueryDatabase database) { String projectId = dbConfig.get(CONFIG_PROJECT_ID).asText(); + String datasetId = dbConfig.get(CONFIG_DATASET_ID).asText(); List
tables = (isDatasetConfigured(database) ? database.getDatasetTables(projectId, getConfigDatasetId(database)) : database.getProjectTables(projectId)); List>> result = new ArrayList<>(); - tables.stream() - .map(table -> Objects.requireNonNull(table.getDefinition().getSchema()).getFields()) - .forEach(fields -> fields.stream().map(field -> TableInfo.>builder() - .nameSpace(projectId) - .name(field.getName()) - .fields(fields.stream().map(f -> { - StandardSQLTypeName standardType = f.getType().getStandardType(); - return new CommonField<>(f.getName(), standardType); - }).collect(Collectors.toList())).build()).forEach(result::add)); + tables.stream().map(table -> TableInfo.>builder() + .nameSpace(datasetId) + .name(table.getTableId().getTable()) + .fields(Objects.requireNonNull(table.getDefinition().getSchema()).getFields().stream() + .map(f -> { + StandardSQLTypeName standardType = f.getType().getStandardType(); + return new CommonField<>(f.getName(), standardType); + }) + .collect(Collectors.toList())) + .build()) + .forEach(result::add); return result; } diff --git a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceAcceptanceTest.java b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceAcceptanceTest.java index 3f11291c89ba..1ff981e4d4ad 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceAcceptanceTest.java @@ -25,33 +25,77 @@ package io.airbyte.integrations.source.bigquery; import com.fasterxml.jackson.databind.JsonNode; +import com.google.cloud.bigquery.BigQuery; +import com.google.cloud.bigquery.Dataset; +import com.google.cloud.bigquery.DatasetInfo; +import com.google.common.collect.ImmutableMap; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; +import io.airbyte.commons.string.Strings; +import io.airbyte.db.bigquery.BigQueryDatabase; +import io.airbyte.db.bigquery.TempBigQueryJoolDatabaseImpl; import io.airbyte.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; -import io.airbyte.protocol.models.ConnectorSpecification; +import io.airbyte.protocol.models.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.sql.SQLException; import java.util.Collections; import java.util.HashMap; import java.util.List; +import static io.airbyte.integrations.source.bigquery.BigQuerySource.*; + public class BigQuerySourceAcceptanceTest extends SourceAcceptanceTest { + private static final Path CREDENTIALS_PATH = Path.of("secrets/credentials.json"); + private static final String SCHEMA_NAME = "public"; + private static final String STREAM_NAME = "id_and_name"; + + private BigQueryDatabase database; + private Dataset dataset; private JsonNode config; @Override - protected void setupEnvironment(TestDestinationEnv testEnv) { - // TODO create new container. Ex: "new OracleContainer("epiclabs/docker-oracle-xe-11g");" - // TODO make container started. 
Ex: "container.start();" - // TODO init JsonNode config - // TODO crete airbyte Database object "Databases.createJdbcDatabase(...)" - // TODO insert test data to DB. Ex: "database.execute(connection-> ...)" - // TODO close Database. Ex: "database.close();" + protected void setupEnvironment(TestDestinationEnv testEnv) throws IOException, SQLException { + if (!Files.exists(CREDENTIALS_PATH)) { + throw new IllegalStateException( + "Must provide path to a big query credentials file. By default {module-root}/" + CREDENTIALS_PATH + + ". Override by setting setting path with the CREDENTIALS_PATH constant."); + } + + final String credentialsJsonString = new String(Files.readAllBytes(CREDENTIALS_PATH)); + + final JsonNode credentialsJson = Jsons.deserialize(credentialsJsonString); + final String projectId = credentialsJson.get(CONFIG_PROJECT_ID).asText(); + final String datasetLocation = "US"; + + final String datasetId = Strings.addRandomSuffix("airbyte_tests_acceptance", "_", 8); + + config = Jsons.jsonNode(ImmutableMap.builder() + .put(CONFIG_PROJECT_ID, projectId) + .put(CONFIG_CREDS, credentialsJsonString) + .put(CONFIG_DATASET_ID, datasetId) + .put(CONFIG_DATASET_LOCATION, datasetLocation) + .build()); + + database = new BigQueryDatabase(config.get(CONFIG_PROJECT_ID).asText(), credentialsJsonString); + + final DatasetInfo datasetInfo = + DatasetInfo.newBuilder(config.get(CONFIG_DATASET_ID).asText()).setLocation(config.get(CONFIG_DATASET_LOCATION).asText()).build(); + dataset = database.getBigQuery().create(datasetInfo); + + database.execute("CREATE TABLE " + datasetId + ".id_and_name(id INT64, name STRING);"); + database.execute("INSERT INTO " + datasetId + ".id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');"); } @Override protected void tearDown(TestDestinationEnv testEnv) { - // TODO close container that was initialized in setup() method. 
Ex: "container.close();" + database.cleanDataSet(dataset.getDatasetId().getDataset()); } @Override @@ -71,8 +115,11 @@ protected JsonNode getConfig() { @Override protected ConfiguredAirbyteCatalog getConfiguredCatalog() { - // TODO Return the ConfiguredAirbyteCatalog with ConfiguredAirbyteStream objects - return null; + return CatalogHelpers.createConfiguredAirbyteCatalog( + STREAM_NAME, + SCHEMA_NAME, + Field.of("id", JsonSchemaPrimitive.NUMBER), + Field.of("name", JsonSchemaPrimitive.STRING)); } @Override From 4e8910fe95fbad6a112675a61b50c340cb64056c Mon Sep 17 00:00:00 2001 From: heade Date: Fri, 9 Jul 2021 15:42:57 +0300 Subject: [PATCH 37/63] discover method fix --- .../io/airbyte/integrations/source/bigquery/BigQuerySource.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java index 6b0002196115..bdd9f1419897 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java +++ b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java @@ -98,7 +98,7 @@ public Set getExcludedInternalNameSpaces() { @Override protected List>> discoverInternal(BigQueryDatabase database) { String projectId = dbConfig.get(CONFIG_PROJECT_ID).asText(); - String datasetId = dbConfig.get(CONFIG_DATASET_ID).asText(); + String datasetId = getConfigDatasetId(database); List
tables = (isDatasetConfigured(database) ? database.getDatasetTables(projectId, getConfigDatasetId(database)) : database.getProjectTables(projectId)); List>> result = new ArrayList<>(); tables.stream().map(table -> TableInfo.>builder() From 36693ed8fd773864fcaea475d5894de5e768924d Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Fri, 9 Jul 2021 16:47:09 +0300 Subject: [PATCH 38/63] first comprehensinve test --- .../bigquery/BigQuerySourceComprehensiveTest.java | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java index 3217b62f8d00..3244d3e0bdc0 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java +++ b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java @@ -14,7 +14,9 @@ import io.airbyte.db.Database; import io.airbyte.db.bigquery.TempBigQueryJoolDatabaseImpl; import io.airbyte.integrations.standardtest.source.SourceComprehensiveTest; +import io.airbyte.integrations.standardtest.source.TestDataHolder; import io.airbyte.integrations.standardtest.source.TestDestinationEnv; +import io.airbyte.protocol.models.JsonSchemaPrimitive; import java.nio.file.Files; import java.nio.file.Path; import org.junit.jupiter.api.AfterAll; @@ -79,7 +81,15 @@ protected Database setupDatabase() throws Exception { @Override protected void initTests() { - + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("numeric") + .airbyteType(JsonSchemaPrimitive.NUMBER) + .createTablePatternSql("CREATE TABLE %1$s(%2$s NUMERIC(29), %3$s %4$s)") + .fullSourceDataType("numeric(10)") + .addInsertValues("null", "-128", "127") + .addExpectedValues(null, "-128", "127") + .build()); } @Override From 8dc3f445886be1ea29fac8830dee1a14c017afca Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Sun, 11 Jul 2021 14:04:06 +0300 Subject: [PATCH 39/63] Comprehensive tests for the BigQuery source + database timeout config --- .../airbyte/db/bigquery/BigQueryDatabase.java | 8 + .../io/airbyte/db/bigquery/BigQueryUtils.java | 13 +- .../BigQuerySourceComprehensiveTest.java | 181 +++++++++++++++++- 3 files changed, 194 insertions(+), 8 deletions(-) diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java index fc293485393e..c01354e3aae8 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java @@ -27,6 +27,7 @@ import static java.util.Objects.isNull; import com.fasterxml.jackson.databind.JsonNode; +import com.google.api.gax.retrying.RetrySettings; import com.google.auth.oauth2.ServiceAccountCredentials; import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.BigQueryOptions; @@ -54,6 +55,7 @@ import org.apache.commons.lang3.tuple.ImmutablePair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.threeten.bp.Duration; public class BigQueryDatabase extends SqlDatabase { @@ -72,6 +74,12 @@ public BigQueryDatabase(String projectId, String jsonCreds) { bigQuery = 
bigQueryBuilder .setProjectId(projectId) .setCredentials(!isNull(credentials) ? credentials : ServiceAccountCredentials.getApplicationDefault()) + .setRetrySettings(RetrySettings + .newBuilder() + .setMaxAttempts(10) + .setRetryDelayMultiplier(1.5) + .setTotalTimeout(Duration.ofMinutes(60)) + .build()) .build() .getService(); } catch (IOException e) { diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java index 143d2242580a..922d4258ac2f 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java @@ -50,7 +50,8 @@ public class BigQueryUtils { private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryUtils.class); - public static final String BIG_QUERY_DATE_FORMAT = "YYYY-MM-DD"; + public static final String BIG_QUERY_DATE_FORMAT = "yyyy-MM-dd"; + public static final String BIG_QUERY_DATETIME_FORMAT = "yyyy-MM-dd'T'HH:mm:ss"; public static JsonNode rowToJson(FieldValueList rowValues, FieldList fieldList) { ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); @@ -74,18 +75,20 @@ private static void setJsonField(Field field, FieldValue fieldValue, ObjectNode case BIGNUMERIC -> node.put(fieldName, nullIfInvalid(fieldValue::getNumericValue)); case STRING -> node.put(fieldName, fieldValue.getStringValue()); case BYTES -> node.put(fieldName, fieldValue.getBytesValue()); - case TIMESTAMP, DATE, TIME, DATETIME -> node - .put(fieldName, toISO8601String(fieldValue.getTimestampValue())); + case DATE -> node.put(fieldName, toISO8601String(getDateValue(fieldValue, BIG_QUERY_DATE_FORMAT))); + case DATETIME -> node.put(fieldName, toISO8601String(getDateValue(fieldValue, BIG_QUERY_DATETIME_FORMAT))); + case TIMESTAMP -> node.put(fieldName, toISO8601String(fieldValue.getTimestampValue()/1000)); + case TIME -> node.put(fieldName, fieldValue.getStringValue()); default -> node.put(fieldName, fieldValue.getStringValue()); } } } - public static Date getDateValue(FieldValue fieldValue) { + public static Date getDateValue(FieldValue fieldValue, String dateFormat) { Date parsedValue = null; String value = fieldValue.getStringValue(); try { - parsedValue = new SimpleDateFormat(BIG_QUERY_DATE_FORMAT).parse(value); + parsedValue = new SimpleDateFormat(dateFormat).parse(value); } catch (ParseException e) { LOGGER.error("Fail to parse date value : " + value + ". 
Null is returned."); } diff --git a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java index 3244d3e0bdc0..0231325958f9 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java +++ b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java @@ -27,6 +27,7 @@ public class BigQuerySourceComprehensiveTest extends SourceComprehensiveTest { private static final Path CREDENTIALS_PATH = Path.of("secrets/credentials.json"); + private static final String CREATE_SQL_PATTERN = "CREATE TABLE %1$s(%2$s NUMERIC(29), %3$s %4$s)"; private TempBigQueryJoolDatabaseImpl database; private Dataset dataset; @@ -83,13 +84,187 @@ protected Database setupDatabase() throws Exception { protected void initTests() { addDataTypeTestData( TestDataHolder.builder() - .sourceType("numeric") + .sourceType("int64") + .airbyteType(JsonSchemaPrimitive.NUMBER) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("null", "-128", "127", "9223372036854775807", "-9223372036854775808") + .addExpectedValues(null, "-128", "127", "9223372036854775807", "-9223372036854775808") + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("int") + .airbyteType(JsonSchemaPrimitive.NUMBER) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("null", "-128", "127") + .addExpectedValues(null, "-128", "127") + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("smallint") .airbyteType(JsonSchemaPrimitive.NUMBER) - .createTablePatternSql("CREATE TABLE %1$s(%2$s NUMERIC(29), %3$s %4$s)") - .fullSourceDataType("numeric(10)") + .createTablePatternSql(CREATE_SQL_PATTERN) .addInsertValues("null", "-128", "127") .addExpectedValues(null, "-128", "127") .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("integer") + .airbyteType(JsonSchemaPrimitive.NUMBER) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("null", "-128", "127") + .addExpectedValues(null, "-128", "127") + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("bigint") + .airbyteType(JsonSchemaPrimitive.NUMBER) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("null", "-128", "127") + .addExpectedValues(null, "-128", "127") + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("tinyint") + .airbyteType(JsonSchemaPrimitive.NUMBER) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("null", "-128", "127") + .addExpectedValues(null, "-128", "127") + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("byteint") + .airbyteType(JsonSchemaPrimitive.NUMBER) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("null", "-128", "127") + .addExpectedValues(null, "-128", "127") + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("numeric") + .fullSourceDataType("numeric(29,9)") + .airbyteType(JsonSchemaPrimitive.NUMBER) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("null", "-128", "127", "999999999999999999", "-999999999999999999", "0.123456789", "-0.123456789") + .addExpectedValues(null, "-128", 
"127", "999999999999999999", "-999999999999999999", "0.123456789", "-0.123456789") + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("bignumeric") + .fullSourceDataType("bignumeric(76,38)") + .airbyteType(JsonSchemaPrimitive.NUMBER) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("null", "-128", "127", "999999999999999999", "-999999999999999999", "0.123456789", "-0.123456789") + .addExpectedValues(null, "-128", "127", "999999999999999999", "-999999999999999999", "0.123456789", "-0.123456789") + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("decimal") + .fullSourceDataType("decimal(29,9)") + .airbyteType(JsonSchemaPrimitive.NUMBER) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("null", "-128", "127", "999999999999999999", "-999999999999999999", "0.123456789", "-0.123456789") + .addExpectedValues(null, "-128", "127", "999999999999999999", "-999999999999999999", "0.123456789", "-0.123456789") + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("bigdecimal") + .fullSourceDataType("bigdecimal(76,38)") + .airbyteType(JsonSchemaPrimitive.NUMBER) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("null", "-128", "127", "999999999999999999", "-999999999999999999", "0.123456789", "-0.123456789") + .addExpectedValues(null, "-128", "127", "999999999999999999", "-999999999999999999", "0.123456789", "-0.123456789") + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("float64") + .airbyteType(JsonSchemaPrimitive.NUMBER) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("null", "-128", "127", "0.123456789", "-0.123456789") + .addExpectedValues(null, "-128.0", "127.0", "0.123456789", "-0.123456789") + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("bool") + .airbyteType(JsonSchemaPrimitive.BOOLEAN) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("true", "false", "null") + .addExpectedValues("true", "false", null) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("bytes") + .airbyteType(JsonSchemaPrimitive.STRING) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("FROM_BASE64(\"test\")", "null") + .addExpectedValues("test", null) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("date") + .airbyteType(JsonSchemaPrimitive.STRING) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("date('2021-10-20')", "date('9999-12-31')", "date('0001-01-01')", "null") + .addExpectedValues("2021-10-20T00:00:00Z", "9999-12-31T00:00:00Z", "0001-01-01T00:00:00Z", null) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("datetime") + .airbyteType(JsonSchemaPrimitive.STRING) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("datetime('2021-10-20 11:22:33')", "datetime('9999-12-31 11:22:33')", "datetime('0001-01-01 11:22:33')", "null") + .addExpectedValues("2021-10-20T11:22:33Z", "9999-12-31T11:22:33Z", "0001-01-01T11:22:33Z", null) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("geography") + .airbyteType(JsonSchemaPrimitive.STRING) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("ST_GEOGFROMTEXT('POINT(1 2)')", "null") + .addExpectedValues("POINT(1 2)", null) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("string") + .airbyteType(JsonSchemaPrimitive.STRING) + .createTablePatternSql(CREATE_SQL_PATTERN) + 
.addInsertValues("'qwe'", "'йцу'", "null") + .addExpectedValues("qwe", "йцу", null) + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("struct") + .fullSourceDataType("STRUCT") + .airbyteType(JsonSchemaPrimitive.STRING) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("STRUCT(\"B.A\",12)", "null") + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("time") + .airbyteType(JsonSchemaPrimitive.STRING) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("TIME(15, 30, 00)", "null") + .addExpectedValues("15:30:00", null) + .build()); } @Override From 194af3f83288c0c4d66703b3e7278965961be0e7 Mon Sep 17 00:00:00 2001 From: heade Date: Mon, 12 Jul 2021 17:53:50 +0300 Subject: [PATCH 40/63] bigquery acceptance tests fix; formatting --- .../airbyte/db/bigquery/BigQueryDatabase.java | 12 +-- .../io/airbyte/db/bigquery/BigQueryUtils.java | 5 +- .../TempBigQueryJoolDatabaseImpl.java | 28 +++++- .../source/bigquery/BigQuerySource.java | 32 ++++--- .../BigQuerySourceAcceptanceTest.java | 34 +++---- .../BigQuerySourceComprehensiveTest.java | 29 +++++- .../BigqueryJdbcSourceAcceptanceTest.java | 88 ------------------- 7 files changed, 94 insertions(+), 134 deletions(-) delete mode 100644 airbyte-integrations/connectors/source-bigquery/src/test/java/io/airbyte/integrations/source/bigquery/BigqueryJdbcSourceAcceptanceTest.java diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java index c01354e3aae8..1b77e3c6cf8c 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java @@ -147,8 +147,9 @@ public ImmutablePair executeQuery(BigQuery bigquery, QueryJobConfig /** * Returns full information about all tables from entire project + * * @param projectId BigQuery project id - * @return List of BigQuery tables + * @return List of BigQuery tables */ public List
getProjectTables(String projectId) { List<Table>
tableList = new ArrayList<>(); @@ -156,16 +157,16 @@ public List
getProjectTables(String projectId) { .iterateAll() .forEach(dataset -> bigQuery.listTables(dataset.getDatasetId()) .iterateAll() - .forEach(table -> tableList.add(bigQuery.getTable(table.getTableId()))) - ); + .forEach(table -> tableList.add(bigQuery.getTable(table.getTableId())))); return tableList; } /** * Returns full information about all tables from specific Dataset + * * @param projectId BigQuery project id * @param datasetId BigQuery dataset id - * @return List of BigQuery tables + * @return List of BigQuery tables */ public List
getDatasetTables(String projectId, String datasetId) { List<Table>
tableList = new ArrayList<>(); @@ -176,8 +177,7 @@ public List
getDatasetTables(String projectId, String datasetId) { .forEach(table -> { if (table.getTableId().getDataset().equalsIgnoreCase(datasetId)) tableList.add(bigQuery.getTable(table.getTableId())); - }) - ); + })); return tableList; } diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java index 922d4258ac2f..5c44dbf45fb5 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java @@ -65,8 +65,7 @@ private static void setJsonField(Field field, FieldValue fieldValue, ObjectNode String fieldName = field.getName(); if (fieldValue.isNull()) { node.put(fieldName, (String) null); - } - else + } else switch (fieldType.getStandardType()) { case BOOL -> node.put(fieldName, fieldValue.getBooleanValue()); case INT64 -> node.put(fieldName, fieldValue.getLongValue()); @@ -77,7 +76,7 @@ private static void setJsonField(Field field, FieldValue fieldValue, ObjectNode case BYTES -> node.put(fieldName, fieldValue.getBytesValue()); case DATE -> node.put(fieldName, toISO8601String(getDateValue(fieldValue, BIG_QUERY_DATE_FORMAT))); case DATETIME -> node.put(fieldName, toISO8601String(getDateValue(fieldValue, BIG_QUERY_DATETIME_FORMAT))); - case TIMESTAMP -> node.put(fieldName, toISO8601String(fieldValue.getTimestampValue()/1000)); + case TIMESTAMP -> node.put(fieldName, toISO8601String(fieldValue.getTimestampValue() / 1000)); case TIME -> node.put(fieldName, fieldValue.getStringValue()); default -> node.put(fieldName, fieldValue.getStringValue()); } diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/TempBigQueryJoolDatabaseImpl.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/TempBigQueryJoolDatabaseImpl.java index eaf19be1ebe1..23d662d4633a 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/TempBigQueryJoolDatabaseImpl.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/TempBigQueryJoolDatabaseImpl.java @@ -1,3 +1,27 @@ +/* + * MIT License + * + * Copyright (c) 2020 Airbyte + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + package io.airbyte.db.bigquery; import io.airbyte.db.ContextQueryFunction; @@ -46,7 +70,7 @@ private static class FakeDefaultDSLContext extends DefaultDSLContext { private final BigQueryDatabase database; public FakeDefaultDSLContext(BigQueryDatabase database) { - super((SQLDialect)null); + super((SQLDialect) null); this.database = database; } @@ -59,5 +83,7 @@ public Result fetch(String sql) throws DataAccessException { } return null; } + } + } diff --git a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java index bdd9f1419897..6a016133368d 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java +++ b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java @@ -41,15 +41,12 @@ import io.airbyte.integrations.source.relationaldb.TableInfo; import io.airbyte.protocol.models.CommonField; import io.airbyte.protocol.models.JsonSchemaPrimitive; - -import java.sql.JDBCType; +import java.util.*; import java.util.stream.Collectors; import java.util.stream.Stream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.*; - public class BigQuerySource extends AbstractRelationalDbSource implements Source { private static final Logger LOGGER = LoggerFactory.getLogger(BigQuerySource.class); @@ -62,16 +59,15 @@ public class BigQuerySource extends AbstractRelationalDbSource getExcludedInternalNameSpaces() { protected List>> discoverInternal(BigQueryDatabase database) { String projectId = dbConfig.get(CONFIG_PROJECT_ID).asText(); String datasetId = getConfigDatasetId(database); - List
tables = (isDatasetConfigured(database) ? database.getDatasetTables(projectId, getConfigDatasetId(database)) : database.getProjectTables(projectId)); + List
tables = + (isDatasetConfigured(database) ? database.getDatasetTables(projectId, getConfigDatasetId(database)) : database.getProjectTables(projectId)); List>> result = new ArrayList<>(); tables.stream().map(table -> TableInfo.>builder() .nameSpace(datasetId) .name(table.getTableId().getTable()) .fields(Objects.requireNonNull(table.getDefinition().getSchema()).getFields().stream() .map(f -> { - StandardSQLTypeName standardType = f.getType().getStandardType(); - return new CommonField<>(f.getName(), standardType); + StandardSQLTypeName standardType = f.getType().getStandardType(); + return new CommonField<>(f.getName(), standardType); }) .collect(Collectors.toList())) .build()) @@ -126,7 +123,13 @@ protected String getQuoteString() { } @Override - public AutoCloseableIterator queryTableIncremental(BigQueryDatabase database, List columnNames, String schemaName, String tableName, String cursorField, StandardSQLTypeName cursorFieldType, String cursor) { + public AutoCloseableIterator queryTableIncremental(BigQueryDatabase database, + List columnNames, + String schemaName, + String tableName, + String cursorField, + StandardSQLTypeName cursorFieldType, + String cursor) { return queryTableWithParams(database, String.format("SELECT %s FROM %s WHERE %s >= @cursor", enquoteIdentifierList(columnNames), getFullTableName(schemaName, tableName), @@ -159,4 +162,5 @@ public static void main(String[] args) throws Exception { new IntegrationRunner(source).run(args); LOGGER.info("completed source: {}", BigQuerySource.class); } + } diff --git a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceAcceptanceTest.java b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceAcceptanceTest.java index 1ff981e4d4ad..1e31187f7d82 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceAcceptanceTest.java @@ -24,8 +24,9 @@ package io.airbyte.integrations.source.bigquery; +import static io.airbyte.integrations.source.bigquery.BigQuerySource.*; + import com.fasterxml.jackson.databind.JsonNode; -import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.Dataset; import com.google.cloud.bigquery.DatasetInfo; import com.google.common.collect.ImmutableMap; @@ -33,13 +34,9 @@ import io.airbyte.commons.resources.MoreResources; import io.airbyte.commons.string.Strings; import io.airbyte.db.bigquery.BigQueryDatabase; -import io.airbyte.db.bigquery.TempBigQueryJoolDatabaseImpl; import io.airbyte.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.integrations.standardtest.source.TestDestinationEnv; import io.airbyte.protocol.models.*; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -48,12 +45,9 @@ import java.util.HashMap; import java.util.List; -import static io.airbyte.integrations.source.bigquery.BigQuerySource.*; - public class BigQuerySourceAcceptanceTest extends SourceAcceptanceTest { private static final Path CREDENTIALS_PATH = Path.of("secrets/credentials.json"); - private static final String SCHEMA_NAME = "public"; private static final String STREAM_NAME = "id_and_name"; private BigQueryDatabase 
database; @@ -64,8 +58,8 @@ public class BigQuerySourceAcceptanceTest extends SourceAcceptanceTest { protected void setupEnvironment(TestDestinationEnv testEnv) throws IOException, SQLException { if (!Files.exists(CREDENTIALS_PATH)) { throw new IllegalStateException( - "Must provide path to a big query credentials file. By default {module-root}/" + CREDENTIALS_PATH - + ". Override by setting setting path with the CREDENTIALS_PATH constant."); + "Must provide path to a big query credentials file. By default {module-root}/" + CREDENTIALS_PATH + + ". Override by setting setting path with the CREDENTIALS_PATH constant."); } final String credentialsJsonString = new String(Files.readAllBytes(CREDENTIALS_PATH)); @@ -77,16 +71,16 @@ protected void setupEnvironment(TestDestinationEnv testEnv) throws IOException, final String datasetId = Strings.addRandomSuffix("airbyte_tests_acceptance", "_", 8); config = Jsons.jsonNode(ImmutableMap.builder() - .put(CONFIG_PROJECT_ID, projectId) - .put(CONFIG_CREDS, credentialsJsonString) - .put(CONFIG_DATASET_ID, datasetId) - .put(CONFIG_DATASET_LOCATION, datasetLocation) - .build()); + .put(CONFIG_PROJECT_ID, projectId) + .put(CONFIG_CREDS, credentialsJsonString) + .put(CONFIG_DATASET_ID, datasetId) + .put(CONFIG_DATASET_LOCATION, datasetLocation) + .build()); database = new BigQueryDatabase(config.get(CONFIG_PROJECT_ID).asText(), credentialsJsonString); final DatasetInfo datasetInfo = - DatasetInfo.newBuilder(config.get(CONFIG_DATASET_ID).asText()).setLocation(config.get(CONFIG_DATASET_LOCATION).asText()).build(); + DatasetInfo.newBuilder(config.get(CONFIG_DATASET_ID).asText()).setLocation(config.get(CONFIG_DATASET_LOCATION).asText()).build(); dataset = database.getBigQuery().create(datasetInfo); database.execute("CREATE TABLE " + datasetId + ".id_and_name(id INT64, name STRING);"); @@ -116,10 +110,10 @@ protected JsonNode getConfig() { @Override protected ConfiguredAirbyteCatalog getConfiguredCatalog() { return CatalogHelpers.createConfiguredAirbyteCatalog( - STREAM_NAME, - SCHEMA_NAME, - Field.of("id", JsonSchemaPrimitive.NUMBER), - Field.of("name", JsonSchemaPrimitive.STRING)); + STREAM_NAME, + config.get(CONFIG_DATASET_ID).asText(), + Field.of("id", JsonSchemaPrimitive.NUMBER), + Field.of("name", JsonSchemaPrimitive.STRING)); } @Override diff --git a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java index 0231325958f9..96e4be1f0016 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java +++ b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java @@ -1,3 +1,27 @@ +/* + * MIT License + * + * Copyright (c) 2020 Airbyte + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and 
this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + package io.airbyte.integrations.source.bigquery; import static io.airbyte.integrations.source.bigquery.BigQuerySource.CONFIG_CREDS; @@ -230,7 +254,7 @@ protected void initTests() { .addExpectedValues("2021-10-20T11:22:33Z", "9999-12-31T11:22:33Z", "0001-01-01T11:22:33Z", null) .build()); - addDataTypeTestData( + addDataTypeTestData( TestDataHolder.builder() .sourceType("geography") .airbyteType(JsonSchemaPrimitive.STRING) @@ -257,7 +281,7 @@ protected void initTests() { .addInsertValues("STRUCT(\"B.A\",12)", "null") .build()); - addDataTypeTestData( + addDataTypeTestData( TestDataHolder.builder() .sourceType("time") .airbyteType(JsonSchemaPrimitive.STRING) @@ -276,4 +300,5 @@ protected String getNameSpace() { public void cleanTestInstance() { database.getRealDatabase().cleanDataSet(getNameSpace()); } + } diff --git a/airbyte-integrations/connectors/source-bigquery/src/test/java/io/airbyte/integrations/source/bigquery/BigqueryJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-bigquery/src/test/java/io/airbyte/integrations/source/bigquery/BigqueryJdbcSourceAcceptanceTest.java deleted file mode 100644 index fa88e9668df1..000000000000 --- a/airbyte-integrations/connectors/source-bigquery/src/test/java/io/airbyte/integrations/source/bigquery/BigqueryJdbcSourceAcceptanceTest.java +++ /dev/null @@ -1,88 +0,0 @@ -///* -// * MIT License -// * -// * Copyright (c) 2020 Airbyte -// * -// * Permission is hereby granted, free of charge, to any person obtaining a copy -// * of this software and associated documentation files (the "Software"), to deal -// * in the Software without restriction, including without limitation the rights -// * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// * copies of the Software, and to permit persons to whom the Software is -// * furnished to do so, subject to the following conditions: -// * -// * The above copyright notice and this permission notice shall be included in all -// * copies or substantial portions of the Software. -// * -// * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// * SOFTWARE. 
-// */ -// -//package io.airbyte.integrations.source.bigquery; -// -//import com.fasterxml.jackson.databind.JsonNode; -//import io.airbyte.integrations.source.jdbc.AbstractJdbcSource; -//import io.airbyte.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; -//import org.junit.jupiter.api.AfterAll; -//import org.junit.jupiter.api.AfterEach; -//import org.junit.jupiter.api.BeforeAll; -//import org.junit.jupiter.api.BeforeEach; -//import org.slf4j.Logger; -//import org.slf4j.LoggerFactory; -// -//class BigqueryJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { -// -// private static final Logger LOGGER = LoggerFactory.getLogger(BigqueryJdbcSourceAcceptanceTest.class); -// -// // TODO declare a test container for DB. EX: org.testcontainers.containers.OracleContainer -// -// @BeforeAll -// static void init() { -// // Oracle returns uppercase values -// // TODO init test container. Ex: "new OracleContainer("epiclabs/docker-oracle-xe-11g")" -// // TODO start container. Ex: "container.start();" -// } -// -// @BeforeEach -// public void setup() throws Exception { -// // TODO init config. Ex: "config = Jsons.jsonNode(ImmutableMap.builder().put("host", -// // host).put("port", port)....build()); -// super.setup(); -// } -// -// @AfterEach -// public void tearDown() { -// // TODO clean used resources -// } -// -// @Override -// public AbstractJdbcSource getSource() { -// return new AbstractJdbcSource(); -// } -// -// @Override -// public boolean supportsSchemas() { -// // TODO check if your db supports it and update method accordingly -// return false; -// } -// -// @Override -// public JsonNode getConfig() { -// return config; -// } -// -// @Override -// public String getDriverClass() { -// return BigQuerySource.DRIVER_CLASS; -// } -// -// @AfterAll -// static void cleanUp() { -// // TODO close the container. 
Ex: "container.close();" -// } -// -//} From 62b3f8960691ab0b51f6bc5c706e1c995179f565 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Tue, 13 Jul 2021 14:18:54 +0300 Subject: [PATCH 41/63] fix incremental sync using date, datetime, time and timestamp types --- .../io/airbyte/db/bigquery/BigQueryUtils.java | 31 +++++++++++++++---- .../source/bigquery/BigQuerySource.java | 4 +-- .../BigQuerySourceComprehensiveTest.java | 9 ++++++ 3 files changed, 35 insertions(+), 9 deletions(-) diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java index 5c44dbf45fb5..8b497b4b7ae6 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java @@ -38,7 +38,9 @@ import com.google.cloud.bigquery.QueryParameterValue; import com.google.cloud.bigquery.StandardSQLTypeName; import io.airbyte.commons.json.Jsons; +import io.airbyte.db.DataTypeUtils; import io.airbyte.protocol.models.JsonSchemaPrimitive; +import java.text.DateFormat; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Collections; @@ -50,8 +52,9 @@ public class BigQueryUtils { private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryUtils.class); - public static final String BIG_QUERY_DATE_FORMAT = "yyyy-MM-dd"; - public static final String BIG_QUERY_DATETIME_FORMAT = "yyyy-MM-dd'T'HH:mm:ss"; + public static final DateFormat BIG_QUERY_DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd"); + public static final DateFormat BIG_QUERY_DATETIME_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); + public static final DateFormat BIG_QUERY_TIMESTAMP_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSSSS z"); public static JsonNode rowToJson(FieldValueList rowValues, FieldList fieldList) { ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); @@ -83,11 +86,11 @@ private static void setJsonField(Field field, FieldValue fieldValue, ObjectNode } } - public static Date getDateValue(FieldValue fieldValue, String dateFormat) { + public static Date getDateValue(FieldValue fieldValue, DateFormat dateFormat) { Date parsedValue = null; String value = fieldValue.getStringValue(); try { - parsedValue = new SimpleDateFormat(dateFormat).parse(value); + parsedValue = dateFormat.parse(value); } catch (ParseException e) { LOGGER.error("Fail to parse date value : " + value + ". Null is returned."); } @@ -103,9 +106,25 @@ public static JsonSchemaPrimitive getType(StandardSQLTypeName bigQueryType) { }; } - // @TODO probably we need a reverse value transformation. especially for time and date types. 
+ private static String getFormattedValue(StandardSQLTypeName paramType, String paramValue) { + try { + return switch (paramType) { + case DATE -> BIG_QUERY_DATE_FORMAT.format(DataTypeUtils.DATE_FORMAT.parse(paramValue)); + case DATETIME -> BIG_QUERY_DATETIME_FORMAT + .format(DataTypeUtils.DATE_FORMAT.parse(paramValue)); + case TIMESTAMP -> BIG_QUERY_TIMESTAMP_FORMAT + .format(DataTypeUtils.DATE_FORMAT.parse(paramValue)); + default -> paramValue; + }; + } catch (ParseException e) { + throw new RuntimeException("Fail to parse value " + paramValue + " to type " + paramType.name()); + } + } + public static QueryParameterValue getQueryParameter(StandardSQLTypeName paramType, String paramValue) { - return QueryParameterValue.newBuilder().setType(paramType).setValue(paramValue).build(); + String value = getFormattedValue(paramType, paramValue); + LOGGER.info("Query parameter for set : " + value + ". Type: " + paramType.name()); + return QueryParameterValue.newBuilder().setType(paramType).setValue(value).build(); } } diff --git a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java index 6a016133368d..d8c4df34f09c 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java +++ b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java @@ -59,8 +59,6 @@ public class BigQuerySource extends AbstractRelationalDbSource queryTableIncremental(BigQueryDatabase da String cursorField, StandardSQLTypeName cursorFieldType, String cursor) { - return queryTableWithParams(database, String.format("SELECT %s FROM %s WHERE %s >= @cursor", + return queryTableWithParams(database, String.format("SELECT %s FROM %s WHERE %s > ?", enquoteIdentifierList(columnNames), getFullTableName(schemaName, tableName), cursorField), diff --git a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java index 96e4be1f0016..0a70639d1321 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java +++ b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java @@ -254,6 +254,15 @@ protected void initTests() { .addExpectedValues("2021-10-20T11:22:33Z", "9999-12-31T11:22:33Z", "0001-01-01T11:22:33Z", null) .build()); + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("timestamp") + .airbyteType(JsonSchemaPrimitive.STRING) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("timestamp('2021-10-20 11:22:33')", "null") + .addExpectedValues("2021-10-20T11:22:33Z", null) + .build()); + addDataTypeTestData( TestDataHolder.builder() .sourceType("geography") From 954995a352f57993cb503a19ea387fc3aeaf206e Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Tue, 13 Jul 2021 18:15:17 +0300 Subject: [PATCH 42/63] Implement source checks: basic and dataset --- .../airbyte/db/bigquery/BigQueryDatabase.java | 4 ++++ .../source/bigquery/BigQuerySource.java | 21 
++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java index 1b77e3c6cf8c..db70b14331f4 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java @@ -96,6 +96,10 @@ public void execute(String sql) throws SQLException { LOGGER.info("BigQuery successfully finished execution SQL: " + sql); } + public Stream query(String sql) throws Exception { + return query(sql, Collections.emptyList()); + } + public Stream query(String sql, QueryParameterValue... params) throws Exception { return query(sql, (params == null ? Collections.emptyList() : Arrays.asList(params))); } diff --git a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java index d8c4df34f09c..1ec7c00e3b20 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java +++ b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java @@ -76,7 +76,26 @@ protected BigQueryDatabase createDatabase(JsonNode config) { @Override public List> getCheckOperations(JsonNode config) { - return Collections.emptyList(); + List> checkList = new ArrayList<>(); + checkList.add(database -> { + if (database.query("select 1").count() < 1) + throw new Exception("Unable to execute any query on the source!"); + else + LOGGER.info("The source passed the basic query test!"); + }); + + checkList.add(database -> { + if (isDatasetConfigured(database)) { + database.query(String.format("select 1 from %s where 1=0", + getFullTableName(getConfigDatasetId(database), "INFORMATION_SCHEMA.TABLES"))); + LOGGER.info("The source passed the Dataset query test!"); + } else { + LOGGER.info("The Dataset query test is skipped due to not configured datasetId!"); + } + } + ); + + return checkList; } @Override From e258c82399b0593fc2c95a5764c1a3acd9be9060 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Tue, 13 Jul 2021 19:00:26 +0300 Subject: [PATCH 43/63] format --- .../source/bigquery/BigQuerySource.java | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java index 1ec7c00e3b20..f150ba6334e2 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java +++ b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java @@ -85,15 +85,14 @@ public List> getCheckOperations(Jso }); checkList.add(database -> { - if (isDatasetConfigured(database)) { - database.query(String.format("select 1 from %s where 1=0", - getFullTableName(getConfigDatasetId(database), "INFORMATION_SCHEMA.TABLES"))); - LOGGER.info("The source passed the Dataset query test!"); - } else { - LOGGER.info("The Dataset query test is skipped due to not configured datasetId!"); - } - } - ); + if (isDatasetConfigured(database)) { + 
database.query(String.format("select 1 from %s where 1=0", + getFullTableName(getConfigDatasetId(database), "INFORMATION_SCHEMA.TABLES"))); + LOGGER.info("The source passed the Dataset query test!"); + } else { + LOGGER.info("The Dataset query test is skipped due to not configured datasetId!"); + } + }); return checkList; } From 6107f304b47cb1dc963f691ad040eaf524f37002 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Tue, 13 Jul 2021 19:14:27 +0300 Subject: [PATCH 44/63] revert: airbyte_protocol.by --- .../airbyte_cdk/models/airbyte_protocol.py | 120 ++++++++++-------- 1 file changed, 66 insertions(+), 54 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py b/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py index cb7e2f70bd65..da45f5cebd14 100644 --- a/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py +++ b/airbyte-cdk/python/airbyte_cdk/models/airbyte_protocol.py @@ -1,3 +1,27 @@ +# +# MIT License +# +# Copyright (c) 2020 Airbyte +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# + # generated by datamodel-codegen: # filename: airbyte_protocol.yaml @@ -10,12 +34,12 @@ class Type(Enum): - RECORD = 'RECORD' - STATE = 'STATE' - LOG = 'LOG' - SPEC = 'SPEC' - CONNECTION_STATUS = 'CONNECTION_STATUS' - CATALOG = 'CATALOG' + RECORD = "RECORD" + STATE = "STATE" + LOG = "LOG" + SPEC = "SPEC" + CONNECTION_STATUS = "CONNECTION_STATUS" + CATALOG = "CATALOG" class AirbyteRecordMessage(BaseModel): @@ -23,43 +47,41 @@ class Config: extra = Extra.allow stream: str = Field(..., description="the name of this record's stream") - data: Dict[str, Any] = Field(..., description='the record data') + data: Dict[str, Any] = Field(..., description="the record data") emitted_at: int = Field( ..., - description='when the data was emitted from the source. epoch in millisecond.', - ) - namespace: Optional[str] = Field( - None, description="the namespace of this record's stream" + description="when the data was emitted from the source. 
epoch in millisecond.", ) + namespace: Optional[str] = Field(None, description="the namespace of this record's stream") class AirbyteStateMessage(BaseModel): class Config: extra = Extra.allow - data: Dict[str, Any] = Field(..., description='the state data') + data: Dict[str, Any] = Field(..., description="the state data") class Level(Enum): - FATAL = 'FATAL' - ERROR = 'ERROR' - WARN = 'WARN' - INFO = 'INFO' - DEBUG = 'DEBUG' - TRACE = 'TRACE' + FATAL = "FATAL" + ERROR = "ERROR" + WARN = "WARN" + INFO = "INFO" + DEBUG = "DEBUG" + TRACE = "TRACE" class AirbyteLogMessage(BaseModel): class Config: extra = Extra.allow - level: Level = Field(..., description='the type of logging') - message: str = Field(..., description='the log message') + level: Level = Field(..., description="the type of logging") + message: str = Field(..., description="the log message") class Status(Enum): - SUCCEEDED = 'SUCCEEDED' - FAILED = 'FAILED' + SUCCEEDED = "SUCCEEDED" + FAILED = "FAILED" class AirbyteConnectionStatus(BaseModel): @@ -71,14 +93,14 @@ class Config: class SyncMode(Enum): - full_refresh = 'full_refresh' - incremental = 'incremental' + full_refresh = "full_refresh" + incremental = "incremental" class DestinationSyncMode(Enum): - append = 'append' - overwrite = 'overwrite' - append_dedup = 'append_dedup' + append = "append" + overwrite = "overwrite" + append_dedup = "append_dedup" class ConnectorSpecification(BaseModel): @@ -89,19 +111,13 @@ class Config: changelogUrl: Optional[AnyUrl] = None connectionSpecification: Dict[str, Any] = Field( ..., - description='ConnectorDefinition specific blob. Must be a valid JSON string.', - ) - supportsIncremental: Optional[bool] = Field( - None, description='If the connector supports incremental mode or not.' - ) - supportsNormalization: Optional[bool] = Field( - False, description='If the connector supports normalization or not.' - ) - supportsDBT: Optional[bool] = Field( - False, description='If the connector supports DBT or not.' + description="ConnectorDefinition specific blob. Must be a valid JSON string.", ) + supportsIncremental: Optional[bool] = Field(None, description="If the connector supports incremental mode or not.") + supportsNormalization: Optional[bool] = Field(False, description="If the connector supports normalization or not.") + supportsDBT: Optional[bool] = Field(False, description="If the connector supports DBT or not.") supported_destination_sync_modes: Optional[List[DestinationSyncMode]] = Field( - None, description='List of destination sync modes supported by the connector' + None, description="List of destination sync modes supported by the connector" ) @@ -110,25 +126,23 @@ class Config: extra = Extra.allow name: str = Field(..., description="Stream's name.") - json_schema: Dict[str, Any] = Field( - ..., description='Stream schema using Json Schema specs.' - ) + json_schema: Dict[str, Any] = Field(..., description="Stream schema using Json Schema specs.") supported_sync_modes: Optional[List[SyncMode]] = None source_defined_cursor: Optional[bool] = Field( None, - description='If the source defines the cursor field, then any other cursor field inputs will be ignored. If it does not, either the user_provided one is used, or the default one is used as a backup.', + description="If the source defines the cursor field, then any other cursor field inputs will be ignored. 
If it does not, either the user_provided one is used, or the default one is used as a backup.", ) default_cursor_field: Optional[List[str]] = Field( None, - description='Path to the field that will be used to determine if a record is new or modified since the last sync. If not provided by the source, the end user will have to specify the comparable themselves.', + description="Path to the field that will be used to determine if a record is new or modified since the last sync. If not provided by the source, the end user will have to specify the comparable themselves.", ) source_defined_primary_key: Optional[List[List[str]]] = Field( None, - description='If the source defines the primary key, paths to the fields that will be used as a primary key. If not provided by the source, the end user will have to specify the primary key themselves.', + description="If the source defines the primary key, paths to the fields that will be used as a primary key. If not provided by the source, the end user will have to specify the primary key themselves.", ) namespace: Optional[str] = Field( None, - description='Optional Source-defined namespace. Currently only used by JDBC destinations to determine what schema to write to. Airbyte streams from the same sources should have the same namespace.', + description="Optional Source-defined namespace. Currently only used by JDBC destinations to determine what schema to write to. Airbyte streams from the same sources should have the same namespace.", ) @@ -140,12 +154,12 @@ class Config: sync_mode: SyncMode cursor_field: Optional[List[str]] = Field( None, - description='Path to the field that will be used to determine if a record is new or modified since the last sync. This field is REQUIRED if `sync_mode` is `incremental`. Otherwise it is ignored.', + description="Path to the field that will be used to determine if a record is new or modified since the last sync. This field is REQUIRED if `sync_mode` is `incremental`. Otherwise it is ignored.", ) destination_sync_mode: DestinationSyncMode primary_key: Optional[List[List[str]]] = Field( None, - description='Paths to the fields that will be used as primary key. This field is REQUIRED if `destination_sync_mode` is `*_dedup`. Otherwise it is ignored.', + description="Paths to the fields that will be used as primary key. This field is REQUIRED if `destination_sync_mode` is `*_dedup`. Otherwise it is ignored.", ) @@ -167,23 +181,21 @@ class AirbyteMessage(BaseModel): class Config: extra = Extra.allow - type: Type = Field(..., description='Message type') + type: Type = Field(..., description="Message type") log: Optional[AirbyteLogMessage] = Field( None, - description='log message: any kind of logging you want the platform to know about.', + description="log message: any kind of logging you want the platform to know about.", ) spec: Optional[ConnectorSpecification] = None connectionStatus: Optional[AirbyteConnectionStatus] = None catalog: Optional[AirbyteCatalog] = Field( None, - description='log message: any kind of logging you want the platform to know about.', - ) - record: Optional[AirbyteRecordMessage] = Field( - None, description='record message: the record' + description="log message: any kind of logging you want the platform to know about.", ) + record: Optional[AirbyteRecordMessage] = Field(None, description="record message: the record") state: Optional[AirbyteStateMessage] = Field( None, - description='schema message: the state. Must be the last message produced. 
The platform uses this information', + description="schema message: the state. Must be the last message produced. The platform uses this information", ) From bfa5cf357eff37119ca39555bbb3919932e69973 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Wed, 14 Jul 2021 18:33:22 +0300 Subject: [PATCH 45/63] internal review update --- airbyte-db/build.gradle | 2 +- .../java/io/airbyte/db/DataTypeUtils.java | 6 +-- .../io/airbyte/db/bigquery/BigQueryUtils.java | 4 +- .../TempBigQueryJoolDatabaseImpl.java | 2 +- .../java/io/airbyte/db/jdbc/JdbcUtils.java | 10 ++-- .../source/bigquery/BigQuerySourceTests.java | 51 ------------------- 6 files changed, 12 insertions(+), 63 deletions(-) delete mode 100644 airbyte-integrations/connectors/source-bigquery/src/test/java/io/airbyte/integrations/source/bigquery/BigQuerySourceTests.java diff --git a/airbyte-db/build.gradle b/airbyte-db/build.gradle index 81266bb73b09..e62dac724e4d 100644 --- a/airbyte-db/build.gradle +++ b/airbyte-db/build.gradle @@ -17,7 +17,7 @@ dependencies { // Big Query implementation platform('com.google.cloud:libraries-bom:20.6.0') - compile 'com.google.cloud:google-cloud-bigquery:1.133.1' + implementation('com.google.cloud:google-cloud-bigquery:1.133.1') // Lombok implementation 'org.projectlombok:lombok:1.18.20' diff --git a/airbyte-db/src/main/java/io/airbyte/db/DataTypeUtils.java b/airbyte-db/src/main/java/io/airbyte/db/DataTypeUtils.java index 0ec38e828a18..04ab4a98af25 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/DataTypeUtils.java +++ b/airbyte-db/src/main/java/io/airbyte/db/DataTypeUtils.java @@ -35,11 +35,11 @@ public class DataTypeUtils { public static final DateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); // Quoted "Z" to indicate UTC, no timezone offset - public static T nullIfInvalid(DataTypeSupplier valueProducer) { - return nullIfInvalid(valueProducer, ignored -> true); + public static T returnNullIfInvalid(DataTypeSupplier valueProducer) { + return returnNullIfInvalid(valueProducer, ignored -> true); } - public static T nullIfInvalid(DataTypeSupplier valueProducer, Function isValidFn) { + public static T returnNullIfInvalid(DataTypeSupplier valueProducer, Function isValidFn) { // Some edge case values (e.g: Infinity, NaN) have no java or JSON equivalent, and will throw an // exception when parsed. We want to parse those // values as null. 
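The renamed returnNullIfInvalid helper above follows a small "return null instead of throwing" pattern for edge-case values such as NaN or Infinity. A rough self-contained sketch of the idea follows; ThrowingSupplier stands in for Airbyte's DataTypeSupplier, whose definition is not shown in this patch, and the class name is illustrative only:

import java.util.function.Function;

public class NullIfInvalidSketch {

  @FunctionalInterface
  interface ThrowingSupplier<T> {
    T get() throws Exception;
  }

  // Returns the supplied value, or null when producing it throws or when the validity check fails.
  static <T> T returnNullIfInvalid(ThrowingSupplier<T> supplier, Function<T, Boolean> isValid) {
    try {
      T value = supplier.get();
      return isValid.apply(value) ? value : null;
    } catch (Exception e) {
      // Edge-case values (NaN, Infinity, out-of-range numbers) become null rather than failing the sync.
      return null;
    }
  }

  public static void main(String[] args) {
    System.out.println(returnNullIfInvalid(() -> 1.5d, Double::isFinite));        // 1.5
    System.out.println(returnNullIfInvalid(() -> 1.0d / 0.0d, Double::isFinite)); // null
  }
}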
diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java index 8b497b4b7ae6..6fa255530c55 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java @@ -24,7 +24,7 @@ package io.airbyte.db.bigquery; -import static io.airbyte.db.DataTypeUtils.nullIfInvalid; +import static io.airbyte.db.DataTypeUtils.returnNullIfInvalid; import static io.airbyte.db.DataTypeUtils.toISO8601String; import com.fasterxml.jackson.databind.JsonNode; @@ -74,7 +74,7 @@ private static void setJsonField(Field field, FieldValue fieldValue, ObjectNode case INT64 -> node.put(fieldName, fieldValue.getLongValue()); case FLOAT64 -> node.put(fieldName, fieldValue.getDoubleValue()); case NUMERIC -> node.put(fieldName, fieldValue.getNumericValue()); - case BIGNUMERIC -> node.put(fieldName, nullIfInvalid(fieldValue::getNumericValue)); + case BIGNUMERIC -> node.put(fieldName, returnNullIfInvalid(fieldValue::getNumericValue)); case STRING -> node.put(fieldName, fieldValue.getStringValue()); case BYTES -> node.put(fieldName, fieldValue.getBytesValue()); case DATE -> node.put(fieldName, toISO8601String(getDateValue(fieldValue, BIG_QUERY_DATE_FORMAT))); diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/TempBigQueryJoolDatabaseImpl.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/TempBigQueryJoolDatabaseImpl.java index 23d662d4633a..57297009c716 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/TempBigQueryJoolDatabaseImpl.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/TempBigQueryJoolDatabaseImpl.java @@ -35,7 +35,7 @@ import org.jooq.impl.DefaultDSLContext; /** - * This class is a temporary and will be removed as part of the issue #4547 + * This class is a temporary and will be removed as part of the issue @TODO #4547 */ public class TempBigQueryJoolDatabaseImpl extends Database { diff --git a/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcUtils.java b/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcUtils.java index 4c2c37587b9b..1df721e471db 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcUtils.java +++ b/airbyte-db/src/main/java/io/airbyte/db/jdbc/JdbcUtils.java @@ -125,11 +125,11 @@ private static void setJsonField(ResultSet r, int i, ObjectNode o) throws SQLExc case BIT, BOOLEAN -> o.put(columnName, r.getBoolean(i)); case TINYINT, SMALLINT -> o.put(columnName, r.getShort(i)); case INTEGER -> putInteger(o, columnName, r, i); - case BIGINT -> o.put(columnName, DataTypeUtils.nullIfInvalid(() -> r.getLong(i))); + case BIGINT -> o.put(columnName, DataTypeUtils.returnNullIfInvalid(() -> r.getLong(i))); case FLOAT, DOUBLE -> o.put(columnName, DataTypeUtils - .nullIfInvalid(() -> r.getDouble(i), Double::isFinite)); - case REAL -> o.put(columnName, DataTypeUtils.nullIfInvalid(() -> r.getFloat(i), Float::isFinite)); - case NUMERIC, DECIMAL -> o.put(columnName, DataTypeUtils.nullIfInvalid(() -> r.getBigDecimal(i))); + .returnNullIfInvalid(() -> r.getDouble(i), Double::isFinite)); + case REAL -> o.put(columnName, DataTypeUtils.returnNullIfInvalid(() -> r.getFloat(i), Float::isFinite)); + case NUMERIC, DECIMAL -> o.put(columnName, DataTypeUtils.returnNullIfInvalid(() -> r.getBigDecimal(i))); case CHAR, VARCHAR, LONGVARCHAR -> o.put(columnName, r.getString(i)); case DATE -> o.put(columnName, DataTypeUtils.toISO8601String(r.getDate(i))); case TIME -> o.put(columnName, 
DataTypeUtils.toISO8601String(r.getTime(i))); @@ -153,7 +153,7 @@ private static void putInteger(ObjectNode node, String columnName, ResultSet res try { node.put(columnName, resultSet.getInt(index)); } catch (SQLException e) { - node.put(columnName, DataTypeUtils.nullIfInvalid(() -> resultSet.getLong(index))); + node.put(columnName, DataTypeUtils.returnNullIfInvalid(() -> resultSet.getLong(index))); } } diff --git a/airbyte-integrations/connectors/source-bigquery/src/test/java/io/airbyte/integrations/source/bigquery/BigQuerySourceTests.java b/airbyte-integrations/connectors/source-bigquery/src/test/java/io/airbyte/integrations/source/bigquery/BigQuerySourceTests.java deleted file mode 100644 index 4196237094cc..000000000000 --- a/airbyte-integrations/connectors/source-bigquery/src/test/java/io/airbyte/integrations/source/bigquery/BigQuerySourceTests.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * MIT License - * - * Copyright (c) 2020 Airbyte - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -package io.airbyte.integrations.source.bigquery; - -import com.fasterxml.jackson.databind.JsonNode; -import io.airbyte.db.Database; -import org.junit.jupiter.api.Test; - -public class BigQuerySourceTests { - - private JsonNode config; - private Database database; - - @Test - public void testSettingTimezones() throws Exception { - // TODO init your container. Ex: "new - // org.testcontainers.containers.MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2019-latest").acceptLicense();" - // TODO start the container. Ex: "container.start();" - // TODO prepare DB config. Ex: "config = getConfig(container, dbName, - // "serverTimezone=Europe/London");" - // TODO create DB, grant all privileges, etc. - // TODO check connection status. Ex: "AirbyteConnectionStatus check = new - // ScaffoldJavaJdbcGenericSource().check(config);" - // TODO assert connection status. Ex: "assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, - // check.getStatus());" - // TODO cleanup used resources and close used container. Ex: "container.close();" - } - -} From d64ce0b2efcdb83351770b980b5f44f10eda667d Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Wed, 14 Jul 2021 19:02:56 +0300 Subject: [PATCH 46/63] Add possibility to get list of comprehensive tests in a Markdown table format. 
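The change below adds a getMarkdownTestTable() helper to the comprehensive test base. A condensed, runnable illustration of the same idea — wrapping each value in backticks and assembling Markdown table rows — using simplified, hypothetical names rather than the test framework's real classes:

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class MarkdownTableSketch {

  // Mirrors formatCollection() from the patch: wrap each value in backticks and join with commas.
  static String formatCollection(List<String> values) {
    return values.stream().map(v -> "`" + v + "`").collect(Collectors.joining(", "));
  }

  public static void main(String[] args) {
    // Hypothetical sample data standing in for the registered TestDataHolder instances.
    Map<String, List<String>> insertValuesByType = Map.of(
        "date", List.of("date('2021-10-20')", "null"),
        "time", List.of("TIME(15, 30, 00)", "null"));

    StringBuilder table = new StringBuilder()
        .append("|**Data Type**|**Insert values**|\n")
        .append("|----|----|\n");
    insertValuesByType.forEach(
        (sourceType, values) -> table.append(String.format("| %s | %s |\n", sourceType, formatCollection(values))));

    System.out.println(table);
  }
}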
--- .../source/SourceComprehensiveTest.java | 29 +++++++++++++++++++ .../standardtest/source/TestDataHolder.java | 4 +++ 2 files changed, 33 insertions(+) diff --git a/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceComprehensiveTest.java b/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceComprehensiveTest.java index be8af318dec7..d014c9b865e8 100644 --- a/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceComprehensiveTest.java +++ b/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceComprehensiveTest.java @@ -39,6 +39,7 @@ import io.airbyte.protocol.models.JsonSchemaPrimitive; import io.airbyte.protocol.models.SyncMode; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -201,4 +202,32 @@ public void addDataTypeTestData(TestDataHolder test) { test.setTestColumnName(getTestColumnName()); } + private String formatCollection(Collection collection) { + return collection.stream().map(s -> "`" + s + "`").collect(Collectors.joining(", ")); + } + + /** + * Builds a table with all registered test cases with values using Markdown syntax (can be used in + * the github). + * + * @return formatted list of test cases + */ + public String getMarkdownTestTable() { + StringBuilder table = new StringBuilder() + .append("|**Data Type**|**Insert values**|**Expected values**|**Comment**|**Common test result**|\n") + .append("|----|----|----|----|----|\n"); + + testDataHolders.forEach(test -> table.append(String.format("| %s | %s | %s | %s | %s |\n", + test.getSourceType(), + formatCollection(test.getValues()), + formatCollection(test.getExpectedValues()), + "", + "Ok"))); + return table.toString(); + } + + protected void printMarkdownTestTable() { + LOGGER.info(getMarkdownTestTable()); + } + } diff --git a/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/TestDataHolder.java b/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/TestDataHolder.java index edb2e1820224..a14d85da445e 100644 --- a/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/TestDataHolder.java +++ b/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/TestDataHolder.java @@ -223,6 +223,10 @@ public List getExpectedValues() { return expectedValues; } + public List getValues() { + return values; + } + public String getNameWithTestPrefix() { return nameSpace + "_" + testNumber + "_" + sourceType; } From 6a5854047563e54845358810a93945a1b960d2af Mon Sep 17 00:00:00 2001 From: Andrii Leonets <30464745+DoNotPanicUA@users.noreply.github.com> Date: Fri, 16 Jul 2021 13:33:24 +0300 Subject: [PATCH 47/63] Update airbyte-integrations/connectors/source-bigquery/src/main/resources/spec.json Co-authored-by: Sherif A. 
Nada --- .../connectors/source-bigquery/src/main/resources/spec.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-bigquery/src/main/resources/spec.json b/airbyte-integrations/connectors/source-bigquery/src/main/resources/spec.json index 33bee15e6b43..b73d10a80073 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/source-bigquery/src/main/resources/spec.json @@ -18,7 +18,7 @@ }, "dataset_id": { "type": "string", - "description": "Default BigQuery Dataset ID tables are replicated to if the source does not specify a namespace.", + "description": "The BigQuery Dataset ID to look for tables to replicate from.", "title": "Default Dataset ID" }, "credentials_json": { From d6053f92c2d495389a0a72db8178a890b2bca85c Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Fri, 16 Jul 2021 14:18:52 +0300 Subject: [PATCH 48/63] review update --- .../io/airbyte/db/bigquery/BigQueryDatabase.java | 14 +++++--------- .../source/bigquery/BigQuerySource.java | 13 ++++++++++--- .../source-bigquery/src/main/resources/spec.json | 2 +- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java index db70b14331f4..7b929a15bd60 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java @@ -168,20 +168,16 @@ public List
getProjectTables(String projectId) { /** * Returns full information about all tables from specific Dataset * - * @param projectId BigQuery project id * @param datasetId BigQuery dataset id * @return List of BigQuery tables */ - public List<Table>
getDatasetTables(String projectId, String datasetId) { + public List<Table>
getDatasetTables(String datasetId) { List<Table>
tableList = new ArrayList<>(); - bigQuery.listDatasets(projectId) - .iterateAll() - .forEach(dataset -> bigQuery.listTables(dataset.getDatasetId()) + bigQuery.listTables(datasetId) .iterateAll() - .forEach(table -> { - if (table.getTableId().getDataset().equalsIgnoreCase(datasetId)) - tableList.add(bigQuery.getTable(table.getTableId())); - })); + .forEach(table -> + tableList.add(bigQuery.getTable(table.getTableId())) + ); return tableList; } diff --git a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java index f150ba6334e2..18f28a5091d5 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java +++ b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java @@ -25,7 +25,9 @@ package io.airbyte.integrations.source.bigquery; import com.fasterxml.jackson.databind.JsonNode; -import com.google.cloud.bigquery.*; +import com.google.cloud.bigquery.QueryParameterValue; +import com.google.cloud.bigquery.StandardSQLTypeName; +import com.google.cloud.bigquery.Table; import com.google.common.collect.ImmutableMap; import io.airbyte.commons.functional.CheckedConsumer; import io.airbyte.commons.json.Jsons; @@ -41,7 +43,12 @@ import io.airbyte.integrations.source.relationaldb.TableInfo; import io.airbyte.protocol.models.CommonField; import io.airbyte.protocol.models.JsonSchemaPrimitive; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; import org.slf4j.Logger; @@ -112,7 +119,7 @@ protected List>> discoverInternal(Big String projectId = dbConfig.get(CONFIG_PROJECT_ID).asText(); String datasetId = getConfigDatasetId(database); List
tables = - (isDatasetConfigured(database) ? database.getDatasetTables(projectId, getConfigDatasetId(database)) : database.getProjectTables(projectId)); + (isDatasetConfigured(database) ? database.getDatasetTables(getConfigDatasetId(database)) : database.getProjectTables(projectId)); List>> result = new ArrayList<>(); tables.stream().map(table -> TableInfo.>builder() .nameSpace(datasetId) diff --git a/airbyte-integrations/connectors/source-bigquery/src/main/resources/spec.json b/airbyte-integrations/connectors/source-bigquery/src/main/resources/spec.json index b73d10a80073..7a2721f1d549 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/source-bigquery/src/main/resources/spec.json @@ -8,7 +8,7 @@ "$schema": "http://json-schema.org/draft-07/schema#", "title": "BigQuery Destination Spec", "type": "object", - "required": ["project_id", "dataset_id"], + "required": ["project_id", "credentials_json"], "additionalProperties": false, "properties": { "project_id": { From e545247626ca4f058e6aba2080ae4068c3ce7b60 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Fri, 16 Jul 2021 19:25:47 +0300 Subject: [PATCH 49/63] Implement processing for arrays and structures --- .../io/airbyte/db/bigquery/BigQueryUtils.java | 47 ++++++++++++------- .../source/SourceComprehensiveTest.java | 17 ++++--- .../BigQuerySourceComprehensiveTest.java | 20 ++++++++ 3 files changed, 59 insertions(+), 25 deletions(-) diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java index 6fa255530c55..30db7d4f5045 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java @@ -28,13 +28,13 @@ import static io.airbyte.db.DataTypeUtils.toISO8601String; import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.cloud.bigquery.Field; import com.google.cloud.bigquery.FieldList; import com.google.cloud.bigquery.FieldValue; import com.google.cloud.bigquery.FieldValue.Attribute; import com.google.cloud.bigquery.FieldValueList; -import com.google.cloud.bigquery.LegacySQLTypeName; import com.google.cloud.bigquery.QueryParameterValue; import com.google.cloud.bigquery.StandardSQLTypeName; import io.airbyte.commons.json.Jsons; @@ -62,27 +62,38 @@ public static JsonNode rowToJson(FieldValueList rowValues, FieldList fieldList) return jsonNode; } + private static void fillObjectNode(String fieldName, StandardSQLTypeName fieldType, FieldValue fieldValue, ObjectNode node) { + switch (fieldType) { + case BOOL -> node.put(fieldName, fieldValue.getBooleanValue()); + case INT64 -> node.put(fieldName, fieldValue.getLongValue()); + case FLOAT64 -> node.put(fieldName, fieldValue.getDoubleValue()); + case NUMERIC -> node.put(fieldName, fieldValue.getNumericValue()); + case BIGNUMERIC -> node.put(fieldName, returnNullIfInvalid(fieldValue::getNumericValue)); + case STRING -> node.put(fieldName, fieldValue.getStringValue()); + case BYTES -> node.put(fieldName, fieldValue.getBytesValue()); + case DATE -> node.put(fieldName, toISO8601String(getDateValue(fieldValue, BIG_QUERY_DATE_FORMAT))); + case DATETIME -> node.put(fieldName, toISO8601String(getDateValue(fieldValue, BIG_QUERY_DATETIME_FORMAT))); + case TIMESTAMP -> node.put(fieldName, toISO8601String(fieldValue.getTimestampValue() / 
1000)); + case TIME -> node.put(fieldName, fieldValue.getStringValue()); + default -> node.put(fieldName, fieldValue.getStringValue()); + } + } + private static void setJsonField(Field field, FieldValue fieldValue, ObjectNode node) { + String fieldName = field.getName(); if (fieldValue.getAttribute().equals(Attribute.PRIMITIVE)) { - LegacySQLTypeName fieldType = field.getType(); - String fieldName = field.getName(); if (fieldValue.isNull()) { node.put(fieldName, (String) null); - } else - switch (fieldType.getStandardType()) { - case BOOL -> node.put(fieldName, fieldValue.getBooleanValue()); - case INT64 -> node.put(fieldName, fieldValue.getLongValue()); - case FLOAT64 -> node.put(fieldName, fieldValue.getDoubleValue()); - case NUMERIC -> node.put(fieldName, fieldValue.getNumericValue()); - case BIGNUMERIC -> node.put(fieldName, returnNullIfInvalid(fieldValue::getNumericValue)); - case STRING -> node.put(fieldName, fieldValue.getStringValue()); - case BYTES -> node.put(fieldName, fieldValue.getBytesValue()); - case DATE -> node.put(fieldName, toISO8601String(getDateValue(fieldValue, BIG_QUERY_DATE_FORMAT))); - case DATETIME -> node.put(fieldName, toISO8601String(getDateValue(fieldValue, BIG_QUERY_DATETIME_FORMAT))); - case TIMESTAMP -> node.put(fieldName, toISO8601String(fieldValue.getTimestampValue() / 1000)); - case TIME -> node.put(fieldName, fieldValue.getStringValue()); - default -> node.put(fieldName, fieldValue.getStringValue()); - } + } else { + fillObjectNode(fieldName, field.getType().getStandardType(), fieldValue, node); + } + } else if (fieldValue.getAttribute().equals(Attribute.REPEATED)) { + ArrayNode arrayNode = node.putArray(fieldName); + StandardSQLTypeName fieldType = field.getType().getStandardType(); + fieldValue.getRepeatedValue().forEach(arrayFieldValue -> fillObjectNode(fieldName, fieldType, arrayFieldValue, arrayNode.addObject())); + } else if (fieldValue.getAttribute().equals(Attribute.RECORD)) { + ObjectNode newNode = node.putObject(fieldName); + field.getSubFields().forEach(recordField -> setJsonField(recordField, fieldValue.getRecordValue().get(recordField.getName()), newNode)); } } diff --git a/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceComprehensiveTest.java b/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceComprehensiveTest.java index d014c9b865e8..4ce2128b7cad 100644 --- a/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceComprehensiveTest.java +++ b/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceComprehensiveTest.java @@ -108,7 +108,6 @@ public void testDataTypes() throws Exception { ConfiguredAirbyteCatalog catalog = getConfiguredCatalog(); List allMessages = runRead(catalog); final List recordMessages = allMessages.stream().filter(m -> m.getType() == Type.RECORD).collect(Collectors.toList()); - Map> expectedValues = new HashMap<>(); testDataHolders.forEach(testDataHolder -> { if (!testDataHolder.getExpectedValues().isEmpty()) @@ -126,15 +125,19 @@ public void testDataTypes() throws Exception { } }); - expectedValues.forEach((streamName, values) -> { - assertTrue(values.isEmpty(), "The streamer " + streamName + " should return all expected values. 
Missing values: " + values); - }); + expectedValues.forEach((streamName, values) -> + assertTrue(values.isEmpty(), "The streamer " + streamName + " should return all expected values. Missing values: " + values) + ); } private String getValueFromJsonNode(JsonNode jsonNode) { - String value = (jsonNode != null ? jsonNode.asText() : null); - value = (value != null && value.equals("null") ? null : value); - return value; + if (jsonNode != null) { + String nodeText = jsonNode.asText(); + String nodeString = jsonNode.toString(); + String value = (nodeText != null && !nodeText.equals("") ? nodeText : nodeString); + value = (value != null && value.equals("null") ? null : value); + return value; + } else return null; } /** diff --git a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java index 0a70639d1321..a3e9f41eb776 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java +++ b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java @@ -298,6 +298,26 @@ protected void initTests() { .addInsertValues("TIME(15, 30, 00)", "null") .addExpectedValues("15:30:00", null) .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("array") + .fullSourceDataType("array") + .airbyteType(JsonSchemaPrimitive.STRING) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("['a', 'b']") + .addExpectedValues("[{\"test_column\":\"a\"},{\"test_column\":\"b\"}]") + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("struct") + .fullSourceDataType("STRUCT>>") + .airbyteType(JsonSchemaPrimitive.STRING) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("STRUCT('s' as frst, 1 as sec, STRUCT(555 as id_col, STRUCT(TIME(15, 30, 00) as time) as mega_obbj) as obbj)") + .addExpectedValues("{\"frst\":\"s\",\"sec\":1,\"obbj\":{\"id_col\":555,\"mega_obbj\":{\"last_col\":\"15:30:00\"}}}") + .build()); } @Override From 2fd91994f8b4e12c66202860a0f50f41782079e8 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Fri, 16 Jul 2021 19:39:21 +0300 Subject: [PATCH 50/63] format --- .../java/io/airbyte/db/bigquery/BigQueryDatabase.java | 6 ++---- .../standardtest/source/SourceComprehensiveTest.java | 8 ++++---- .../source/bigquery/BigQuerySourceComprehensiveTest.java | 2 +- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java index 7b929a15bd60..15cfc38e2e64 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java @@ -174,10 +174,8 @@ public List
getProjectTables(String projectId) { public List
getDatasetTables(String datasetId) { List
tableList = new ArrayList<>(); bigQuery.listTables(datasetId) - .iterateAll() - .forEach(table -> - tableList.add(bigQuery.getTable(table.getTableId())) - ); + .iterateAll() + .forEach(table -> tableList.add(bigQuery.getTable(table.getTableId()))); return tableList; } diff --git a/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceComprehensiveTest.java b/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceComprehensiveTest.java index 4ce2128b7cad..09e19ab44186 100644 --- a/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceComprehensiveTest.java +++ b/airbyte-integrations/bases/standard-source-test/src/main/java/io/airbyte/integrations/standardtest/source/SourceComprehensiveTest.java @@ -125,9 +125,8 @@ public void testDataTypes() throws Exception { } }); - expectedValues.forEach((streamName, values) -> - assertTrue(values.isEmpty(), "The streamer " + streamName + " should return all expected values. Missing values: " + values) - ); + expectedValues.forEach((streamName, values) -> assertTrue(values.isEmpty(), + "The streamer " + streamName + " should return all expected values. Missing values: " + values)); } private String getValueFromJsonNode(JsonNode jsonNode) { @@ -137,7 +136,8 @@ private String getValueFromJsonNode(JsonNode jsonNode) { String value = (nodeText != null && !nodeText.equals("") ? nodeText : nodeString); value = (value != null && value.equals("null") ? null : value); return value; - } else return null; + } else + return null; } /** diff --git a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java index a3e9f41eb776..a3b91756b53c 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java +++ b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java @@ -299,7 +299,7 @@ protected void initTests() { .addExpectedValues("15:30:00", null) .build()); - addDataTypeTestData( + addDataTypeTestData( TestDataHolder.builder() .sourceType("array") .fullSourceDataType("array") From 46f5b3e3cef33e00fbf81f71ec4e6ec8f4752aa0 Mon Sep 17 00:00:00 2001 From: heade Date: Tue, 20 Jul 2021 20:23:39 +0300 Subject: [PATCH 51/63] added bigquery secrets --- .github/workflows/publish-command.yml | 1 + .github/workflows/test-command.yml | 1 + tools/bin/ci_credentials.sh | 1 + 3 files changed, 3 insertions(+) diff --git a/.github/workflows/publish-command.yml b/.github/workflows/publish-command.yml index 86cf91c15608..4233a1b4e74d 100644 --- a/.github/workflows/publish-command.yml +++ b/.github/workflows/publish-command.yml @@ -77,6 +77,7 @@ jobs: SOURCE_AWS_CLOUDTRAIL_CREDS: ${{ secrets.SOURCE_AWS_CLOUDTRAIL_CREDS }} AZURE_STORAGE_INTEGRATION_TEST_CREDS: ${{ secrets.AZURE_STORAGE_INTEGRATION_TEST_CREDS }} BIGQUERY_INTEGRATION_TEST_CREDS: ${{ secrets.BIGQUERY_INTEGRATION_TEST_CREDS }} + BIGQUERY_TEST_CREDS: ${{ secrets.BIGQUERY_TEST_CREDS }} BRAINTREE_TEST_CREDS: ${{ secrets.BRAINTREE_TEST_CREDS }} DESTINATION_PUBSUB_TEST_CREDS: ${{ secrets.DESTINATION_PUBSUB_TEST_CREDS }} 
DRIFT_INTEGRATION_TEST_CREDS: ${{ secrets.DRIFT_INTEGRATION_TEST_CREDS }} diff --git a/.github/workflows/test-command.yml b/.github/workflows/test-command.yml index f4cc2e8f687a..7398b8ff29df 100644 --- a/.github/workflows/test-command.yml +++ b/.github/workflows/test-command.yml @@ -75,6 +75,7 @@ jobs: AWS_REDSHIFT_INTEGRATION_TEST_CREDS: ${{ secrets.AWS_REDSHIFT_INTEGRATION_TEST_CREDS }} AZURE_STORAGE_INTEGRATION_TEST_CREDS: ${{ secrets.AZURE_STORAGE_INTEGRATION_TEST_CREDS }} BIGQUERY_INTEGRATION_TEST_CREDS: ${{ secrets.BIGQUERY_INTEGRATION_TEST_CREDS }} + BIGQUERY_TEST_CREDS: ${{ secrets.BIGQUERY_TEST_CREDS }} BRAINTREE_TEST_CREDS: ${{ secrets.BRAINTREE_TEST_CREDS }} DESTINATION_PUBSUB_TEST_CREDS: ${{ secrets.DESTINATION_PUBSUB_TEST_CREDS }} DRIFT_INTEGRATION_TEST_CREDS: ${{ secrets.DRIFT_INTEGRATION_TEST_CREDS }} diff --git a/tools/bin/ci_credentials.sh b/tools/bin/ci_credentials.sh index 3f4a438281ca..ed3b6ec99023 100755 --- a/tools/bin/ci_credentials.sh +++ b/tools/bin/ci_credentials.sh @@ -38,6 +38,7 @@ write_standard_creds base-normalization "$AWS_REDSHIFT_INTEGRATION_TEST_CREDS" " write_standard_creds source-amplitude "$AMPLITUDE_INTEGRATION_TEST_CREDS" write_standard_creds source-asana "$SOURCE_ASANA_TEST_CREDS" write_standard_creds source-aws-cloudtrail "$SOURCE_AWS_CLOUDTRAIL_CREDS" +write_standard_creds source-bigquery "$BIGQUERY_TEST_CREDS" write_standard_creds source-braintree-singer "$BRAINTREE_TEST_CREDS" write_standard_creds source-drift "$DRIFT_INTEGRATION_TEST_CREDS" write_standard_creds source-exchange-rates "$EXCHANGE_RATES_TEST_CREDS" From e4934685fdc86c8eef43f552885ffe832e5286b5 Mon Sep 17 00:00:00 2001 From: heade Date: Tue, 20 Jul 2021 21:54:59 +0300 Subject: [PATCH 52/63] added bigquery secrets --- tools/bin/ci_credentials.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bin/ci_credentials.sh b/tools/bin/ci_credentials.sh index ed3b6ec99023..2c49b26b44b2 100755 --- a/tools/bin/ci_credentials.sh +++ b/tools/bin/ci_credentials.sh @@ -38,7 +38,7 @@ write_standard_creds base-normalization "$AWS_REDSHIFT_INTEGRATION_TEST_CREDS" " write_standard_creds source-amplitude "$AMPLITUDE_INTEGRATION_TEST_CREDS" write_standard_creds source-asana "$SOURCE_ASANA_TEST_CREDS" write_standard_creds source-aws-cloudtrail "$SOURCE_AWS_CLOUDTRAIL_CREDS" -write_standard_creds source-bigquery "$BIGQUERY_TEST_CREDS" +write_standard_creds source-bigquery "$BIGQUERY_TEST_CREDS" "credentials.json" write_standard_creds source-braintree-singer "$BRAINTREE_TEST_CREDS" write_standard_creds source-drift "$DRIFT_INTEGRATION_TEST_CREDS" write_standard_creds source-exchange-rates "$EXCHANGE_RATES_TEST_CREDS" From 05067c328b14835d653bcede6c691ad5147b1d79 Mon Sep 17 00:00:00 2001 From: heade Date: Thu, 22 Jul 2021 09:42:22 +0300 Subject: [PATCH 53/63] spec fix --- .../connectors/source-bigquery/src/main/resources/spec.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-bigquery/src/main/resources/spec.json b/airbyte-integrations/connectors/source-bigquery/src/main/resources/spec.json index 7a2721f1d549..8a9b181d0dc9 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/source-bigquery/src/main/resources/spec.json @@ -3,10 +3,10 @@ "supportsIncremental": true, "supportsNormalization": true, "supportsDBT": true, - "supported_destination_sync_modes": ["overwrite", "append", "append_dedup"], + "supported_sync_modes": ["overwrite", "append", 
"append_dedup"], "connectionSpecification": { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "BigQuery Destination Spec", + "title": "BigQuery Source Spec", "type": "object", "required": ["project_id", "credentials_json"], "additionalProperties": false, From 449a0b57f22d14671c6d4ac2e61076f7f584132b Mon Sep 17 00:00:00 2001 From: heade Date: Thu, 22 Jul 2021 12:38:18 +0300 Subject: [PATCH 54/63] test configs fix --- .../airbyte/integrations/source/bigquery/BigQuerySource.java | 1 - .../source/bigquery/BigQuerySourceAcceptanceTest.java | 3 +-- .../source/bigquery/BigQuerySourceComprehensiveTest.java | 4 +--- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java index 18f28a5091d5..d1b4348e8508 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java +++ b/airbyte-integrations/connectors/source-bigquery/src/main/java/io/airbyte/integrations/source/bigquery/BigQuerySource.java @@ -60,7 +60,6 @@ public class BigQuerySource extends AbstractRelationalDbSource Date: Tue, 20 Jul 2021 15:20:04 +0300 Subject: [PATCH 55/63] extend mapping for Arrays and Structs --- .../src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java index 30db7d4f5045..9306e9908aa0 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java @@ -113,6 +113,8 @@ public static JsonSchemaPrimitive getType(StandardSQLTypeName bigQueryType) { case BOOL -> JsonSchemaPrimitive.BOOLEAN; case INT64, FLOAT64, NUMERIC, BIGNUMERIC -> JsonSchemaPrimitive.NUMBER; case STRING, BYTES, TIMESTAMP, DATE, TIME, DATETIME -> JsonSchemaPrimitive.STRING; + case ARRAY -> JsonSchemaPrimitive.ARRAY; + case STRUCT -> JsonSchemaPrimitive.OBJECT; default -> JsonSchemaPrimitive.STRING; }; } From 365b7616a18f6edd2a4b55f8f6af762c0bfc837d Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Mon, 26 Jul 2021 18:21:44 +0300 Subject: [PATCH 56/63] Process nested arrays --- .../io/airbyte/db/bigquery/BigQueryUtils.java | 17 +++++++++-------- .../connectors/source-bigquery/Dockerfile | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java index 9306e9908aa0..09348d0d23c2 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java @@ -75,25 +75,26 @@ private static void fillObjectNode(String fieldName, StandardSQLTypeName fieldTy case DATETIME -> node.put(fieldName, toISO8601String(getDateValue(fieldValue, BIG_QUERY_DATETIME_FORMAT))); case TIMESTAMP -> node.put(fieldName, toISO8601String(fieldValue.getTimestampValue() / 1000)); case TIME -> node.put(fieldName, fieldValue.getStringValue()); + case ARRAY -> { + ArrayNode arrayNode = node.putArray(fieldName); + fieldValue.getRepeatedValue().forEach(arrayValue -> fillObjectNode(fieldName, fieldType, arrayValue, arrayNode.addObject())); + } default -> 
node.put(fieldName, fieldValue.getStringValue()); } } private static void setJsonField(Field field, FieldValue fieldValue, ObjectNode node) { String fieldName = field.getName(); - if (fieldValue.getAttribute().equals(Attribute.PRIMITIVE)) { + if (fieldValue.getAttribute().equals(Attribute.RECORD)) { + ObjectNode newNode = node.putObject(fieldName); + field.getSubFields().forEach(recordField -> setJsonField(recordField, fieldValue.getRecordValue().get(recordField.getName()), newNode)); + } + else { if (fieldValue.isNull()) { node.put(fieldName, (String) null); } else { fillObjectNode(fieldName, field.getType().getStandardType(), fieldValue, node); } - } else if (fieldValue.getAttribute().equals(Attribute.REPEATED)) { - ArrayNode arrayNode = node.putArray(fieldName); - StandardSQLTypeName fieldType = field.getType().getStandardType(); - fieldValue.getRepeatedValue().forEach(arrayFieldValue -> fillObjectNode(fieldName, fieldType, arrayFieldValue, arrayNode.addObject())); - } else if (fieldValue.getAttribute().equals(Attribute.RECORD)) { - ObjectNode newNode = node.putObject(fieldName); - field.getSubFields().forEach(recordField -> setJsonField(recordField, fieldValue.getRecordValue().get(recordField.getName()), newNode)); } } diff --git a/airbyte-integrations/connectors/source-bigquery/Dockerfile b/airbyte-integrations/connectors/source-bigquery/Dockerfile index cfdaa65dde59..9cdae4cb9f47 100644 --- a/airbyte-integrations/connectors/source-bigquery/Dockerfile +++ b/airbyte-integrations/connectors/source-bigquery/Dockerfile @@ -9,5 +9,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 # Airbyte's build system uses these labels to know what to name and tag the docker images produced by this Dockerfile. -LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.version=0.1.1 LABEL io.airbyte.name=airbyte/source-bigquery \ No newline at end of file From 4a08717ff5669c4e0f2fe8048c4960171c010864 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Tue, 27 Jul 2021 00:23:55 +0300 Subject: [PATCH 57/63] handle arrays of records properly. 
--- .../io/airbyte/db/bigquery/BigQueryUtils.java | 33 ++++++++++++++----- .../connectors/source-bigquery/build.gradle | 2 -- .../BigQuerySourceComprehensiveTest.java | 20 +++++++++++ 3 files changed, 44 insertions(+), 11 deletions(-) diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java index 09348d0d23c2..b3838bdfcf44 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java @@ -75,26 +75,41 @@ private static void fillObjectNode(String fieldName, StandardSQLTypeName fieldTy case DATETIME -> node.put(fieldName, toISO8601String(getDateValue(fieldValue, BIG_QUERY_DATETIME_FORMAT))); case TIMESTAMP -> node.put(fieldName, toISO8601String(fieldValue.getTimestampValue() / 1000)); case TIME -> node.put(fieldName, fieldValue.getStringValue()); - case ARRAY -> { - ArrayNode arrayNode = node.putArray(fieldName); - fieldValue.getRepeatedValue().forEach(arrayValue -> fillObjectNode(fieldName, fieldType, arrayValue, arrayNode.addObject())); - } default -> node.put(fieldName, fieldValue.getStringValue()); } } private static void setJsonField(Field field, FieldValue fieldValue, ObjectNode node) { String fieldName = field.getName(); - if (fieldValue.getAttribute().equals(Attribute.RECORD)) { - ObjectNode newNode = node.putObject(fieldName); - field.getSubFields().forEach(recordField -> setJsonField(recordField, fieldValue.getRecordValue().get(recordField.getName()), newNode)); - } - else { + if (fieldValue.getAttribute().equals(Attribute.PRIMITIVE)) { if (fieldValue.isNull()) { node.put(fieldName, (String) null); } else { fillObjectNode(fieldName, field.getType().getStandardType(), fieldValue, node); } + } else if (fieldValue.getAttribute().equals(Attribute.REPEATED)) { + ArrayNode arrayNode = node.putArray(fieldName); + StandardSQLTypeName fieldType = field.getType().getStandardType(); + FieldList subFields = field.getSubFields(); + // Array of primitive + if (subFields == null || subFields.isEmpty()) { + fieldValue.getRepeatedValue().forEach(arrayFieldValue -> fillObjectNode(fieldName, fieldType, arrayFieldValue, arrayNode.addObject())); + // Array of records + } else { + for (FieldValue arrayFieldValue : fieldValue.getRepeatedValue()) { + int count = 0; // named get doesn't work here for some reasons. + ObjectNode newNode = arrayNode.addObject(); + for (Field repeatedField : subFields) { + setJsonField(repeatedField, arrayFieldValue.getRecordValue().get(count++), + newNode); + } + } + } + } else if (fieldValue.getAttribute().equals(Attribute.RECORD)) { + ObjectNode newNode = node.putObject(fieldName); + field.getSubFields().forEach(recordField -> { + setJsonField(recordField, fieldValue.getRecordValue().get(recordField.getName()), newNode); + }); } } diff --git a/airbyte-integrations/connectors/source-bigquery/build.gradle b/airbyte-integrations/connectors/source-bigquery/build.gradle index 0214905cf428..33e45955f719 100644 --- a/airbyte-integrations/connectors/source-bigquery/build.gradle +++ b/airbyte-integrations/connectors/source-bigquery/build.gradle @@ -17,8 +17,6 @@ dependencies { implementation project(':airbyte-integrations:connectors:source-jdbc') implementation project(':airbyte-integrations:connectors:source-relational-db') - //TODO Add jdbc driver import here. 
Ex: implementation 'com.microsoft.sqlserver:mssql-jdbc:8.4.1.jre14' - testImplementation testFixtures(project(':airbyte-integrations:connectors:source-jdbc')) testImplementation 'org.apache.commons:commons-lang3:3.11' diff --git a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java index ff5e46a68dd0..92702bf472b0 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java +++ b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java @@ -316,6 +316,26 @@ protected void initTests() { .addInsertValues("STRUCT('s' as frst, 1 as sec, STRUCT(555 as id_col, STRUCT(TIME(15, 30, 00) as time) as mega_obbj) as obbj)") .addExpectedValues("{\"frst\":\"s\",\"sec\":1,\"obbj\":{\"id_col\":555,\"mega_obbj\":{\"last_col\":\"15:30:00\"}}}") .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("array") + .fullSourceDataType("array>") + .airbyteType(JsonSchemaPrimitive.STRING) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("[STRUCT('qqq' as fff, 1 as ggg), STRUCT('kkk' as fff, 2 as ggg)]") + .addExpectedValues("[{\"fff\":\"qqq\",\"ggg\":1},{\"fff\":\"kkk\",\"ggg\":2}]") + .build()); + + addDataTypeTestData( + TestDataHolder.builder() + .sourceType("array") + .fullSourceDataType("array>>>") + .airbyteType(JsonSchemaPrimitive.STRING) + .createTablePatternSql(CREATE_SQL_PATTERN) + .addInsertValues("[STRUCT('qqq' as fff, [STRUCT('fff' as ooo, 1 as kkk), STRUCT('hhh' as ooo, 2 as kkk)] as ggg)]") + .addExpectedValues("[{\"fff\":\"qqq\",\"ggg\":[{\"ooo\":\"fff\",\"kkk\":1},{\"ooo\":\"hhh\",\"kkk\":2}]}]") + .build()); } @Override From 80d541d451ed2bf08ffc1e2ae717aaccf2d462f7 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Tue, 27 Jul 2021 01:12:38 +0300 Subject: [PATCH 58/63] format --- .../src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java | 2 +- .../source/bigquery/BigQuerySourceComprehensiveTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java index b3838bdfcf44..6d0b49aca80b 100644 --- a/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java +++ b/airbyte-db/src/main/java/io/airbyte/db/bigquery/BigQueryUtils.java @@ -94,7 +94,7 @@ private static void setJsonField(Field field, FieldValue fieldValue, ObjectNode // Array of primitive if (subFields == null || subFields.isEmpty()) { fieldValue.getRepeatedValue().forEach(arrayFieldValue -> fillObjectNode(fieldName, fieldType, arrayFieldValue, arrayNode.addObject())); - // Array of records + // Array of records } else { for (FieldValue arrayFieldValue : fieldValue.getRepeatedValue()) { int count = 0; // named get doesn't work here for some reasons. 
diff --git a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java index 92702bf472b0..9068da62dd09 100644 --- a/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java +++ b/airbyte-integrations/connectors/source-bigquery/src/test-integration/java/io/airbyte/integrations/source/bigquery/BigQuerySourceComprehensiveTest.java @@ -317,7 +317,7 @@ protected void initTests() { .addExpectedValues("{\"frst\":\"s\",\"sec\":1,\"obbj\":{\"id_col\":555,\"mega_obbj\":{\"last_col\":\"15:30:00\"}}}") .build()); - addDataTypeTestData( + addDataTypeTestData( TestDataHolder.builder() .sourceType("array") .fullSourceDataType("array>") From 5c8c65ff4d15b34a011272c559bf8e436f6f689d Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Tue, 27 Jul 2021 18:01:03 +0300 Subject: [PATCH 59/63] BigQuery source docs --- .../connectors/source-bigquery/README.md | 21 +++++ docs/SUMMARY.md | 1 + docs/integrations/sources/bigquery.md | 92 +++++++++++++++++++ 3 files changed, 114 insertions(+) create mode 100644 airbyte-integrations/connectors/source-bigquery/README.md create mode 100644 docs/integrations/sources/bigquery.md diff --git a/airbyte-integrations/connectors/source-bigquery/README.md b/airbyte-integrations/connectors/source-bigquery/README.md new file mode 100644 index 000000000000..776d3f58a293 --- /dev/null +++ b/airbyte-integrations/connectors/source-bigquery/README.md @@ -0,0 +1,21 @@ +# BigQuery Test Configuration + +In order to test the BigQuery source, you need a service account key file. + +## Community Contributor + +As a community contributor, you will need access to a GCP project and BigQuery to run tests. + +1. Go to the `Service Accounts` page on the GCP console +1. Click on `+ Create Service Account" button +1. Fill out a descriptive name/id/description +1. Click the edit icon next to the service account you created on the `IAM` page +1. Add the `BigQuery Data Editor` and `BigQuery User` role +1. Go back to the `Service Accounts` page and use the actions modal to `Create Key` +1. Download this key as a JSON file +1. Move and rename this file to `secrets/credentials.json` + +## Airbyte Employee + +1. Access the `BigQuery Integration Test User` secret on Rippling under the `Engineering` folder +1. Create a file with the contents at `secrets/credentials.json` diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 65cd95e3104e..9280d7ecc4aa 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -33,6 +33,7 @@ * [Asana](integrations/sources/asana.md) * [AWS CloudTrail](integrations/sources/aws-cloudtrail.md) * [Braintree](integrations/sources/braintree.md) + * [BigQuery](integrations/sources/bigquery.md) * [Cart](integrations/sources/cart.md) * [ClickHouse](integrations/sources/clickhouse.md) * [CockroachDB](integrations/sources/cockroachdb.md) diff --git a/docs/integrations/sources/bigquery.md b/docs/integrations/sources/bigquery.md new file mode 100644 index 000000000000..38afe6cb950f --- /dev/null +++ b/docs/integrations/sources/bigquery.md @@ -0,0 +1,92 @@ +--- +description: >- + BigQuery is a serverless, highly scalable, and cost-effective data warehouse + offered by Google Cloud Provider. 
+---
+
+# BigQuery
+
+## Overview
+
+The BigQuery source supports both Full Refresh and Incremental syncs. You can choose if this connector will copy only the new or updated data, or all rows in the tables and columns you set up for replication, every time a sync is running.
+
+### Resulting schema
+
+The BigQuery source does not alter the schema present in your database. Depending on the destination connected to this source, however, the schema may be altered. See the destination's documentation for more details.
+
+### Data type mapping
+
+The BigQuery data type mapping:
+
+| BigQuery Type | Resulting Type | Notes |
+| :--- | :--- | :--- |
+| `BOOL` | Boolean | |
+| `INT64` | Number | |
+| `FLOAT64` | Number | |
+| `NUMERIC` | Number | |
+| `BIGNUMERIC` | Number | |
+| `STRING` | String | |
+| `BYTES` | String | |
+| `DATE` | String | In ISO8601 format |
+| `DATETIME` | String | In ISO8601 format |
+| `TIMESTAMP` | String | In ISO8601 format |
+| `TIME` | String | |
+| `ARRAY` | Array | |
+| `STRUCT` | Object | |
+| `GEOGRAPHY` | String | |
+
+### Features
+
+| Feature | Supported | Notes |
+| :--- | :--- | :--- |
+| Full Refresh Sync | Yes | |
+| Incremental Sync | Yes | |
+| Change Data Capture | No | |
+| SSL Support | Yes | |
+
+## Getting started
+
+### Requirements
+
+To use the BigQuery source, you'll need:
+
+* A Google Cloud Project with BigQuery enabled
+* A Google Cloud Service Account with the "BigQuery User" and "BigQuery Data Editor" roles in your GCP project
+* A Service Account Key to authenticate into your Service Account
+
+See the setup guide for more information about how to create the required resources.
+
+#### Service account
+
+In order for Airbyte to sync data from BigQuery, it needs credentials for a [Service Account](https://cloud.google.com/iam/docs/service-accounts) with the "BigQuery User" and "BigQuery Data Editor" roles, which grants permissions to run BigQuery jobs, write to BigQuery Datasets, and read table metadata. We highly recommend that this Service Account is exclusive to Airbyte for ease of permissioning and auditing. However, you can use a pre-existing Service Account if you already have one with the correct permissions.
+
+The easiest way to create a Service Account is to follow GCP's guide for [Creating a Service Account](https://cloud.google.com/iam/docs/creating-managing-service-accounts). Once you've created the Service Account, make sure to keep its ID handy as you will need to reference it when granting roles. Service Account IDs typically take the form `@.iam.gserviceaccount.com`
+
+Then, add the service account as a Member in your Google Cloud Project with the "BigQuery User" role. To do this, follow the instructions for [Granting Access](https://cloud.google.com/iam/docs/granting-changing-revoking-access#granting-console) in the Google documentation. The email address of the member you are adding is the same as the Service Account ID you just created.
+
+At this point you should have a service account with the "BigQuery User" project-level permission.
+
+#### Service account key
+
+Service Account Keys are used to authenticate as Google Service Accounts. For Airbyte to leverage the permissions you granted to the Service Account in the previous step, you'll need to provide its Service Account Keys. See the [Google documentation](https://cloud.google.com/iam/docs/service-accounts#service_account_keys) for more information about Keys. 
+ +Follow the [Creating and Managing Service Account Keys](https://cloud.google.com/iam/docs/creating-managing-service-account-keys) guide to create a key. Airbyte currently supports JSON Keys only, so make sure you create your key in that format. As soon as you created the key, make sure to download it, as that is the only time Google will allow you to see its contents. Once you've successfully configured BigQuery as a destination in Airbyte, delete this key from your computer. + +### Setup the BigQuery source in Airbyte + +You should now have all the requirements needed to configure BigQuery as a source in the UI. You'll need the following information to configure the BigQuery destination: + +* **Project ID** +* **Default Dataset ID [Optional]**: the schema name if only one schema is interested. Dramatically boost source discover operation. +* **Credentials JSON**: the contents of your Service Account Key JSON file + +Once you've configured BigQuery as a source, delete the Service Account Key from your computer. + +## CHANGELOG + +### source-bigquery + +| Version | Date | Pull Request | Subject | +| :--- | :--- | :--- | :--- | +| 0.1.0 | 2021-07-22 | [#4457](https://github.com/airbytehq/airbyte/pull/4457) | 🎉 New Source: Big Query. | +| 0.1.1 | 2021-07-28 | [#4981](https://github.com/airbytehq/airbyte/pull/4981) | 🐛 BigQuery source: Fix nested arrays | \ No newline at end of file From fd957ab52ff75ecf0758e48bc569f74b580ddb0a Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Tue, 27 Jul 2021 18:09:42 +0300 Subject: [PATCH 60/63] docs readme update --- docs/integrations/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/integrations/README.md b/docs/integrations/README.md index c0129f9ccc7d..832032e0b451 100644 --- a/docs/integrations/README.md +++ b/docs/integrations/README.md @@ -18,6 +18,7 @@ Airbyte uses a grading system for connectors to help users understand what to ex |[Asana](./sources/asana.md) | Beta | |[AWS CloudTrail](./sources/aws-cloudtrail.md)| Beta | |[Braintree](./sources/braintree.md)| Alpha | +|[BigQuery](./sources/bigquery.md)| Beta | |[Cart](./sources/cart.md)| Beta | |[ClickHouse](./sources/clickhouse.md)| Beta | |[CockroachDB](./sources/cockroachdb.md)| Beta | From 8d76778d97bf11703c22ff4590545063660aa92b Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Tue, 27 Jul 2021 18:12:26 +0300 Subject: [PATCH 61/63] hide evidences --- docs/integrations/sources/bigquery.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/integrations/sources/bigquery.md b/docs/integrations/sources/bigquery.md index 38afe6cb950f..f9ec76a4a634 100644 --- a/docs/integrations/sources/bigquery.md +++ b/docs/integrations/sources/bigquery.md @@ -70,11 +70,11 @@ At this point you should have a service account with the "BigQuery User" project Service Account Keys are used to authenticate as Google Service Accounts. For Airbyte to leverage the permissions you granted to the Service Account in the previous step, you'll need to provide its Service Account Keys. See the [Google documentation](https://cloud.google.com/iam/docs/service-accounts#service_account_keys) for more information about Keys. -Follow the [Creating and Managing Service Account Keys](https://cloud.google.com/iam/docs/creating-managing-service-account-keys) guide to create a key. Airbyte currently supports JSON Keys only, so make sure you create your key in that format. As soon as you created the key, make sure to download it, as that is the only time Google will allow you to see its contents. 
Once you've successfully configured BigQuery as a destination in Airbyte, delete this key from your computer. +Follow the [Creating and Managing Service Account Keys](https://cloud.google.com/iam/docs/creating-managing-service-account-keys) guide to create a key. Airbyte currently supports JSON Keys only, so make sure you create your key in that format. As soon as you created the key, make sure to download it, as that is the only time Google will allow you to see its contents. Once you've successfully configured BigQuery as a source in Airbyte, delete this key from your computer. ### Setup the BigQuery source in Airbyte -You should now have all the requirements needed to configure BigQuery as a source in the UI. You'll need the following information to configure the BigQuery destination: +You should now have all the requirements needed to configure BigQuery as a source in the UI. You'll need the following information to configure the BigQuery source: * **Project ID** * **Default Dataset ID [Optional]**: the schema name if only one schema is interested. Dramatically boost source discover operation. From 8f5983718c5a0f33b769bd194cadd894905fdc6c Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Tue, 27 Jul 2021 19:09:37 +0300 Subject: [PATCH 62/63] fix changlog order --- docs/integrations/sources/bigquery.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/integrations/sources/bigquery.md b/docs/integrations/sources/bigquery.md index f9ec76a4a634..e49778f32e1f 100644 --- a/docs/integrations/sources/bigquery.md +++ b/docs/integrations/sources/bigquery.md @@ -88,5 +88,5 @@ Once you've configured BigQuery as a source, delete the Service Account Key from | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | -| 0.1.0 | 2021-07-22 | [#4457](https://github.com/airbytehq/airbyte/pull/4457) | 🎉 New Source: Big Query. | -| 0.1.1 | 2021-07-28 | [#4981](https://github.com/airbytehq/airbyte/pull/4981) | 🐛 BigQuery source: Fix nested arrays | \ No newline at end of file +| 0.1.1 | 2021-07-28 | [#4981](https://github.com/airbytehq/airbyte/pull/4981) | 🐛 BigQuery source: Fix nested arrays | +| 0.1.0 | 2021-07-22 | [#4457](https://github.com/airbytehq/airbyte/pull/4457) | 🎉 New Source: Big Query. 
| \ No newline at end of file From 864580ab1eb1a3634dcaab6d953b42c388909649 Mon Sep 17 00:00:00 2001 From: Andrii Leonets Date: Tue, 27 Jul 2021 19:22:47 +0300 Subject: [PATCH 63/63] Add bigquery to source_defintions yaml --- .../bfd1ddf8-ae8a-4620-b1d7-55597d2ba08c.json | 7 +++++++ .../init/src/main/resources/seed/source_definitions.yaml | 5 +++++ 2 files changed, 12 insertions(+) create mode 100644 airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/bfd1ddf8-ae8a-4620-b1d7-55597d2ba08c.json diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/bfd1ddf8-ae8a-4620-b1d7-55597d2ba08c.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/bfd1ddf8-ae8a-4620-b1d7-55597d2ba08c.json new file mode 100644 index 000000000000..2a89ab26cc66 --- /dev/null +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/bfd1ddf8-ae8a-4620-b1d7-55597d2ba08c.json @@ -0,0 +1,7 @@ +{ + "sourceDefinitionId": "bfd1ddf8-ae8a-4620-b1d7-55597d2ba08c", + "name": "BigQuery", + "dockerRepository": "airbyte/source-bigquery", + "dockerImageTag": "0.1.1", + "documentationUrl": "https://docs.airbyte.io/integrations/sources/bigquery" +} diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index bf5841124ace..5da1a12e9c58 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -404,3 +404,8 @@ dockerRepository: airbyte/source-prestashop dockerImageTag: 0.1.0 documentationUrl: https://docs.airbyte.io/integrations/sources/prestashop +- sourceDefinitionId: bfd1ddf8-ae8a-4620-b1d7-55597d2ba08c + name: BigQuery + dockerRepository: airbyte/source-bigquery + dockerImageTag: 0.1.1 + documentationUrl: https://docs.airbyte.io/integrations/sources/bigquery
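
Taken together, the spec, documentation, and CI changes in these patches describe how the new BigQuery source is configured: a GCP project id, an optional default dataset, and the contents of a service-account key file (stored at `secrets/credentials.json` for the integration tests). The sketch below shows one way such a config object could be assembled for local experimentation. It is illustrative only and not part of the patch series: the `dataset_id` property name, the placeholder project and dataset values, and the `secrets/config.json` output path are assumptions, and it presumes Jackson is on the classpath.

```java
// Illustrative sketch only: builds a config object shaped like the source spec added above
// (required: project_id, credentials_json). The "dataset_id" name and all values are assumptions.
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import java.nio.file.Files;
import java.nio.file.Path;

public class BigQuerySourceConfigExample {

  public static void main(String[] args) throws Exception {
    // The service-account key downloaded from GCP, as described in the connector README and docs.
    String credentialsJson = Files.readString(Path.of("secrets/credentials.json"));

    ObjectMapper mapper = new ObjectMapper();
    ObjectNode config = mapper.createObjectNode();
    config.put("project_id", "my-gcp-project");         // required: GCP project to query
    config.put("dataset_id", "airbyte_test_dataset");   // optional "Default Dataset ID" (property name assumed)
    config.put("credentials_json", credentialsJson);    // required: full key file contents as a string

    // Write the assembled config somewhere a local run can pick it up (assumed path).
    Files.writeString(Path.of("secrets/config.json"), config.toPrettyString());
  }
}
```

A config file like this can then be passed to the connector's `check`, `discover`, or `read` commands via the standard `--config` argument that Airbyte connectors accept, which is a convenient way to exercise the new source locally before relying on the acceptance and comprehensive tests above.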