From 2aa4aa5261685872a647dc72f07fef0c40455b7c Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Wed, 14 Feb 2024 14:06:29 -0800 Subject: [PATCH 01/33] snowflake stuff --- .../destination-snowflake/build.gradle | 3 +- .../SnowflakeInternalStagingDestination.java | 12 ++- .../SnowflakeDestinationHandler.java | 3 +- .../ExtractedAtUtcTimezoneMigration.kt | 84 +++++++++++++++++++ .../migrations/SnowflakeState.kt | 16 ++++ .../AbstractSnowflakeTypingDedupingTest.java | 31 +++++++ .../SnowflakeSqlGeneratorIntegrationTest.java | 3 +- ...at_sync1_expectedrecords_dedup_final.jsonl | 5 ++ ...tracted_at_sync1_expectedrecords_raw.jsonl | 6 ++ ...orchange_expectedrecords_dedup_final.jsonl | 6 +- ...rsorchange_expectedrecords_dedup_raw.jsonl | 8 +- .../sync1_expectedrecords_dedup_final.jsonl | 8 +- .../sync1_expectedrecords_dedup_final2.jsonl | 2 +- ...sync1_expectedrecords_nondedup_final.jsonl | 10 +-- .../dat/sync1_expectedrecords_raw.jsonl | 10 +-- .../dat/sync1_expectedrecords_raw2.jsonl | 2 +- ...ectedrecords_incremental_dedup_final.jsonl | 4 +- ...xpectedrecords_incremental_dedup_raw.jsonl | 14 ++-- ...ctedrecords_fullrefresh_append_final.jsonl | 16 ++-- ...drecords_fullrefresh_overwrite_final.jsonl | 6 +- ...tedrecords_fullrefresh_overwrite_raw.jsonl | 6 +- ...ectedrecords_incremental_dedup_final.jsonl | 6 +- ...ctedrecords_incremental_dedup_final2.jsonl | 2 +- .../dat/sync2_expectedrecords_raw.jsonl | 16 ++-- .../dat/sync2_expectedrecords_raw2.jsonl | 4 +- 25 files changed, 218 insertions(+), 65 deletions(-) create mode 100644 airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt create mode 100644 airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/SnowflakeState.kt create mode 100644 airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/ltz_extracted_at_sync1_expectedrecords_dedup_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/ltz_extracted_at_sync1_expectedrecords_raw.jsonl diff --git a/airbyte-integrations/connectors/destination-snowflake/build.gradle b/airbyte-integrations/connectors/destination-snowflake/build.gradle index b84e054c0609..43c25808d469 100644 --- a/airbyte-integrations/connectors/destination-snowflake/build.gradle +++ b/airbyte-integrations/connectors/destination-snowflake/build.gradle @@ -1,11 +1,12 @@ plugins { id 'airbyte-java-connector' + id 'org.jetbrains.kotlin.jvm' version '1.9.22' } airbyteJavaConnector { cdkVersionRequired = '0.23.2' features = ['db-destinations', 's3-destinations', 'typing-deduping'] - useLocalCdk = false + useLocalCdk = true } java { diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java index 253212ecf628..0edfad4399d0 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java @@ -27,11 +27,13 @@ import 
io.airbyte.integrations.destination.snowflake.typing_deduping.SnowflakeSqlGenerator; import io.airbyte.integrations.destination.snowflake.typing_deduping.SnowflakeV1V2Migrator; import io.airbyte.integrations.destination.snowflake.typing_deduping.SnowflakeV2TableMigrator; +import io.airbyte.integrations.destination.snowflake.typing_deduping.migrations.ExtractedAtUtcTimezoneMigration; import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; import io.airbyte.protocol.models.v0.AirbyteMessage; import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.Optional; import java.util.UUID; @@ -163,10 +165,16 @@ public SerializedAirbyteMessageConsumer getSerializedMessageConsumer(final JsonN final SnowflakeV2TableMigrator v2TableMigrator = new SnowflakeV2TableMigrator(database, databaseName, sqlGenerator, snowflakeDestinationHandler); final boolean disableTypeDedupe = config.has(DISABLE_TYPE_DEDUPE) && config.get(DISABLE_TYPE_DEDUPE).asBoolean(false); if (disableTypeDedupe) { - typerDeduper = new NoOpTyperDeduperWithV1V2Migrations(sqlGenerator, snowflakeDestinationHandler, parsedCatalog, migrator, v2TableMigrator); + typerDeduper = new NoOpTyperDeduperWithV1V2Migrations<>(sqlGenerator, snowflakeDestinationHandler, parsedCatalog, migrator, v2TableMigrator); } else { typerDeduper = - new DefaultTyperDeduper(sqlGenerator, snowflakeDestinationHandler, parsedCatalog, migrator, v2TableMigrator); + new DefaultTyperDeduper<>( + sqlGenerator, + snowflakeDestinationHandler, + parsedCatalog, + migrator, + v2TableMigrator, + List.of(new ExtractedAtUtcTimezoneMigration(database))); } return StagingConsumerFactory.builder( diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java index 5bfeb5d6b25e..296509e9aef6 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java @@ -24,6 +24,7 @@ import io.airbyte.integrations.base.destination.typing_deduping.Sql; import io.airbyte.integrations.base.destination.typing_deduping.StreamConfig; import io.airbyte.integrations.base.destination.typing_deduping.StreamId; +import io.airbyte.integrations.destination.snowflake.typing_deduping.migrations.SnowflakeState; import io.airbyte.integrations.base.destination.typing_deduping.Struct; import io.airbyte.integrations.base.destination.typing_deduping.Union; import io.airbyte.integrations.base.destination.typing_deduping.UnsupportedOneOf; @@ -43,7 +44,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class SnowflakeDestinationHandler extends JdbcDestinationHandler { +public class SnowflakeDestinationHandler extends JdbcDestinationHandler<SnowflakeState> { private static final Logger LOGGER = LoggerFactory.getLogger(SnowflakeDestinationHandler.class); public static final String EXCEPTION_COMMON_PREFIX = "JavaScript execution error: Uncaught Execution of multiple statements failed on statement"; diff --git
a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt new file mode 100644 index 000000000000..57d8fb3b46de --- /dev/null +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt @@ -0,0 +1,84 @@ +package io.airbyte.integrations.destination.snowflake.typing_deduping.migrations +import com.fasterxml.jackson.databind.JsonNode +import com.fasterxml.jackson.databind.node.ObjectNode +import io.airbyte.cdk.db.jdbc.JdbcDatabase +import io.airbyte.cdk.integrations.base.JavaBaseConstants +import io.airbyte.commons.json.Jsons +import io.airbyte.integrations.base.destination.typing_deduping.* +import io.airbyte.integrations.base.destination.typing_deduping.migrators.Migration +import org.jooq.Field +import org.jooq.conf.ParamType +import org.jooq.impl.DSL.* +import org.slf4j.Logger +import org.slf4j.LoggerFactory + +/* +Slightly sketchy to accept a JdbcDatabase here. Migrations should generally prefer to use the +DestinationHandler argument in [migrateIfNecessary] to execute SQL, and DestinationInitialState for +deciding whether a migration is necessary. However, in this case, we need to actually query for data +in the raw table. There's no performance win to doing this via DestinationHandler.gatherInitialState, +since we need to query each table separately anyway. So we just take the database here. However, we +_do_ still use destinationHandler.execute, because that gives us debug SQL logs for when we run the +actual migration. +*/ +class ExtractedAtUtcTimezoneMigration(private val database: JdbcDatabase) : Migration<SnowflakeState> { + private val logger: Logger = LoggerFactory.getLogger(ExtractedAtUtcTimezoneMigration::class.java) + + override fun requireMigration(state: SnowflakeState): Boolean { + return !state.extractedAtInUtc + } + + override fun migrateIfNecessary(destinationHandler: DestinationHandler<SnowflakeState>, stream: StreamConfig, state: DestinationInitialState<SnowflakeState>): Migration.MigrationResult<SnowflakeState> { + if (!state.initialRawTableState.rawTableExists) { + // The raw table doesn't exist. No migration necessary. Update the state. + logger.info("Skipping ExtractedAtUtcTimezoneMigration for ${state.streamConfig.id.originalNamespace}.${state.streamConfig.id.originalName} because the raw table doesn't exist") + return Migration.MigrationResult(state.destinationState.copy(extractedAtInUtc = true), false) + } + + val rawRecordTimezone: JsonNode? = database.queryJsons( + { connection -> + connection.prepareStatement( + select( + field(sql("extract(timezone_hour from \"_airbyte_extracted_at\")")).`as`("tzh"), + field(sql("extract(timezone_minute from \"_airbyte_extracted_at\")")).`as`("tzm") + ).from(table(quotedName(stream.id().rawNamespace, stream.id().rawName))) + .limit(1) + .getSQL(ParamType.INLINED)) + }, + { rs -> + (Jsons.emptyObject() as ObjectNode) + .put("tzh", rs.getInt("tzh")) + .put("tzm", rs.getInt("tzm")) + } + ).firstOrNull() + if (rawRecordTimezone == null + || (rawRecordTimezone.get("tzh").intValue() == 0 && rawRecordTimezone.get("tzm").intValue() == 0)) { + // There are no raw records, or the raw records are already in UTC. No migration necessary. Update the state.
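+ // (Assumption: every raw record was written under a single session timezone, so the LIMIT 1 probe above is representative of the whole table.)
+ // (The rewrite executed below keeps the wall-clock value and only replaces the offset: e.g. '1970-01-01 00:00:01 -0800' becomes '1970-01-01 00:00:01Z', shifting the underlying instant by eight hours.)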
+ logger.info("Skipping ExtractedAtUtcTimezoneMigration for ${state.streamConfig.id.originalNamespace}.${state.streamConfig.id.originalName} because the raw table doesn't contain records needing migration.") + return Migration.MigrationResult(state.destinationState.copy(extractedAtInUtc = true), false) + } + + logger.info("Executing ExtractedAtUtcTimezoneMigration for ${state.streamConfig.id.originalNamespace}.${state.streamConfig.id.originalName} for real.") + + destinationHandler.execute(Sql.of( + update(table(quotedName(stream.id().rawNamespace, stream.id().rawName))) + .set( + field(quotedName(JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT)), + // this is the easiest way to forcibly set the offset on a timestamptz. + // We convert to timestamp_ntz to remove the offset, + // then convert to string and append a 'Z' offset, + // then convert back to timestamp_tz. + // We _could_ go through convert_timezone and manually add a negative offset number of hours + // but that's a lot more work for no real benefit. + field(sql(""" + cast(cast(cast("_airbyte_extracted_at" as timestampntz) as string) || 'Z' as timestamptz) + """.trimIndent())) as Any + ).getSQL(ParamType.INLINED)) + ) + + // Invalidate the initial state - we've modified all the extracted_at timestamps, so need to refetch them. + return Migration.MigrationResult(state.destinationState.copy(needsSoftReset = true, extractedAtInUtc = true), true) + } + +} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/SnowflakeState.kt b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/SnowflakeState.kt new file mode 100644 index 000000000000..bf40925dc4a2 --- /dev/null +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/SnowflakeState.kt @@ -0,0 +1,16 @@ +package io.airbyte.integrations.destination.snowflake.typing_deduping.migrations + +import io.airbyte.integrations.base.destination.typing_deduping.migrators.MinimumDestinationState + +// Note the nonnullable fields. Even though the underlying storage medium (a JSON blob) supports +// nullability, we don't want to deal with that in our codebase. 
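+// (Assumption: a state blob persisted before these fields existed deserializes them as false, i.e. "migration not yet run", which is the safe default.)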
+data class SnowflakeState(val needsSoftReset: Boolean, + val extractedAtInUtc: Boolean): MinimumDestinationState { + override fun needsSoftReset(): Boolean { + return needsSoftReset + } + + override fun <T: MinimumDestinationState> withSoftReset(needsSoftReset: Boolean): T { + return copy(needsSoftReset = needsSoftReset) as T + } +} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/AbstractSnowflakeTypingDedupingTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/AbstractSnowflakeTypingDedupingTest.java index 2c502d1c1ac9..84ea109c25d5 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/AbstractSnowflakeTypingDedupingTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/AbstractSnowflakeTypingDedupingTest.java @@ -218,6 +218,37 @@ public void testRemovingPKNonNullIndexes() throws Exception { assertEquals(1, dumpFinalTableRecords(streamNamespace, streamName).toArray().length); } + @Test + public void testExtractedAtUtcTimezoneMigration() throws Exception { + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(List.of( + new ConfiguredAirbyteStream() + .withSyncMode(SyncMode.INCREMENTAL) + .withDestinationSyncMode(DestinationSyncMode.APPEND_DEDUP) + .withPrimaryKey(List.of(List.of("id1"), List.of("id2"))) + .withCursorField(List.of("updated_at")) + .withStream(new AirbyteStream() + .withNamespace(streamNamespace) + .withName(streamName) + .withJsonSchema(SCHEMA)))); + + // First sync + final List<AirbyteMessage> messages1 = readMessages("dat/sync1_messages.jsonl"); + runSync(catalog, messages1, "airbyte/destination-snowflake:3.5.11"); + + final List<JsonNode> expectedRawRecords1 = readRecords("dat/ltz_extracted_at_sync1_expectedrecords_raw.jsonl"); + final List<JsonNode> expectedFinalRecords1 = readRecords("dat/ltz_extracted_at_sync1_expectedrecords_dedup_final.jsonl"); + verifySyncResult(expectedRawRecords1, expectedFinalRecords1, disableFinalTableComparison()); + + // Second sync + final List<AirbyteMessage> messages2 = readMessages("dat/sync2_messages.jsonl"); + + runSync(catalog, messages2); + + final List<JsonNode> expectedRawRecords2 = readRecords("dat/sync2_expectedrecords_raw.jsonl"); + final List<JsonNode> expectedFinalRecords2 = readRecords("dat/sync2_expectedrecords_incremental_dedup_final.jsonl"); + verifySyncResult(expectedRawRecords2, expectedFinalRecords2, disableFinalTableComparison()); + } + private String getDefaultSchema() { return getConfig().get("schema").asText(); } diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java index bf204e1909d7..fe1f47fc2d85 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java +++
b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java @@ -29,6 +29,7 @@ import io.airbyte.integrations.destination.snowflake.SnowflakeDatabase; import io.airbyte.integrations.destination.snowflake.SnowflakeTestSourceOperations; import io.airbyte.integrations.destination.snowflake.SnowflakeTestUtils; +import io.airbyte.integrations.destination.snowflake.typing_deduping.migrations.SnowflakeState; import java.nio.file.Path; import java.sql.SQLException; import java.util.Arrays; @@ -45,7 +46,7 @@ import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; -public class SnowflakeSqlGeneratorIntegrationTest extends BaseSqlGeneratorIntegrationTest { +public class SnowflakeSqlGeneratorIntegrationTest extends BaseSqlGeneratorIntegrationTest<SnowflakeState> { private static String databaseName; private static JdbcDatabase database; diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/ltz_extracted_at_sync1_expectedrecords_dedup_final.jsonl b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/ltz_extracted_at_sync1_expectedrecords_dedup_final.jsonl new file mode 100644 index 000000000000..cb50cd6fcc31 --- /dev/null +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/ltz_extracted_at_sync1_expectedrecords_dedup_final.jsonl @@ -0,0 +1,5 @@ +// Note the -08:00 offset in extracted_at. +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2000-01-01T00:01:00.000000000Z", "NAME": "Alice", "ADDRESS": {"city": "Los Angeles", "state": "CA"}} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 201, "UPDATED_AT": "2000-01-01T00:02:00.000000000Z", "NAME": "Bob", "ADDRESS": {"city": "Boston", "state": "MA"}} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "ID1": 2, "ID2": 200, "UPDATED_AT": "2000-01-01T00:03:00.000000000Z", "NAME": "Charlie"} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":[]}, "ID1": 3, "ID2": 200, "UPDATED_AT": "2000-01-01T00:04:00.000000000Z", "NAME": "a\bb\fc\nd\re\tf`~!@#$%^&*()_+-=[]\\{}|'\",./<>?"} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/ltz_extracted_at_sync1_expectedrecords_raw.jsonl b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/ltz_extracted_at_sync1_expectedrecords_raw.jsonl new file mode 100644 index 000000000000..6849b1072a0b --- /dev/null +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/ltz_extracted_at_sync1_expectedrecords_raw.jsonl @@ -0,0 +1,6 @@ +// Note the -08:00 offset in extracted_at.
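+// (Snowflake's default session timezone is America/Los_Angeles, which is presumably why the pre-migration connector version pinned by this test (3.5.11) recorded extracted_at with a -08:00 offset.)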
+{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00Z", "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 3, "id2": 200, "updated_at": "2000-01-01T00:04:00Z", "name": "a\bb\fc\nd\re\tf`~!@#$%^&*()_+-=[]\\{}|'\",./<>?"}} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_final.jsonl b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_final.jsonl index 7c9e93b21705..9672e61c9678 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_final.jsonl +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_final.jsonl @@ -1,3 +1,3 @@ -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 200, "OLD_CURSOR": 1, "NAME": "Alice", "ADDRESS": {"city": "Los Angeles", "state": "CA"}} -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 201, "OLD_CURSOR": 2, "NAME": "Bob", "ADDRESS": {"city": "Boston", "state": "MA"}} -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "ID1": 2, "ID2": 200, "OLD_CURSOR": 3, "NAME": "Charlie"} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000Z", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 200, "OLD_CURSOR": 1, "NAME": "Alice", "ADDRESS": {"city": "Los Angeles", "state": "CA"}} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000Z", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 201, "OLD_CURSOR": 2, "NAME": "Bob", "ADDRESS": {"city": "Boston", "state": "MA"}} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000Z", "_AIRBYTE_META": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "ID1": 2, "ID2": 200, "OLD_CURSOR": 3, "NAME": "Charlie"} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_raw.jsonl b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_raw.jsonl index fcf596ac0380..2f2b22731087 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_raw.jsonl +++ 
b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_raw.jsonl @@ -1,4 +1,4 @@ -{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 200, "old_cursor": 0, "_ab_cdc_deleted_at": null, "name" :"Alice", "address": {"city": "San Francisco", "state": "CA"}}} -{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 200, "old_cursor": 1, "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}} -{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 201, "old_cursor": 2, "name": "Bob", "address": {"city": "Boston", "state": "MA"}}} -{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 2, "id2": 200, "old_cursor": 3, "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "old_cursor": 0, "_ab_cdc_deleted_at": null, "name" :"Alice", "address": {"city": "San Francisco", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "old_cursor": 1, "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "old_cursor": 2, "name": "Bob", "address": {"city": "Boston", "state": "MA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000Z", "_airbyte_data": {"id1": 2, "id2": 200, "old_cursor": 3, "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final.jsonl b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final.jsonl index 136fa8a99003..0338cae59ac4 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final.jsonl +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final.jsonl @@ -1,5 +1,5 @@ // Keep the Alice record with more recent UPDATED_AT -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2000-01-01T00:01:00.000000000Z", "NAME": "Alice", "ADDRESS": {"city": "Los Angeles", "state": "CA"}} -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 201, "UPDATED_AT": "2000-01-01T00:02:00.000000000Z", "NAME": "Bob", "ADDRESS": {"city": "Boston", "state": "MA"}} -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "ID1": 2, "ID2": 200, "UPDATED_AT": "2000-01-01T00:03:00.000000000Z", "NAME": "Charlie"} -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":[]}, "ID1": 3, "ID2": 200, "UPDATED_AT": "2000-01-01T00:04:00.000000000Z", "NAME": "a\bb\fc\nd\re\tf`~!@#$%^&*()_+-=[]\\{}|'\",./<>?"} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000Z", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": 
"2000-01-01T00:01:00.000000000Z", "NAME": "Alice", "ADDRESS": {"city": "Los Angeles", "state": "CA"}} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000Z", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 201, "UPDATED_AT": "2000-01-01T00:02:00.000000000Z", "NAME": "Bob", "ADDRESS": {"city": "Boston", "state": "MA"}} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000Z", "_AIRBYTE_META": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "ID1": 2, "ID2": 200, "UPDATED_AT": "2000-01-01T00:03:00.000000000Z", "NAME": "Charlie"} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000Z", "_AIRBYTE_META": {"errors":[]}, "ID1": 3, "ID2": 200, "UPDATED_AT": "2000-01-01T00:04:00.000000000Z", "NAME": "a\bb\fc\nd\re\tf`~!@#$%^&*()_+-=[]\\{}|'\",./<>?"} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final2.jsonl b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final2.jsonl index 5f9395498870..83294d657935 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final2.jsonl +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final2.jsonl @@ -1 +1 @@ -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2001-01-01T00:00:00.000000000Z", "NAME": "Someone completely different"} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000Z", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2001-01-01T00:00:00.000000000Z", "NAME": "Someone completely different"} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_expectedrecords_nondedup_final.jsonl b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_expectedrecords_nondedup_final.jsonl index 575aa338976c..ca3c0aafa537 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_expectedrecords_nondedup_final.jsonl +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_expectedrecords_nondedup_final.jsonl @@ -1,6 +1,6 @@ -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2000-01-01T00:00:00.000000000Z", "NAME": "Alice", "ADDRESS": {"city": "San Francisco", "state": "CA"}} -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2000-01-01T00:01:00.000000000Z", "NAME": "Alice", "ADDRESS": {"city": "Los Angeles", "state": "CA"}} -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 201, "UPDATED_AT": "2000-01-01T00:02:00.000000000Z", "NAME": "Bob", "ADDRESS": {"city": "Boston", "state": "MA"}} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000Z", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2000-01-01T00:00:00.000000000Z", "NAME": "Alice", "ADDRESS": {"city": "San Francisco", "state": "CA"}} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000Z", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2000-01-01T00:01:00.000000000Z", "NAME": "Alice", "ADDRESS": {"city": "Los 
Angeles", "state": "CA"}} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000Z", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 201, "UPDATED_AT": "2000-01-01T00:02:00.000000000Z", "NAME": "Bob", "ADDRESS": {"city": "Boston", "state": "MA"}} // Invalid columns are nulled out (i.e. SQL null, not JSON null) -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "ID1": 2, "ID2": 200, "UPDATED_AT": "2000-01-01T00:03:00.000000000Z", "NAME": "Charlie"} -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":[]}, "ID1": 3, "ID2": 200, "UPDATED_AT": "2000-01-01T00:04:00.000000000Z", "NAME": "a\bb\fc\nd\re\tf`~!@#$%^&*()_+-=[]\\{}|'\",./<>?"} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000Z", "_AIRBYTE_META": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "ID1": 2, "ID2": 200, "UPDATED_AT": "2000-01-01T00:03:00.000000000Z", "NAME": "Charlie"} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000Z", "_AIRBYTE_META": {"errors":[]}, "ID1": 3, "ID2": 200, "UPDATED_AT": "2000-01-01T00:04:00.000000000Z", "NAME": "a\bb\fc\nd\re\tf`~!@#$%^&*()_+-=[]\\{}|'\",./<>?"} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_expectedrecords_raw.jsonl b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_expectedrecords_raw.jsonl index d1c3045997b3..8dbfcd6cbb9c 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_expectedrecords_raw.jsonl +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_expectedrecords_raw.jsonl @@ -1,7 +1,7 @@ -{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}} // Note the duplicate record. In this sync mode, we don't dedup anything. -{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}} -{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}}} // Invalid data is still allowed in the raw table. 
-{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00Z", "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}} -{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 3, "id2": 200, "updated_at": "2000-01-01T00:04:00Z", "name": "a\bb\fc\nd\re\tf`~!@#$%^&*()_+-=[]\\{}|'\",./<>?"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000Z", "_airbyte_data": {"id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00Z", "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000Z", "_airbyte_data": {"id1": 3, "id2": 200, "updated_at": "2000-01-01T00:04:00Z", "name": "a\bb\fc\nd\re\tf`~!@#$%^&*()_+-=[]\\{}|'\",./<>?"}} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_expectedrecords_raw2.jsonl b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_expectedrecords_raw2.jsonl index b0f0f8823c90..6849e306164f 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_expectedrecords_raw2.jsonl +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync1_expectedrecords_raw2.jsonl @@ -1 +1 @@ -{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2001-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Someone completely different"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2001-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Someone completely different"}} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_final.jsonl b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_final.jsonl index 93e29eb904e4..a22c21dfee41 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_final.jsonl +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_final.jsonl @@ -1,3 +1,3 @@ -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:02.000000000-08:00", "_AIRBYTE_META":{"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2000-01-02T00:00:00.000000000Z", "NAME": "Alice", "ADDRESS": {"city": "Seattle", "state": "WA"}} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:02.000000000Z", "_AIRBYTE_META":{"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2000-01-02T00:00:00.000000000Z", "NAME": "Alice", "ADDRESS": {"city": "Seattle", "state": "WA"}} // Charlie wasn't reemitted with UPDATED_AT, so it still has a null cursor -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "ID1": 2, "ID2": 200, "NAME": "Charlie"} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000Z", "_AIRBYTE_META": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "ID1": 2, "ID2": 200, "NAME": "Charlie"} diff --git 
a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_raw.jsonl b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_raw.jsonl index 347a9248d265..871f03978f60 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_raw.jsonl +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_raw.jsonl @@ -1,7 +1,7 @@ -{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 200, "old_cursor": 0, "_ab_cdc_deleted_at": null, "name" :"Alice", "address": {"city": "San Francisco", "state": "CA"}}} -{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 200, "old_cursor": 1, "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}} -{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 201, "old_cursor": 2, "name": "Bob", "address": {"city": "Boston", "state": "MA"}}} -{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 2, "id2": 200, "old_cursor": 3, "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}} -{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}} -{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Bob", "address": {"city": "New York", "state": "NY"}}} -{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01T00:00:00Z"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "old_cursor": 0, "_ab_cdc_deleted_at": null, "name" :"Alice", "address": {"city": "San Francisco", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "old_cursor": 1, "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "old_cursor": 2, "name": "Bob", "address": {"city": "Boston", "state": "MA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000Z", "_airbyte_data": {"id1": 2, "id2": 200, "old_cursor": 3, "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Bob", "address": {"city": "New York", "state": "NY"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000Z", 
"_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01T00:00:00Z"}} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_append_final.jsonl b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_append_final.jsonl index 67171fa4c01b..8b2a3f160f44 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_append_final.jsonl +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_append_final.jsonl @@ -1,9 +1,9 @@ -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2000-01-01T00:00:00.000000000Z", "NAME": "Alice", "ADDRESS": {"city": "San Francisco", "state": "CA"}} -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2000-01-01T00:01:00.000000000Z", "NAME": "Alice", "ADDRESS": {"city": "Los Angeles", "state": "CA"}} -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 201, "UPDATED_AT": "2000-01-01T00:02:00.000000000Z", "NAME": "Bob", "ADDRESS": {"city": "Boston", "state": "MA"}} -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "ID1": 2, "ID2": 200, "UPDATED_AT": "2000-01-01T00:03:00.000000000Z", "NAME": "Charlie"} -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":[]}, "ID1": 3, "ID2": 200, "UPDATED_AT": "2000-01-01T00:04:00.000000000Z", "NAME": "a\bb\fc\nd\re\tf`~!@#$%^&*()_+-=[]\\{}|'\",./<>?"} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000Z", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2000-01-01T00:00:00.000000000Z", "NAME": "Alice", "ADDRESS": {"city": "San Francisco", "state": "CA"}} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000Z", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2000-01-01T00:01:00.000000000Z", "NAME": "Alice", "ADDRESS": {"city": "Los Angeles", "state": "CA"}} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000Z", "_AIRBYTE_META": {"errors":[]}, "ID1": 1, "ID2": 201, "UPDATED_AT": "2000-01-01T00:02:00.000000000Z", "NAME": "Bob", "ADDRESS": {"city": "Boston", "state": "MA"}} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000Z", "_AIRBYTE_META": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "ID1": 2, "ID2": 200, "UPDATED_AT": "2000-01-01T00:03:00.000000000Z", "NAME": "Charlie"} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000Z", "_AIRBYTE_META": {"errors":[]}, "ID1": 3, "ID2": 200, "UPDATED_AT": "2000-01-01T00:04:00.000000000Z", "NAME": "a\bb\fc\nd\re\tf`~!@#$%^&*()_+-=[]\\{}|'\",./<>?"} -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:02.000000000-08:00", "_AIRBYTE_META":{"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2000-01-02T00:00:00.000000000Z", "NAME": "Alice", "ADDRESS": {"city": "Seattle", "state": "WA"}} -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:02.000000000-08:00", "_AIRBYTE_META":{"errors":[]}, "ID1": 1, "ID2": 201, "UPDATED_AT": "2000-01-02T00:00:00.000000000Z", "NAME": "Bob", "ADDRESS": {"city": "New York", "state": 
"NY"}} -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:02.000000000-08:00", "_AIRBYTE_META":{"errors":[]}, "ID1": 1, "ID2": 201, "UPDATED_AT": "2000-01-02T00:01:00.000000000Z", "_AB_CDC_DELETED_AT": "1970-01-01T00:00:00.000000000Z"} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:02.000000000Z", "_AIRBYTE_META":{"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2000-01-02T00:00:00.000000000Z", "NAME": "Alice", "ADDRESS": {"city": "Seattle", "state": "WA"}} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:02.000000000Z", "_AIRBYTE_META":{"errors":[]}, "ID1": 1, "ID2": 201, "UPDATED_AT": "2000-01-02T00:00:00.000000000Z", "NAME": "Bob", "ADDRESS": {"city": "New York", "state": "NY"}} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:02.000000000Z", "_AIRBYTE_META":{"errors":[]}, "ID1": 1, "ID2": 201, "UPDATED_AT": "2000-01-02T00:01:00.000000000Z", "_AB_CDC_DELETED_AT": "1970-01-01T00:00:00.000000000Z"} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_final.jsonl b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_final.jsonl index 61366dee9ab4..3f3fd3f1f3e7 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_final.jsonl +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_final.jsonl @@ -1,3 +1,3 @@ -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:02.000000000-08:00", "_AIRBYTE_META":{"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2000-01-02T00:00:00.000000000Z", "NAME": "Alice", "ADDRESS": {"city": "Seattle", "state": "WA"}} -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:02.000000000-08:00", "_AIRBYTE_META":{"errors":[]}, "ID1": 1, "ID2": 201, "UPDATED_AT": "2000-01-02T00:00:00.000000000Z", "NAME": "Bob", "ADDRESS": {"city": "New York", "state": "NY"}} -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:02.000000000-08:00", "_AIRBYTE_META":{"errors":[]}, "ID1": 1, "ID2": 201, "UPDATED_AT": "2000-01-02T00:01:00.000000000Z", "_AB_CDC_DELETED_AT": "1970-01-01T00:00:00.000000000Z"} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:02.000000000Z", "_AIRBYTE_META":{"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2000-01-02T00:00:00.000000000Z", "NAME": "Alice", "ADDRESS": {"city": "Seattle", "state": "WA"}} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:02.000000000Z", "_AIRBYTE_META":{"errors":[]}, "ID1": 1, "ID2": 201, "UPDATED_AT": "2000-01-02T00:00:00.000000000Z", "NAME": "Bob", "ADDRESS": {"city": "New York", "state": "NY"}} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:02.000000000Z", "_AIRBYTE_META":{"errors":[]}, "ID1": 1, "ID2": 201, "UPDATED_AT": "2000-01-02T00:01:00.000000000Z", "_AB_CDC_DELETED_AT": "1970-01-01T00:00:00.000000000Z"} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_raw.jsonl b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_raw.jsonl index 2607c9f73a49..7ea21e905fe2 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_raw.jsonl +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_raw.jsonl 
@@ -1,3 +1,3 @@ -{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}} -{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Bob", "address": {"city": "New York", "state": "NY"}}} -{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01T00:00:00Z"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Bob", "address": {"city": "New York", "state": "NY"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01T00:00:00Z"}} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final.jsonl b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final.jsonl index 2f7a58c51499..02e36c558939 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final.jsonl +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final.jsonl @@ -1,4 +1,4 @@ -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:02.000000000-08:00", "_AIRBYTE_META":{"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2000-01-02T00:00:00.000000000Z", "NAME": "Alice", "ADDRESS": {"city": "Seattle", "state": "WA"}} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:02.000000000Z", "_AIRBYTE_META":{"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2000-01-02T00:00:00.000000000Z", "NAME": "Alice", "ADDRESS": {"city": "Seattle", "state": "WA"}} // Delete Bob, keep Charlie -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "ID1": 2, "ID2": 200, "UPDATED_AT": "2000-01-01T00:03:00.000000000Z", "NAME": "Charlie"} -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":[]}, "ID1": 3, "ID2": 200, "UPDATED_AT": "2000-01-01T00:04:00.000000000Z", "NAME": "a\bb\fc\nd\re\tf`~!@#$%^&*()_+-=[]\\{}|'\",./<>?"} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000Z", "_AIRBYTE_META": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "ID1": 2, "ID2": 200, "UPDATED_AT": "2000-01-01T00:03:00.000000000Z", "NAME": "Charlie"} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000Z", "_AIRBYTE_META": {"errors":[]}, "ID1": 3, "ID2": 200, "UPDATED_AT": "2000-01-01T00:04:00.000000000Z", "NAME": "a\bb\fc\nd\re\tf`~!@#$%^&*()_+-=[]\\{}|'\",./<>?"} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final2.jsonl 
b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final2.jsonl index b86eb147ba89..1eefb353ce6b 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final2.jsonl +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final2.jsonl @@ -1 +1 @@ -{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:02.000000000-08:00", "_AIRBYTE_META":{"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2001-01-02T00:00:00.000000000Z", "NAME": "Someone completely different v2"} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:02.000000000Z", "_AIRBYTE_META":{"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2001-01-02T00:00:00.000000000Z", "NAME": "Someone completely different v2"} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_raw.jsonl b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_raw.jsonl index d4bd6c49d4e7..2509cc47735e 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_raw.jsonl +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_raw.jsonl @@ -1,10 +1,10 @@ // We keep the records from the first sync -{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}} -{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}} -{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}}} -{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00Z", "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}} -{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 3, "id2": 200, "updated_at": "2000-01-01T00:04:00Z", "name": "a\bb\fc\nd\re\tf`~!@#$%^&*()_+-=[]\\{}|'\",./<>?"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000Z", "_airbyte_data": {"id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00Z", "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not 
a date"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000Z", "_airbyte_data": {"id1": 3, "id2": 200, "updated_at": "2000-01-01T00:04:00Z", "name": "a\bb\fc\nd\re\tf`~!@#$%^&*()_+-=[]\\{}|'\",./<>?"}} // And append the records from the second sync -{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}} -{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Bob", "address": {"city": "New York", "state": "NY"}}} -{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01T00:00:00Z"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Bob", "address": {"city": "New York", "state": "NY"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01T00:00:00Z"}} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_raw2.jsonl b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_raw2.jsonl index 4d2e3167888c..0c8fd4eceab0 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_raw2.jsonl +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_raw2.jsonl @@ -1,2 +1,2 @@ -{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2001-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Someone completely different"}} -{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2001-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Someone completely different v2"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2001-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Someone completely different"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2001-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Someone completely different v2"}} From 000e461c970f72ecb6ab00796a4bef44cc5e5d37 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Wed, 21 Feb 2024 16:55:08 -0800 Subject: [PATCH 02/33] fixups for snowflake timezone migration stuff --- .../SnowflakeInternalStagingDestination.java | 17 +++++--- .../SnowflakeDestinationHandler.java | 43 ++++++++++++++----- .../ExtractedAtUtcTimezoneMigration.kt | 1 + .../SnowflakeSqlGeneratorIntegrationTest.java | 4 +- 4 files changed, 47 insertions(+), 18 deletions(-) diff --git 
a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java index 0edfad4399d0..d80542dab5f5 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java @@ -9,6 +9,7 @@ import io.airbyte.cdk.db.jdbc.JdbcDatabase; import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.base.Destination; +import io.airbyte.cdk.integrations.base.JavaBaseConstants; import io.airbyte.cdk.integrations.base.SerializedAirbyteMessageConsumer; import io.airbyte.cdk.integrations.base.TypingAndDedupingFlag; import io.airbyte.cdk.integrations.destination.NamingConventionTransformer; @@ -23,11 +24,13 @@ import io.airbyte.integrations.base.destination.typing_deduping.ParsedCatalog; import io.airbyte.integrations.base.destination.typing_deduping.TypeAndDedupeOperationValve; import io.airbyte.integrations.base.destination.typing_deduping.TyperDeduper; +import io.airbyte.integrations.base.destination.typing_deduping.migrators.Migration; import io.airbyte.integrations.destination.snowflake.typing_deduping.SnowflakeDestinationHandler; import io.airbyte.integrations.destination.snowflake.typing_deduping.SnowflakeSqlGenerator; import io.airbyte.integrations.destination.snowflake.typing_deduping.SnowflakeV1V2Migrator; import io.airbyte.integrations.destination.snowflake.typing_deduping.SnowflakeV2TableMigrator; import io.airbyte.integrations.destination.snowflake.typing_deduping.migrations.ExtractedAtUtcTimezoneMigration; +import io.airbyte.integrations.destination.snowflake.typing_deduping.migrations.SnowflakeState; import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; import io.airbyte.protocol.models.v0.AirbyteMessage; import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; @@ -133,7 +136,7 @@ protected JdbcSqlGenerator getSqlGenerator() { } @Override - protected JdbcDestinationHandler getDestinationHandler(String databaseName, JdbcDatabase database) { + protected JdbcDestinationHandler getDestinationHandler(String databaseName, JdbcDatabase database) { throw new UnsupportedOperationException("Snowflake does not yet use the native JDBC DV2 interface"); } @@ -153,19 +156,23 @@ public SerializedAirbyteMessageConsumer getSerializedMessageConsumer(final JsonN final TyperDeduper typerDeduper; final JdbcDatabase database = getDatabase(getDataSource(config)); final String databaseName = config.get(JdbcUtils.DATABASE_KEY).asText(); - final SnowflakeDestinationHandler snowflakeDestinationHandler = new SnowflakeDestinationHandler(databaseName, database); + final String rawTableSchemaName; final CatalogParser catalogParser; if (TypingAndDedupingFlag.getRawNamespaceOverride(RAW_SCHEMA_OVERRIDE).isPresent()) { - catalogParser = new CatalogParser(sqlGenerator, TypingAndDedupingFlag.getRawNamespaceOverride(RAW_SCHEMA_OVERRIDE).get()); + rawTableSchemaName = TypingAndDedupingFlag.getRawNamespaceOverride(RAW_SCHEMA_OVERRIDE).get(); + catalogParser = new CatalogParser(sqlGenerator, rawTableSchemaName); } else { + rawTableSchemaName = JavaBaseConstants.DEFAULT_AIRBYTE_INTERNAL_NAMESPACE; 
catalogParser = new CatalogParser(sqlGenerator); } + final SnowflakeDestinationHandler snowflakeDestinationHandler = new SnowflakeDestinationHandler(databaseName, database, rawTableSchemaName); parsedCatalog = catalogParser.parseCatalog(catalog); final SnowflakeV1V2Migrator migrator = new SnowflakeV1V2Migrator(getNamingResolver(), database, databaseName); final SnowflakeV2TableMigrator v2TableMigrator = new SnowflakeV2TableMigrator(database, databaseName, sqlGenerator, snowflakeDestinationHandler); final boolean disableTypeDedupe = config.has(DISABLE_TYPE_DEDUPE) && config.get(DISABLE_TYPE_DEDUPE).asBoolean(false); + final List> migrations = List.of(new ExtractedAtUtcTimezoneMigration(database)); if (disableTypeDedupe) { - typerDeduper = new NoOpTyperDeduperWithV1V2Migrations<>(sqlGenerator, snowflakeDestinationHandler, parsedCatalog, migrator, v2TableMigrator); + typerDeduper = new NoOpTyperDeduperWithV1V2Migrations<>(sqlGenerator, snowflakeDestinationHandler, parsedCatalog, migrator, v2TableMigrator, migrations); } else { typerDeduper = new DefaultTyperDeduper<>( @@ -174,7 +181,7 @@ public SerializedAirbyteMessageConsumer getSerializedMessageConsumer(final JsonN parsedCatalog, migrator, v2TableMigrator, - List.of(new ExtractedAtUtcTimezoneMigration(database))); + migrations); } return StagingConsumerFactory.builder( diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java index 296509e9aef6..92f297d850de 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java @@ -14,20 +14,21 @@ import io.airbyte.cdk.integrations.destination.jdbc.ColumnDefinition; import io.airbyte.cdk.integrations.destination.jdbc.TableDefinition; import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcDestinationHandler; +import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.base.destination.typing_deduping.AirbyteProtocolType; import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType; import io.airbyte.integrations.base.destination.typing_deduping.Array; import io.airbyte.integrations.base.destination.typing_deduping.ColumnId; import io.airbyte.integrations.base.destination.typing_deduping.DestinationInitialState; -import io.airbyte.integrations.base.destination.typing_deduping.DestinationInitialStateImpl; import io.airbyte.integrations.base.destination.typing_deduping.InitialRawTableState; import io.airbyte.integrations.base.destination.typing_deduping.Sql; import io.airbyte.integrations.base.destination.typing_deduping.StreamConfig; import io.airbyte.integrations.base.destination.typing_deduping.StreamId; -import io.airbyte.integrations.destination.snowflake.typing_deduping.migrations.SnowflakeState; import io.airbyte.integrations.base.destination.typing_deduping.Struct; import io.airbyte.integrations.base.destination.typing_deduping.Union; import io.airbyte.integrations.base.destination.typing_deduping.UnsupportedOneOf; +import 
io.airbyte.integrations.destination.snowflake.typing_deduping.migrations.SnowflakeState; +import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; import java.sql.ResultSet; import java.sql.SQLException; import java.time.Instant; @@ -41,6 +42,7 @@ import java.util.stream.Collectors; import net.snowflake.client.jdbc.SnowflakeSQLException; import org.apache.commons.text.StringSubstitutor; +import org.jooq.SQLDialect; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,9 +54,11 @@ public class SnowflakeDestinationHandler extends JdbcDestinationHandler results = database.queryJsons(query, bindValues); @@ -128,7 +132,7 @@ public InitialRawTableState getInitialRawTableState(final StreamId id) throws Ex id.rawName(), null); if (!tables.next()) { - return new InitialRawTableState(false, Optional.empty()); + return new InitialRawTableState(false, false, Optional.empty()); } // Snowflake timestamps have nanosecond precision, so decrement by 1ns // And use two explicit queries because COALESCE doesn't short-circuit. @@ -147,7 +151,7 @@ SELECT to_varchar( // The query will always return exactly one record, so use .get(0) record -> record.getString("MIN_TIMESTAMP")).get(0)); if (minUnloadedTimestamp.isPresent()) { - return new InitialRawTableState(true, minUnloadedTimestamp.map(Instant::parse)); + return new InitialRawTableState(true, true, minUnloadedTimestamp.map(Instant::parse)); } // If there are no unloaded raw records, then we can safely skip all existing raw records. @@ -163,7 +167,7 @@ SELECT to_varchar( FROM ${raw_table} """)), record -> record.getString("MIN_TIMESTAMP")).get(0)); - return new InitialRawTableState(false, maxTimestamp.map(Instant::parse)); + return new InitialRawTableState(true, false, maxTimestamp.map(Instant::parse)); } @Override @@ -251,7 +255,9 @@ protected boolean existingSchemaMatchesStreamConfig(final StreamConfig stream, f } @Override - public List gatherInitialState(List streamConfigs) throws Exception { + public List> gatherInitialState(List streamConfigs) throws Exception { + final Map destinationStates = super.getAllDestinationStates(); + List streamIds = streamConfigs.stream().map(StreamConfig::id).toList(); final LinkedHashMap> existingTables = findExistingTables(database, databaseName, streamIds); final LinkedHashMap> tableRowCounts = getFinalTableRowCount(streamIds); @@ -269,7 +275,14 @@ public List gatherInitialState(List strea isFinalTableEmpty = hasRowCount && tableRowCounts.get(namespace).get(name) == 0; } final InitialRawTableState initialRawTableState = getInitialRawTableState(streamConfig.id()); - return new DestinationInitialStateImpl(streamConfig, isFinalTablePresent, initialRawTableState, isSchemaMismatch, isFinalTableEmpty); + final SnowflakeState destinationState = destinationStates.getOrDefault(streamConfig.id().asPair(), toDestinationState(Jsons.emptyObject())); + return new DestinationInitialState<>( + streamConfig, + isFinalTablePresent, + initialRawTableState, + isSchemaMismatch, + isFinalTableEmpty, + destinationState); } catch (Exception e) { throw new RuntimeException(e); } @@ -291,6 +304,14 @@ protected String toJdbcTypeName(AirbyteType airbyteType) { }; } + @Override + protected SnowflakeState toDestinationState(JsonNode json) { + // TODO will jackson deser handle the null -> false conversion for us? 
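+ // Illustrative example (values assumed, not taken from a real sync): a persisted blob of
+ // {"needsSoftReset": null, "extractedAtInUtc": true} must come back as
+ // SnowflakeState(needsSoftReset = false, extractedAtInUtc = true), so the hasNonNull
+ // checks below default missing or null fields to false rather than relying on Jackson.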
+ return new SnowflakeState( + json.hasNonNull("needsSoftReset") && json.get("needsSoftReset").asBoolean(), + json.hasNonNull("extractedAtInUtc") && json.get("extractedAtInUtc").asBoolean()); + } + private String toJdbcTypeName(final AirbyteProtocolType airbyteProtocolType) { return switch (airbyteProtocolType) { case STRING -> "TEXT"; diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt index 57d8fb3b46de..948e2ba55ca7 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt @@ -77,6 +77,7 @@ class ExtractedAtUtcTimezoneMigration(private val database: JdbcDatabase) : Migr ).getSQL(ParamType.INLINED)) ) + // We've executed the migration. Update the state and trigger a soft reset. // Invalidate the initial state - we've modified all the extracted_at timestamps, so need to refetch them. return Migration.MigrationResult(state.destinationState.copy(needsSoftReset = true, extractedAtInUtc = true), true) } diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java index fe1f47fc2d85..f4727bfee249 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java @@ -72,7 +72,7 @@ protected SnowflakeSqlGenerator getSqlGenerator() { @Override protected SnowflakeDestinationHandler getDestinationHandler() { - return new SnowflakeDestinationHandler(databaseName, database); + return new SnowflakeDestinationHandler(databaseName, database, namespace.toUpperCase()); } @Override @@ -413,7 +413,7 @@ public void ensurePKsAreIndexedUnique() throws Exception { // should be OK with new tables destinationHandler.execute(createTable); - List initialStates = destinationHandler.gatherInitialState(List.of(incrementalDedupStream)); + List> initialStates = destinationHandler.gatherInitialState(List.of(incrementalDedupStream)); assertEquals(1, initialStates.size()); assertFalse(initialStates.get(0).isSchemaMismatch()); destinationHandler.execute(Sql.of("DROP TABLE " + streamId.finalTableId(""))); From 37bfa77530dc9736e4a089e4d5d3f67806b09549 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 23 Feb 2024 13:40:10 -0800 Subject: [PATCH 03/33] fix build --- .../snowflake/SnowflakeInternalStagingDestination.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java index d80542dab5f5..6a406ebc0354 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java @@ -136,7 +136,7 @@ protected JdbcSqlGenerator getSqlGenerator() { } @Override - protected JdbcDestinationHandler getDestinationHandler(String databaseName, JdbcDatabase database) { + protected JdbcDestinationHandler getDestinationHandler(String databaseName, JdbcDatabase database, String rawTableSchema) { throw new UnsupportedOperationException("Snowflake does not yet use the native JDBC DV2 interface"); } From 5a380b3bf4db6fb2227322d8d72cfc803933efbe Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 23 Feb 2024 15:30:21 -0800 Subject: [PATCH 04/33] format --- .../snowflake/SnowflakeInternalStagingDestination.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java index 6a406ebc0354..63856ce600c7 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java @@ -172,7 +172,8 @@ public SerializedAirbyteMessageConsumer getSerializedMessageConsumer(final JsonN final boolean disableTypeDedupe = config.has(DISABLE_TYPE_DEDUPE) && config.get(DISABLE_TYPE_DEDUPE).asBoolean(false); final List> migrations = List.of(new ExtractedAtUtcTimezoneMigration(database)); if (disableTypeDedupe) { - typerDeduper = new NoOpTyperDeduperWithV1V2Migrations<>(sqlGenerator, snowflakeDestinationHandler, parsedCatalog, migrator, v2TableMigrator, migrations); + typerDeduper = + new NoOpTyperDeduperWithV1V2Migrations<>(sqlGenerator, snowflakeDestinationHandler, parsedCatalog, migrator, v2TableMigrator, migrations); } else { typerDeduper = new DefaultTyperDeduper<>( From e4e467bb25eb33bc23b4b1953db27ecd5201a83b Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 26 Feb 2024 12:36:38 -0800 Subject: [PATCH 05/33] also skip for overwrite sync mode --- .../migrations/ExtractedAtUtcTimezoneMigration.kt | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt index 948e2ba55ca7..5f778ce30cc1 100644 --- 
a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt @@ -7,6 +7,7 @@ import io.airbyte.cdk.integrations.base.JavaBaseConstants import io.airbyte.commons.json.Jsons import io.airbyte.integrations.base.destination.typing_deduping.* import io.airbyte.integrations.base.destination.typing_deduping.migrators.Migration +import io.airbyte.protocol.models.v0.DestinationSyncMode import org.jooq.Field import org.jooq.conf.ParamType import org.jooq.impl.DSL.* @@ -32,7 +33,12 @@ class ExtractedAtUtcTimezoneMigration(private val database: JdbcDatabase) : Migr override fun migrateIfNecessary(destinationHandler: DestinationHandler, stream: StreamConfig, state: DestinationInitialState): Migration.MigrationResult { if (!state.initialRawTableState.rawTableExists) { // The raw table doesn't exist. No migration necessary. Update the state. - logger.info("Skipping ExtractedAtUtcTimezoneMigration for ${state.streamConfig.id.originalNamespace}.${state.streamConfig.id.originalName} because the raw table doesn't exist") + logger.info("Skipping ExtractedAtUtcTimezoneMigration for ${stream.id.originalNamespace}.${stream.id.originalName} because the raw table doesn't exist") + return Migration.MigrationResult(state.destinationState.copy(extractedAtInUtc = true), false) + } + if (stream.destinationSyncMode == DestinationSyncMode.OVERWRITE) { + // We're nuking the data for this stream. No migration necessary. Update the state. + logger.info("Skipping ExtractedAtUtcTimezoneMigration for ${stream.id.originalNamespace}.${stream.id.originalName} because the sync mode is OVERWRITE.") return Migration.MigrationResult(state.destinationState.copy(extractedAtInUtc = true), false) } @@ -55,11 +61,11 @@ class ExtractedAtUtcTimezoneMigration(private val database: JdbcDatabase) : Migr if (rawRecordTimezone == null || (rawRecordTimezone.get("tzh").intValue() == 0 && rawRecordTimezone.get("tzm").intValue() == 0)) { // There are no raw records, or the raw records are already in UTC. No migration necessary. Update the state. 
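// For instance (illustrative values): a record stored as '1970-01-01T00:00:01.000000000-08:00'
// surfaces here as tzh = -8, tzm = 0 and needs rewriting, while an already-migrated UTC
// record yields tzh = 0, tzm = 0 and is skipped.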
- logger.info("Skipping ExtractedAtUtcTimezoneMigration for ${state.streamConfig.id.originalNamespace}.${state.streamConfig.id.originalName} because the raw table doesn't contain records needing migration.") + logger.info("Skipping ExtractedAtUtcTimezoneMigration for ${stream.id.originalNamespace}.${stream.id.originalName} because the raw table doesn't contain records needing migration.") return Migration.MigrationResult(state.destinationState.copy(extractedAtInUtc = true), false) } - logger.info("Executing ExtractedAtUtcTimezoneMigration for ${state.streamConfig.id.originalNamespace}.${state.streamConfig.id.originalName} for real.") + logger.info("Executing ExtractedAtUtcTimezoneMigration for ${stream.id.originalNamespace}.${stream.id.originalName} for real.") destinationHandler.execute(Sql.of( update(table(quotedName(stream.id().rawNamespace, stream.id().rawName))) From 4ac67b9eccfb7ad1e49e22424fea009ad1fde84f Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Thu, 29 Feb 2024 17:19:10 -0800 Subject: [PATCH 06/33] fix compilation errors post rebase --- .../SnowflakeDestinationHandler.java | 18 +++++++++--------- .../ExtractedAtUtcTimezoneMigration.kt | 13 ++++++------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java index 92f297d850de..97be4f96af24 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java @@ -19,8 +19,8 @@ import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType; import io.airbyte.integrations.base.destination.typing_deduping.Array; import io.airbyte.integrations.base.destination.typing_deduping.ColumnId; -import io.airbyte.integrations.base.destination.typing_deduping.DestinationInitialState; -import io.airbyte.integrations.base.destination.typing_deduping.InitialRawTableState; +import io.airbyte.integrations.base.destination.typing_deduping.DestinationInitialStatus; +import io.airbyte.integrations.base.destination.typing_deduping.InitialRawTableStatus; import io.airbyte.integrations.base.destination.typing_deduping.Sql; import io.airbyte.integrations.base.destination.typing_deduping.StreamConfig; import io.airbyte.integrations.base.destination.typing_deduping.StreamId; @@ -125,14 +125,14 @@ AND table_name IN (%s) return tableRowCounts; } - public InitialRawTableState getInitialRawTableState(final StreamId id) throws Exception { + public InitialRawTableStatus getInitialRawTableState(final StreamId id) throws Exception { final ResultSet tables = database.getMetaData().getTables( databaseName, id.rawNamespace(), id.rawName(), null); if (!tables.next()) { - return new InitialRawTableState(false, false, Optional.empty()); + return new InitialRawTableStatus(false, false, Optional.empty()); } // Snowflake timestamps have nanosecond precision, so decrement by 1ns // And use two explicit queries because COALESCE doesn't short-circuit. 
@@ -151,7 +151,7 @@ SELECT to_varchar( // The query will always return exactly one record, so use .get(0) record -> record.getString("MIN_TIMESTAMP")).get(0)); if (minUnloadedTimestamp.isPresent()) { - return new InitialRawTableState(true, true, minUnloadedTimestamp.map(Instant::parse)); + return new InitialRawTableStatus(true, true, minUnloadedTimestamp.map(Instant::parse)); } // If there are no unloaded raw records, then we can safely skip all existing raw records. @@ -167,7 +167,7 @@ SELECT to_varchar( FROM ${raw_table} """)), record -> record.getString("MIN_TIMESTAMP")).get(0)); - return new InitialRawTableState(true, false, maxTimestamp.map(Instant::parse)); + return new InitialRawTableStatus(true, false, maxTimestamp.map(Instant::parse)); } @Override @@ -255,7 +255,7 @@ protected boolean existingSchemaMatchesStreamConfig(final StreamConfig stream, f } @Override - public List<DestinationInitialState<SnowflakeState>> gatherInitialState(List<StreamConfig> streamConfigs) throws Exception { + public List<DestinationInitialStatus<SnowflakeState>> gatherInitialState(List<StreamConfig> streamConfigs) throws Exception { final Map<AirbyteStreamNameNamespacePair, SnowflakeState> destinationStates = super.getAllDestinationStates(); List<StreamId> streamIds = streamConfigs.stream().map(StreamConfig::id).toList(); @@ -274,9 +274,9 @@ public List<DestinationInitialStatus<SnowflakeState>> gatherInitialState(List<StreamConfig> streamConfigs) { } - final InitialRawTableState initialRawTableState = getInitialRawTableState(streamConfig.id()); + final InitialRawTableStatus initialRawTableState = getInitialRawTableState(streamConfig.id()); final SnowflakeState destinationState = destinationStates.getOrDefault(streamConfig.id().asPair(), toDestinationState(Jsons.emptyObject())); - return new DestinationInitialState<>( + return new DestinationInitialStatus<>( streamConfig, isFinalTablePresent, initialRawTableState, diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt index 5f778ce30cc1..f3f9a65849e9 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt @@ -26,12 +26,8 @@ actual migration. class ExtractedAtUtcTimezoneMigration(private val database: JdbcDatabase) : Migration<SnowflakeState> { private val logger: Logger = LoggerFactory.getLogger(ExtractedAtUtcTimezoneMigration::class.java) - override fun requireMigration(state: SnowflakeState): Boolean { - return !state.extractedAtInUtc - } - - override fun migrateIfNecessary(destinationHandler: DestinationHandler<SnowflakeState>, stream: StreamConfig, state: DestinationInitialState<SnowflakeState>): Migration.MigrationResult<SnowflakeState> { - if (!state.initialRawTableState.rawTableExists) { + override fun migrateIfNecessary(destinationHandler: DestinationHandler<SnowflakeState>, stream: StreamConfig, state: DestinationInitialStatus<SnowflakeState>): Migration.MigrationResult<SnowflakeState> { + if (!state.initialRawTableStatus.rawTableExists) { // The raw table doesn't exist. No migration necessary. Update the state.
logger.info("Skipping ExtractedAtUtcTimezoneMigration for ${stream.id.originalNamespace}.${stream.id.originalName} because the raw table doesn't exist") return Migration.MigrationResult(state.destinationState.copy(extractedAtInUtc = true), false) @@ -41,7 +37,10 @@ class ExtractedAtUtcTimezoneMigration(private val database: JdbcDatabase) : Migr logger.info("Skipping ExtractedAtUtcTimezoneMigration for ${stream.id.originalNamespace}.${stream.id.originalName} because the sync mode is OVERWRITE.") return Migration.MigrationResult(state.destinationState.copy(extractedAtInUtc = true), false) } - + if (state.destinationState.extractedAtInUtc) { + logger.info("Skipping ExtractedAtUtcTimezoneMigration for ${stream.id.originalNamespace}.${stream.id.originalName}, already done.") + return Migration.MigrationResult(state.destinationState, false) + } val rawRecordTimezone: JsonNode? = database.queryJsons( { connection -> connection.prepareStatement( From 927b3d876d87fc8777960ebd4e5bf295e08bd326 Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Fri, 1 Mar 2024 10:25:03 -0800 Subject: [PATCH 07/33] fix compilation in test --- .../SnowflakeSqlGeneratorIntegrationTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java index f4727bfee249..877c5419da9a 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java @@ -22,7 +22,7 @@ import io.airbyte.commons.io.IOs; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.base.destination.typing_deduping.BaseSqlGeneratorIntegrationTest; -import io.airbyte.integrations.base.destination.typing_deduping.DestinationInitialState; +import io.airbyte.integrations.base.destination.typing_deduping.DestinationInitialStatus; import io.airbyte.integrations.base.destination.typing_deduping.Sql; import io.airbyte.integrations.base.destination.typing_deduping.StreamId; import io.airbyte.integrations.destination.snowflake.OssCloudEnvVarConsts; @@ -413,9 +413,9 @@ public void ensurePKsAreIndexedUnique() throws Exception { // should be OK with new tables destinationHandler.execute(createTable); - List> initialStates = destinationHandler.gatherInitialState(List.of(incrementalDedupStream)); + List> initialStates = destinationHandler.gatherInitialState(List.of(incrementalDedupStream)); assertEquals(1, initialStates.size()); - assertFalse(initialStates.get(0).isSchemaMismatch()); + assertFalse(initialStates.getFirst().isSchemaMismatch()); destinationHandler.execute(Sql.of("DROP TABLE " + streamId.finalTableId(""))); // Hack the create query to add NOT NULLs to emulate the old behavior From d811907106996aafc965bbcfb13650953c3b5a56 Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Fri, 1 Mar 2024 10:29:06 -0800 Subject: [PATCH 08/33] pr comments --- .../snowflake/typing_deduping/SnowflakeDestinationHandler.java | 1 - 
.../typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java index 97be4f96af24..ab26fdac7d8b 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java @@ -306,7 +306,6 @@ protected String toJdbcTypeName(AirbyteType airbyteType) { @Override protected SnowflakeState toDestinationState(JsonNode json) { - // TODO will jackson deser handle the null -> false conversion for us? return new SnowflakeState( json.hasNonNull("needsSoftReset") && json.get("needsSoftReset").asBoolean(), json.hasNonNull("extractedAtInUtc") && json.get("extractedAtInUtc").asBoolean()); diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java index 877c5419da9a..29c3b6be04df 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java @@ -72,7 +72,7 @@ protected SnowflakeSqlGenerator getSqlGenerator() { @Override protected SnowflakeDestinationHandler getDestinationHandler() { - return new SnowflakeDestinationHandler(databaseName, database, namespace.toUpperCase()); + return new SnowflakeDestinationHandler(databaseName, database, namespace); } @Override From f5c9e0eebfc245979737314c1705f889546ec1ce Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Fri, 1 Mar 2024 11:48:11 -0800 Subject: [PATCH 09/33] version bump logistics Signed-off-by: Gireesh Sreepathi --- .../connectors/destination-snowflake/build.gradle | 4 ++-- .../connectors/destination-snowflake/metadata.yaml | 2 +- docs/integrations/destinations/snowflake.md | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/connectors/destination-snowflake/build.gradle b/airbyte-integrations/connectors/destination-snowflake/build.gradle index 43c25808d469..4cc747506746 100644 --- a/airbyte-integrations/connectors/destination-snowflake/build.gradle +++ b/airbyte-integrations/connectors/destination-snowflake/build.gradle @@ -4,9 +4,9 @@ plugins { } airbyteJavaConnector { - cdkVersionRequired = '0.23.2' + cdkVersionRequired = '0.23.11' features = ['db-destinations', 's3-destinations', 'typing-deduping'] - useLocalCdk = true + useLocalCdk = false } java { diff --git a/airbyte-integrations/connectors/destination-snowflake/metadata.yaml b/airbyte-integrations/connectors/destination-snowflake/metadata.yaml index 
d39c5a8c9669..fc0c46bd82cc 100644 --- a/airbyte-integrations/connectors/destination-snowflake/metadata.yaml +++ b/airbyte-integrations/connectors/destination-snowflake/metadata.yaml @@ -5,7 +5,7 @@ data: connectorSubtype: database connectorType: destination definitionId: 424892c4-daac-4491-b35d-c6688ba547ba - dockerImageTag: 3.5.14 + dockerImageTag: 3.6.0 dockerRepository: airbyte/destination-snowflake documentationUrl: https://docs.airbyte.com/integrations/destinations/snowflake githubIssueLabel: destination-snowflake diff --git a/docs/integrations/destinations/snowflake.md b/docs/integrations/destinations/snowflake.md index 39be90148e99..966224d41e13 100644 --- a/docs/integrations/destinations/snowflake.md +++ b/docs/integrations/destinations/snowflake.md @@ -246,6 +246,7 @@ Otherwise, make sure to grant the role the required permissions in the desired n | Version | Date | Pull Request | Subject | |:----------------|:-----------|:-----------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 3.6.0 | 2024-02-22 | [35308](https://github.com/airbytehq/airbyte/pull/35308) | Upgrade CDK; use UTC timezone for extracted_at; migrate existing extracted_at values to UTC | | 3.5.14 | 2024-02-22 | [35456](https://github.com/airbytehq/airbyte/pull/35456) | Adopt CDK 0.23.0; Gather initial state upfront, reduce information_schema calls | | 3.5.13 | 2024-02-22 | [35569](https://github.com/airbytehq/airbyte/pull/35569) | Fix logging bug. | | 3.5.12 | 2024-02-15 | [35240](https://github.com/airbytehq/airbyte/pull/35240) | Adopt CDK 0.20.9 | From c109d59fbfb0e551a1c2354caab37e96d32eb5d1 Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Fri, 1 Mar 2024 12:40:41 -0800 Subject: [PATCH 10/33] uppercase rawNamespace for test --- .../typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java index 29c3b6be04df..877c5419da9a 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java @@ -72,7 +72,7 @@ protected SnowflakeSqlGenerator getSqlGenerator() { @Override protected SnowflakeDestinationHandler getDestinationHandler() { - return new SnowflakeDestinationHandler(databaseName, database, namespace); + return new SnowflakeDestinationHandler(databaseName, database, namespace.toUpperCase()); } @Override From 0d9d66d86f9efafce2bedcde16f47a9154da709e Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Mon, 4 Mar 2024 10:38:24 -0800 Subject: [PATCH 11/33] skip min-timestamp query for overwrite sync mode --- .../typing_deduping/SnowflakeDestinationHandler.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git
a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java index ab26fdac7d8b..48b925c0889c 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java @@ -29,6 +29,7 @@ import io.airbyte.integrations.base.destination.typing_deduping.UnsupportedOneOf; import io.airbyte.integrations.destination.snowflake.typing_deduping.migrations.SnowflakeState; import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; +import io.airbyte.protocol.models.v0.DestinationSyncMode; import java.sql.ResultSet; import java.sql.SQLException; import java.time.Instant; @@ -125,7 +126,11 @@ AND table_name IN (%s) return tableRowCounts; } - public InitialRawTableStatus getInitialRawTableState(final StreamId id) throws Exception { + private InitialRawTableStatus getInitialRawTableState(final StreamId id, final DestinationSyncMode destinationSyncMode) throws Exception { + // Short-circuit for overwrite, table will be truncated anyway + if (destinationSyncMode == DestinationSyncMode.OVERWRITE) { + return new InitialRawTableStatus(false, false, Optional.empty()); + } final ResultSet tables = database.getMetaData().getTables( databaseName, id.rawNamespace(), @@ -274,7 +279,7 @@ public List<DestinationInitialStatus<SnowflakeState>> gatherInitialState(List<StreamConfig> streamConfigs) { } - final InitialRawTableStatus initialRawTableState = getInitialRawTableState(streamConfig.id()); + final InitialRawTableStatus initialRawTableState = getInitialRawTableState(streamConfig.id(), streamConfig.destinationSyncMode()); final SnowflakeState destinationState = destinationStates.getOrDefault(streamConfig.id().asPair(), toDestinationState(Jsons.emptyObject())); return new DestinationInitialStatus<>( streamConfig, From 6a4ec8856cb13d1680bbb8397d134a937e0e93b7 Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Mon, 4 Mar 2024 11:00:09 -0800 Subject: [PATCH 12/33] fmt Signed-off-by: Gireesh Sreepathi --- .../ExtractedAtUtcTimezoneMigration.kt | 179 ++++++++++++------ .../migrations/SnowflakeState.kt | 20 +- 2 files changed, 132 insertions(+), 67 deletions(-) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt index f3f9a65849e9..86eb33fc5201 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt @@ -1,3 +1,7 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+ */ + package io.airbyte.integrations.destination.snowflake.typing_deduping.migrations import com.fasterxml.jackson.databind.JsonNode @@ -8,7 +12,6 @@ import io.airbyte.commons.json.Jsons import io.airbyte.integrations.base.destination.typing_deduping.* import io.airbyte.integrations.base.destination.typing_deduping.migrators.Migration import io.airbyte.protocol.models.v0.DestinationSyncMode -import org.jooq.Field import org.jooq.conf.ParamType import org.jooq.impl.DSL.* import org.slf4j.Logger @@ -23,68 +26,126 @@ since we need to query each table separately anyway. So we just take the databas _do_ still use destinationHandler.execute, because that gives us debug SQL logs for when we run the actual migration. */ -class ExtractedAtUtcTimezoneMigration(private val database: JdbcDatabase) : Migration { - private val logger: Logger = LoggerFactory.getLogger(ExtractedAtUtcTimezoneMigration::class.java) +class ExtractedAtUtcTimezoneMigration(private val database: JdbcDatabase) : + Migration { + private val logger: Logger = + LoggerFactory.getLogger(ExtractedAtUtcTimezoneMigration::class.java) - override fun migrateIfNecessary(destinationHandler: DestinationHandler, stream: StreamConfig, state: DestinationInitialStatus): Migration.MigrationResult { - if (!state.initialRawTableStatus.rawTableExists) { - // The raw table doesn't exist. No migration necessary. Update the state. - logger.info("Skipping ExtractedAtUtcTimezoneMigration for ${stream.id.originalNamespace}.${stream.id.originalName} because the raw table doesn't exist") - return Migration.MigrationResult(state.destinationState.copy(extractedAtInUtc = true), false) - } - if (stream.destinationSyncMode == DestinationSyncMode.OVERWRITE) { - // We're nuking the data for this stream. No migration necessary. Update the state. - logger.info("Skipping ExtractedAtUtcTimezoneMigration for ${stream.id.originalNamespace}.${stream.id.originalName} because the sync mode is OVERWRITE.") - return Migration.MigrationResult(state.destinationState.copy(extractedAtInUtc = true), false) - } - if (state.destinationState.extractedAtInUtc) { - logger.info("Skipping ExtractedAtUtcTimezoneMigration for ${stream.id.originalNamespace}.${stream.id.originalName}, already done.") - return Migration.MigrationResult(state.destinationState, false) - } - val rawRecordTimezone: JsonNode? = database.queryJsons( - { connection -> - connection.prepareStatement( - select( - field(sql("extract(timezone_hour from \"_airbyte_extracted_at\")")).`as`("tzh"), - field(sql("extract(timezone_minute from \"_airbyte_extracted_at\")")).`as`("tzm") - ).from(table(quotedName(stream.id().rawNamespace, stream.id().rawName))) - .limit(1) - .getSQL(ParamType.INLINED)) - }, - { rs -> - (Jsons.emptyObject() as ObjectNode) - .put("tzh", rs.getInt("tzh")) - .put("tzm", rs.getInt("tzm")) + override fun migrateIfNecessary( + destinationHandler: DestinationHandler, + stream: StreamConfig, + state: DestinationInitialStatus + ): Migration.MigrationResult { + if (!state.initialRawTableStatus.rawTableExists) { + // The raw table doesn't exist. No migration necessary. Update the state. + logger.info( + "Skipping ExtractedAtUtcTimezoneMigration for ${stream.id.originalNamespace}.${stream.id.originalName} because the raw table doesn't exist" + ) + return Migration.MigrationResult( + state.destinationState.copy(extractedAtInUtc = true), + false + ) + } + if (stream.destinationSyncMode == DestinationSyncMode.OVERWRITE) { + // We're nuking the data for this stream. No migration necessary. Update the state. 
+ logger.info( + "Skipping ExtractedAtUtcTimezoneMigration for ${stream.id.originalNamespace}.${stream.id.originalName} because the sync mode is OVERWRITE." + ) + return Migration.MigrationResult( + state.destinationState.copy(extractedAtInUtc = true), + false + ) + } + if (state.destinationState.extractedAtInUtc) { + logger.info( + "Skipping ExtractedAtUtcTimezoneMigration for ${stream.id.originalNamespace}.${stream.id.originalName}, already done." + ) + return Migration.MigrationResult(state.destinationState, false) + } + val rawRecordTimezone: JsonNode? = + database + .queryJsons( + { connection -> + connection.prepareStatement( + select( + field( + sql( + "extract(timezone_hour from \"_airbyte_extracted_at\")" + ) + ) + .`as`("tzh"), + field( + sql( + "extract(timezone_minute from \"_airbyte_extracted_at\")" + ) + ) + .`as`("tzm") + ) + .from( + table(quotedName(stream.id().rawNamespace, stream.id().rawName)) + ) + .limit(1) + .getSQL(ParamType.INLINED) + ) + }, + { rs -> + (Jsons.emptyObject() as ObjectNode) + .put("tzh", rs.getInt("tzh")) + .put("tzm", rs.getInt("tzm")) + } + ) + .first() + if ( + rawRecordTimezone == null || + (rawRecordTimezone.get("tzh").intValue() == 0 && + rawRecordTimezone.get("tzm").intValue() == 0) + ) { + // There are no raw records, or the raw records are already in UTC. No migration + // necessary. Update the state. + logger.info( + "Skipping ExtractedAtUtcTimezoneMigration for ${stream.id.originalNamespace}.${stream.id.originalName} because the raw table doesn't contain records needing migration." + ) + return Migration.MigrationResult( + state.destinationState.copy(extractedAtInUtc = true), + false + ) } - ).first() - if (rawRecordTimezone == null - || (rawRecordTimezone.get("tzh").intValue() == 0 && rawRecordTimezone.get("tzm").intValue() == 0)) { - // There are no raw records, or the raw records are already in UTC. No migration necessary. Update the state. - logger.info("Skipping ExtractedAtUtcTimezoneMigration for ${stream.id.originalNamespace}.${stream.id.originalName} because the raw table doesn't contain records needing migration.") - return Migration.MigrationResult(state.destinationState.copy(extractedAtInUtc = true), false) - } - logger.info("Executing ExtractedAtUtcTimezoneMigration for ${stream.id.originalNamespace}.${stream.id.originalName} for real.") + logger.info( + "Executing ExtractedAtUtcTimezoneMigration for ${stream.id.originalNamespace}.${stream.id.originalName} for real." + ) - destinationHandler.execute(Sql.of( - update(table(quotedName(stream.id().rawNamespace, stream.id().rawName))) - .set( - field(quotedName(JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT)), - // this is the easiest way to forcibly set the offset on a timestamptz. - // We convert to timestamp_ntz to remove the offset, - // then convert to string and append a 'Z' offset, - // then convert back to timestamp_tz. - // We _could_ go through convert_timezone and manually add a negative offset number of hours - // but that's a lot more work for no real benefit. - field(sql(""" + destinationHandler.execute( + Sql.of( + update(table(quotedName(stream.id().rawNamespace, stream.id().rawName))) + .set( + field(quotedName(JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT)), + // this is the easiest way to forcibly set the offset on a timestamptz. + // We convert to timestamp_ntz to remove the offset, + // then convert to string and append a 'Z' offset, + // then convert back to timestamp_tz. 
+ // We _could_ go through convert_timezone and manually add a negative offset + // number of hours + // but that's a lot more work for no real benefit. + field( + sql( + """ cast(cast(cast("_airbyte_extracted_at" as timestampntz) as string) || 'Z' as timestamptz) - """.trimIndent())) as Any - ).getSQL(ParamType.INLINED)) - ) - - // We've executed the migration. Update the state and trigger a soft reset. - // Invalidate the initial state - we've modified all the extracted_at timestamps, so need to refetch them. - return Migration.MigrationResult(state.destinationState.copy(needsSoftReset = true, extractedAtInUtc = true), true) - } + """.trimIndent() + ) + ) + as Any + ) + .getSQL(ParamType.INLINED) + ) + ) + // We've executed the migration. Update the state and trigger a soft reset. + // Invalidate the initial state - we've modified all the extracted_at timestamps, so need to + // refetch them. + return Migration.MigrationResult( + state.destinationState.copy(needsSoftReset = true, extractedAtInUtc = true), + true + ) + } } diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/SnowflakeState.kt b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/SnowflakeState.kt index bf40925dc4a2..f415b4afba50 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/SnowflakeState.kt +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/SnowflakeState.kt @@ -1,16 +1,20 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. + */ + package io.airbyte.integrations.destination.snowflake.typing_deduping.migrations import io.airbyte.integrations.base.destination.typing_deduping.migrators.MinimumDestinationState // Note the nonnullable fields. Even though the underlying storage medium (a JSON blob) supports // nullability, we don't want to deal with that in our codebase. 
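// Illustrative usage: SnowflakeState(needsSoftReset = false, extractedAtInUtc = true)
//     .withSoftReset<SnowflakeState>(true)
// yields a copy with needsSoftReset = true and extractedAtInUtc left unchanged.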
-data class SnowflakeState(val needsSoftReset: Boolean, - val extractedAtInUtc: Boolean): MinimumDestinationState { - override fun needsSoftReset(): Boolean { - return needsSoftReset - } +data class SnowflakeState(val needsSoftReset: Boolean, val extractedAtInUtc: Boolean) : + MinimumDestinationState { + override fun needsSoftReset(): Boolean { + return needsSoftReset + } - override fun <T : MinimumDestinationState> withSoftReset(needsSoftReset: Boolean): T { - return copy(needsSoftReset = needsSoftReset) as T - } + override fun <T : MinimumDestinationState> withSoftReset(needsSoftReset: Boolean): T { + return copy(needsSoftReset = needsSoftReset) as T + } } From 226dde4959199ef80fe9684d27e56bf5c4587c16 Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Mon, 4 Mar 2024 13:47:40 -0800 Subject: [PATCH 13/33] add explicit date format Signed-off-by: Gireesh Sreepathi --- .../migrations/ExtractedAtUtcTimezoneMigration.kt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt index 86eb33fc5201..89cab09b0bd1 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt @@ -130,8 +130,12 @@ class ExtractedAtUtcTimezoneMigration(private val database: JdbcDatabase) : field( sql( """ - cast(cast(cast("_airbyte_extracted_at" as timestampntz) as string) || 'Z' as timestamptz) - """.trimIndent() + cast( + to_varchar( + cast("_airbyte_extracted_at" as timestampntz), + 'YYYY-MM-DDTHH24:MI:SS.FF9') + || 'Z' as timestamptz) + """.trimIndent() ) ) as Any From c7b8bffcbba0420790405e6fcf4760bbfe17c0e9 Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Mon, 4 Mar 2024 14:45:46 -0800 Subject: [PATCH 14/33] nit: use to_timestamp_tz with an explicit timezone format --- .../migrations/ExtractedAtUtcTimezoneMigration.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt index 89cab09b0bd1..a7b7adf468cf 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt @@ -130,11 +130,11 @@ class ExtractedAtUtcTimezoneMigration(private val database: JdbcDatabase) : field( sql( """ - cast( + to_timestamp_tz( to_varchar( cast("_airbyte_extracted_at" as timestampntz), 'YYYY-MM-DDTHH24:MI:SS.FF9') - || 'Z' as timestamptz) + || 'Z', 'YYYY-MM-DDTHH24:MI:SS.FF9TZH:TZM') From 0dad9c64891530da761a42577976f045511eaf7b Mon Sep 17 00:00:00 2001 From: Edward Gao
Date: Tue, 5 Mar 2024 10:48:11 -0800 Subject: [PATCH 15/33] add DST transition tests --- .../SnowflakeSqlGeneratorIntegrationTest.java | 276 ++++++++++++++++++ 1 file changed, 276 insertions(+) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java index 877c5419da9a..902cb68b2a05 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java @@ -25,6 +25,7 @@ import io.airbyte.integrations.base.destination.typing_deduping.DestinationInitialStatus; import io.airbyte.integrations.base.destination.typing_deduping.Sql; import io.airbyte.integrations.base.destination.typing_deduping.StreamId; +import io.airbyte.integrations.base.destination.typing_deduping.TypeAndDedupeTransaction; import io.airbyte.integrations.destination.snowflake.OssCloudEnvVarConsts; import io.airbyte.integrations.destination.snowflake.SnowflakeDatabase; import io.airbyte.integrations.destination.snowflake.SnowflakeTestSourceOperations; @@ -432,4 +433,279 @@ public void ensurePKsAreIndexedUnique() throws Exception { assertTrue(initialStates.get(0).isSchemaMismatch()); } + // Note the null updated_at values. This forces us to rely on the extracted_at tiebreaker.
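+ // These scenarios straddle the US spring-forward transition: "-08:00" offsets are
+ // pre-transition local time and "-07:00" post-transition, so local-offset timestamps
+ // do not compare in wall-clock order against UTC-normalized ones. (The transition is
+ // 2024-03-10; the 2023 dates below are corrected to 2024 in a later commit.)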
+ + @Test + public void dst_test_oldSyncRunsThroughTransition_thenNewSyncRuns() throws Exception { + this.createRawTable(this.streamId); + this.createFinalTable(this.incrementalDedupStream, ""); + this.insertRawTableRecords(this.streamId, List.of( + // 2 records written by a sync running on the old version of snowflake + Jsons.deserialize(""" + { + "_airbyte_raw_id": "pre-dst local tz 1", + "_airbyte_extracted_at": "2023-03-10T02:00:00-08:00", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice00" + } + } + """), + Jsons.deserialize(""" + { + "_airbyte_raw_id": "post-dst local tz 2", + "_airbyte_extracted_at": "2023-03-10T02:01:00-07:00", + "_airbyte_data": { + "id1": 2, + "id2": 100, + "string": "Bob00" + } + } + """))); + // Gather initial state at the start of our updated sync + DestinationInitialStatus initialState = this.destinationHandler.gatherInitialState(List.of(this.incrementalDedupStream)).getFirst(); + this.insertRawTableRecords(this.streamId, List.of( + // insert raw records with updates + Jsons.deserialize(""" + { + "_airbyte_raw_id": "post-dst utc 1", + "_airbyte_extracted_at": "2023-03-10T02:02:00Z", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice01" + } + } + """), + Jsons.deserialize(""" + { + "_airbyte_raw_id": "post-dst utc 2", + "_airbyte_extracted_at": "2023-03-10T02:02:00Z", + "_airbyte_data": { + "id1": 2, + "id2": 100, + "string": "Bob01" + } + } + """))); + + TypeAndDedupeTransaction.executeTypeAndDedupe(this.generator, this.destinationHandler, this.incrementalDedupStream, initialState.initialRawTableStatus().maxProcessedTimestamp(), ""); + + DIFFER.diffFinalTableRecords( + List.of( + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "post-dst utc 1", + "_AIRBYTE_EXTRACTED_AT": "2023-03-10T02:02:00Z", + "ID1": 1, + "ID2": 100, + "STRING": "Alice01" + } + """), + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "post-dst utc 2", + "_AIRBYTE_EXTRACTED_AT": "2023-03-10T02:02:00Z", + "ID1": 2, + "ID2": 100, + "STRING": "Bob01" + } + """)), + this.dumpFinalTableRecords(this.streamId, "")); + } + + @Test + public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsThroughTransition() throws Exception { + this.createRawTable(this.streamId); + this.createFinalTable(this.incrementalDedupStream, ""); + this.insertRawTableRecords(this.streamId, List.of( + // record written by a sync running on the old version of snowflake + Jsons.deserialize(""" + { + "_airbyte_raw_id": "pre-dst local tz 1", + "_airbyte_extracted_at": "2023-03-10T01:59:00-08:00", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice00" + } + } + """))); + // Gather initial state at the start of our updated sync + DestinationInitialStatus initialState = this.destinationHandler.gatherInitialState(List.of(this.incrementalDedupStream)).getFirst(); + this.insertRawTableRecords(this.streamId, List.of( + // update the record twice + // this never really happens, but verify that it works + Jsons.deserialize(""" + { + "_airbyte_raw_id": "pre-dst utc 1", + "_airbyte_extracted_at": "2023-03-10T02:00:00Z", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice01" + } + } + """), + Jsons.deserialize(""" + { + "_airbyte_raw_id": "post-dst utc 1", + "_airbyte_extracted_at": "2023-03-10T02:01:00Z", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice02" + } + } + """))); + + TypeAndDedupeTransaction.executeTypeAndDedupe(this.generator, this.destinationHandler, this.incrementalDedupStream, initialState.initialRawTableStatus().maxProcessedTimestamp(), ""); + 
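+ // The final table should now contain only the latest update for id (1, 100):
+ // Alice02, written by the new UTC-based sync.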
+ DIFFER.diffFinalTableRecords( + List.of( + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "post-dst utc 1", + "_AIRBYTE_EXTRACTED_AT": "2023-03-10T02:01:00Z", + "ID1": 1, + "ID2": 100, + "STRING": "Alice02" + } + """)), + this.dumpFinalTableRecords(this.streamId, "")); + } + + @Test + public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsBeforeTransition_thenNewSyncRunsThroughTransition() throws Exception { + this.createRawTable(this.streamId); + this.createFinalTable(this.incrementalDedupStream, ""); + this.insertRawTableRecords(this.streamId, List.of( + // records written by a sync running on the old version of snowflake + Jsons.deserialize(""" + { + "_airbyte_raw_id": "pre-dst local tz 1", + "_airbyte_extracted_at": "2023-03-10T01:59:00-08:00", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice00" + } + } + """), + Jsons.deserialize(""" + { + "_airbyte_raw_id": "pre-dst local tz 2", + "_airbyte_extracted_at": "2023-03-10T01:59:00-08:00", + "_airbyte_data": { + "id1": 2, + "id2": 100, + "string": "Bob00" + } + } + """))); + + // Gather initial state at the start of our first new sync + DestinationInitialStatus initialState = this.destinationHandler.gatherInitialState(List.of(this.incrementalDedupStream)).getFirst(); + this.insertRawTableRecords(this.streamId, List.of( + // update the records + Jsons.deserialize(""" + { + "_airbyte_raw_id": "pre-dst utc 1", + "_airbyte_extracted_at": "2023-03-10T02:00:00Z", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice01" + } + } + """), + Jsons.deserialize(""" + { + "_airbyte_raw_id": "pre-dst utc 2", + "_airbyte_extracted_at": "2023-03-10T02:00:00Z", + "_airbyte_data": { + "id1": 2, + "id2": 100, + "string": "Bob01" + } + } + """))); + + TypeAndDedupeTransaction.executeTypeAndDedupe(this.generator, this.destinationHandler, this.incrementalDedupStream, initialState.initialRawTableStatus().maxProcessedTimestamp(), ""); + + DIFFER.diffFinalTableRecords( + List.of( + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "pre-dst utc 1", + "_AIRBYTE_EXTRACTED_AT": "2023-03-10T02:00:00Z", + "ID1": 1, + "ID2": 100, + "STRING": "Alice01" + } + """), + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "pre-dst utc 2", + "_AIRBYTE_EXTRACTED_AT": "2023-03-10T02:00:00Z", + "ID1": 2, + "ID2": 100, + "STRING": "Bob01" + } + """)), + this.dumpFinalTableRecords(this.streamId, "")); + + // Gather initial state at the start of our second new sync + DestinationInitialStatus initialState2 = this.destinationHandler.gatherInitialState(List.of(this.incrementalDedupStream)).getFirst(); + this.insertRawTableRecords(this.streamId, List.of( + // update the records again + Jsons.deserialize(""" + { + "_airbyte_raw_id": "post-dst utc 1", + "_airbyte_extracted_at": "2023-03-10T02:01:00Z", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice02" + } + } + """), + Jsons.deserialize(""" + { + "_airbyte_raw_id": "post-dst utc 2", + "_airbyte_extracted_at": "2023-03-10T02:01:00Z", + "_airbyte_data": { + "id1": 2, + "id2": 100, + "string": "Bob02" + } + } + """))); + + TypeAndDedupeTransaction.executeTypeAndDedupe(this.generator, this.destinationHandler, this.incrementalDedupStream, initialState2.initialRawTableStatus().maxProcessedTimestamp(), ""); + + DIFFER.diffFinalTableRecords( + List.of( + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "post-dst utc 1", + "_AIRBYTE_EXTRACTED_AT": "2023-03-10T02:01:00Z", + "ID1": 1, + "ID2": 100, + "STRING": "Alice02" + } + """), + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "post-dst utc 2", + 
"_AIRBYTE_EXTRACTED_AT": "2023-03-10T02:01:00Z", + "ID1": 2, + "ID2": 100, + "STRING": "Bob02" + } + """)), + this.dumpFinalTableRecords(this.streamId, "")); + } + } From f4657b179191d0423f78411549255404f9cae105 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 5 Mar 2024 10:55:14 -0800 Subject: [PATCH 16/33] 2024 >.> --- .../SnowflakeSqlGeneratorIntegrationTest.java | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java index 902cb68b2a05..230e6e919f8e 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java @@ -444,7 +444,7 @@ public void dst_test_oldSyncRunsThroughTransition_thenNewSyncRuns() throws Excep Jsons.deserialize(""" { "_airbyte_raw_id": "pre-dst local tz 1", - "_airbyte_extracted_at": "2023-03-10T02:00:00-08:00", + "_airbyte_extracted_at": "2024-03-10T02:00:00-08:00", "_airbyte_data": { "id1": 1, "id2": 100, @@ -455,7 +455,7 @@ public void dst_test_oldSyncRunsThroughTransition_thenNewSyncRuns() throws Excep Jsons.deserialize(""" { "_airbyte_raw_id": "post-dst local tz 2", - "_airbyte_extracted_at": "2023-03-10T02:01:00-07:00", + "_airbyte_extracted_at": "2024-03-10T02:01:00-07:00", "_airbyte_data": { "id1": 2, "id2": 100, @@ -470,7 +470,7 @@ public void dst_test_oldSyncRunsThroughTransition_thenNewSyncRuns() throws Excep Jsons.deserialize(""" { "_airbyte_raw_id": "post-dst utc 1", - "_airbyte_extracted_at": "2023-03-10T02:02:00Z", + "_airbyte_extracted_at": "2024-03-10T02:02:00Z", "_airbyte_data": { "id1": 1, "id2": 100, @@ -481,7 +481,7 @@ public void dst_test_oldSyncRunsThroughTransition_thenNewSyncRuns() throws Excep Jsons.deserialize(""" { "_airbyte_raw_id": "post-dst utc 2", - "_airbyte_extracted_at": "2023-03-10T02:02:00Z", + "_airbyte_extracted_at": "2024-03-10T02:02:00Z", "_airbyte_data": { "id1": 2, "id2": 100, @@ -497,7 +497,7 @@ public void dst_test_oldSyncRunsThroughTransition_thenNewSyncRuns() throws Excep Jsons.deserialize(""" { "_AIRBYTE_RAW_ID": "post-dst utc 1", - "_AIRBYTE_EXTRACTED_AT": "2023-03-10T02:02:00Z", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00Z", "ID1": 1, "ID2": 100, "STRING": "Alice01" @@ -506,7 +506,7 @@ public void dst_test_oldSyncRunsThroughTransition_thenNewSyncRuns() throws Excep Jsons.deserialize(""" { "_AIRBYTE_RAW_ID": "post-dst utc 2", - "_AIRBYTE_EXTRACTED_AT": "2023-03-10T02:02:00Z", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00Z", "ID1": 2, "ID2": 100, "STRING": "Bob01" @@ -524,7 +524,7 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsThroughTransitio Jsons.deserialize(""" { "_airbyte_raw_id": "pre-dst local tz 1", - "_airbyte_extracted_at": "2023-03-10T01:59:00-08:00", + "_airbyte_extracted_at": "2024-03-10T01:59:00-08:00", "_airbyte_data": { "id1": 1, "id2": 100, @@ -540,7 +540,7 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsThroughTransitio Jsons.deserialize(""" { 
"_airbyte_raw_id": "pre-dst utc 1", - "_airbyte_extracted_at": "2023-03-10T02:00:00Z", + "_airbyte_extracted_at": "2024-03-10T02:00:00Z", "_airbyte_data": { "id1": 1, "id2": 100, @@ -551,7 +551,7 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsThroughTransitio Jsons.deserialize(""" { "_airbyte_raw_id": "post-dst utc 1", - "_airbyte_extracted_at": "2023-03-10T02:01:00Z", + "_airbyte_extracted_at": "2024-03-10T02:01:00Z", "_airbyte_data": { "id1": 1, "id2": 100, @@ -567,7 +567,7 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsThroughTransitio Jsons.deserialize(""" { "_AIRBYTE_RAW_ID": "post-dst utc 1", - "_AIRBYTE_EXTRACTED_AT": "2023-03-10T02:01:00Z", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00Z", "ID1": 1, "ID2": 100, "STRING": "Alice02" @@ -585,7 +585,7 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsBeforeTransition Jsons.deserialize(""" { "_airbyte_raw_id": "pre-dst local tz 1", - "_airbyte_extracted_at": "2023-03-10T01:59:00-08:00", + "_airbyte_extracted_at": "2024-03-10T01:59:00-08:00", "_airbyte_data": { "id1": 1, "id2": 100, @@ -596,7 +596,7 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsBeforeTransition Jsons.deserialize(""" { "_airbyte_raw_id": "pre-dst local tz 2", - "_airbyte_extracted_at": "2023-03-10T01:59:00-08:00", + "_airbyte_extracted_at": "2024-03-10T01:59:00-08:00", "_airbyte_data": { "id1": 2, "id2": 100, @@ -612,7 +612,7 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsBeforeTransition Jsons.deserialize(""" { "_airbyte_raw_id": "pre-dst utc 1", - "_airbyte_extracted_at": "2023-03-10T02:00:00Z", + "_airbyte_extracted_at": "2024-03-10T02:00:00Z", "_airbyte_data": { "id1": 1, "id2": 100, @@ -623,7 +623,7 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsBeforeTransition Jsons.deserialize(""" { "_airbyte_raw_id": "pre-dst utc 2", - "_airbyte_extracted_at": "2023-03-10T02:00:00Z", + "_airbyte_extracted_at": "2024-03-10T02:00:00Z", "_airbyte_data": { "id1": 2, "id2": 100, @@ -639,7 +639,7 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsBeforeTransition Jsons.deserialize(""" { "_AIRBYTE_RAW_ID": "pre-dst utc 1", - "_AIRBYTE_EXTRACTED_AT": "2023-03-10T02:00:00Z", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:00:00Z", "ID1": 1, "ID2": 100, "STRING": "Alice01" @@ -648,7 +648,7 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsBeforeTransition Jsons.deserialize(""" { "_AIRBYTE_RAW_ID": "pre-dst utc 2", - "_AIRBYTE_EXTRACTED_AT": "2023-03-10T02:00:00Z", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:00:00Z", "ID1": 2, "ID2": 100, "STRING": "Bob01" @@ -663,7 +663,7 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsBeforeTransition Jsons.deserialize(""" { "_airbyte_raw_id": "post-dst utc 1", - "_airbyte_extracted_at": "2023-03-10T02:01:00Z", + "_airbyte_extracted_at": "2024-03-10T02:01:00Z", "_airbyte_data": { "id1": 1, "id2": 100, @@ -674,7 +674,7 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsBeforeTransition Jsons.deserialize(""" { "_airbyte_raw_id": "post-dst utc 2", - "_airbyte_extracted_at": "2023-03-10T02:01:00Z", + "_airbyte_extracted_at": "2024-03-10T02:01:00Z", "_airbyte_data": { "id1": 2, "id2": 100, @@ -690,7 +690,7 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsBeforeTransition Jsons.deserialize(""" { "_AIRBYTE_RAW_ID": "post-dst utc 1", - "_AIRBYTE_EXTRACTED_AT": "2023-03-10T02:01:00Z", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00Z", "ID1": 1, "ID2": 100, "STRING": 
"Alice02" @@ -699,7 +699,7 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsBeforeTransition Jsons.deserialize(""" { "_AIRBYTE_RAW_ID": "post-dst utc 2", - "_AIRBYTE_EXTRACTED_AT": "2023-03-10T02:01:00Z", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00Z", "ID1": 2, "ID2": 100, "STRING": "Bob02" From 08b77ee84866cf25117f98ccfa28aa12ca824640 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 5 Mar 2024 11:21:18 -0800 Subject: [PATCH 17/33] add test for existing local tz final table records --- .../SnowflakeSqlGeneratorIntegrationTest.java | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java index 230e6e919f8e..0e791e3573ae 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java @@ -462,7 +462,52 @@ public void dst_test_oldSyncRunsThroughTransition_thenNewSyncRuns() throws Excep "string": "Bob00" } } + """), + // and 2 records that got successfully loaded. + Jsons.deserialize(""" + { + "_airbyte_raw_id": "pre-dst local tz 3", + "_airbyte_extracted_at": "2024-03-10T02:00:00-08:00", + "_airbyte_loaded_at": "1970-01-01T00:00:00Z", + "_airbyte_data": { + "id1": 3, + "id2": 100, + "string": "Charlie00" + } + } + """), + Jsons.deserialize(""" + { + "_airbyte_raw_id": "post-dst local tz 4", + "_airbyte_extracted_at": "2024-03-10T02:01:00-07:00", + "_airbyte_loaded_at": "1970-01-01T00:00:00Z", + "_airbyte_data": { + "id1": 4, + "id2": 100, + "string": "Dave00" + } + } """))); + this.insertFinalTableRecords(false, this.streamId, "", List.of( + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "pre-dst local tz 3", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:00:00-08:00", + "ID1": 3, + "ID2": 100, + "STRING": "Charlie00" + } + """), + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "post-dst local tz 4", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00-07:00", + "ID1": 4, + "ID2": 100, + "STRING": "Dave00" + } + """) + )); // Gather initial state at the start of our updated sync DestinationInitialStatus initialState = this.destinationHandler.gatherInitialState(List.of(this.incrementalDedupStream)).getFirst(); this.insertRawTableRecords(this.streamId, List.of( @@ -488,6 +533,28 @@ public void dst_test_oldSyncRunsThroughTransition_thenNewSyncRuns() throws Excep "string": "Bob01" } } + """), + Jsons.deserialize(""" + { + "_airbyte_raw_id": "post-dst utc 3", + "_airbyte_extracted_at": "2024-03-10T02:02:00Z", + "_airbyte_data": { + "id1": 3, + "id2": 100, + "string": "Charlie01" + } + } + """), + Jsons.deserialize(""" + { + "_airbyte_raw_id": "post-dst utc 4", + "_airbyte_extracted_at": "2024-03-10T02:02:00Z", + "_airbyte_data": { + "id1": 4, + "id2": 100, + "string": "Dave01" + } + } """))); TypeAndDedupeTransaction.executeTypeAndDedupe(this.generator, this.destinationHandler, this.incrementalDedupStream, initialState.initialRawTableStatus().maxProcessedTimestamp(), ""); @@ -511,6 +578,24 
@@ public void dst_test_oldSyncRunsThroughTransition_thenNewSyncRuns() throws Excep "ID2": 100, "STRING": "Bob01" } + """), + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "post-dst utc 3", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00Z", + "ID1": 3, + "ID2": 100, + "STRING": "Charlie01" + } + """), + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "post-dst utc 4", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00Z", + "ID1": 4, + "ID2": 100, + "STRING": "Dave01" + } """)), this.dumpFinalTableRecords(this.streamId, "")); } From 8bb18035bf1c0d9d324ca9db5bc435c54e63f942 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 5 Mar 2024 11:23:12 -0800 Subject: [PATCH 18/33] delete migration --- .../SnowflakeInternalStagingDestination.java | 3 +- .../SnowflakeDestinationHandler.java | 3 +- .../ExtractedAtUtcTimezoneMigration.kt | 155 ------------------ .../migrations/SnowflakeState.kt | 2 +- 4 files changed, 3 insertions(+), 160 deletions(-) delete mode 100644 airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java index 63856ce600c7..29eb9175e988 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java @@ -29,7 +29,6 @@ import io.airbyte.integrations.destination.snowflake.typing_deduping.SnowflakeSqlGenerator; import io.airbyte.integrations.destination.snowflake.typing_deduping.SnowflakeV1V2Migrator; import io.airbyte.integrations.destination.snowflake.typing_deduping.SnowflakeV2TableMigrator; -import io.airbyte.integrations.destination.snowflake.typing_deduping.migrations.ExtractedAtUtcTimezoneMigration; import io.airbyte.integrations.destination.snowflake.typing_deduping.migrations.SnowflakeState; import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; import io.airbyte.protocol.models.v0.AirbyteMessage; @@ -170,7 +169,7 @@ public SerializedAirbyteMessageConsumer getSerializedMessageConsumer(final JsonN final SnowflakeV1V2Migrator migrator = new SnowflakeV1V2Migrator(getNamingResolver(), database, databaseName); final SnowflakeV2TableMigrator v2TableMigrator = new SnowflakeV2TableMigrator(database, databaseName, sqlGenerator, snowflakeDestinationHandler); final boolean disableTypeDedupe = config.has(DISABLE_TYPE_DEDUPE) && config.get(DISABLE_TYPE_DEDUPE).asBoolean(false); - final List> migrations = List.of(new ExtractedAtUtcTimezoneMigration(database)); + final List> migrations = List.of(); if (disableTypeDedupe) { typerDeduper = new NoOpTyperDeduperWithV1V2Migrations<>(sqlGenerator, snowflakeDestinationHandler, parsedCatalog, migrator, v2TableMigrator, migrations); diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java 
b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java index 48b925c0889c..1275e7d57dcf 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java @@ -312,8 +312,7 @@ protected String toJdbcTypeName(AirbyteType airbyteType) { @Override protected SnowflakeState toDestinationState(JsonNode json) { return new SnowflakeState( - json.hasNonNull("needsSoftReset") && json.get("needsSoftReset").asBoolean(), - json.hasNonNull("extractedAtInUtc") && json.get("extractedAtInUtc").asBoolean()); + json.hasNonNull("needsSoftReset") && json.get("needsSoftReset").asBoolean()); } private String toJdbcTypeName(final AirbyteProtocolType airbyteProtocolType) { diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt deleted file mode 100644 index a7b7adf468cf..000000000000 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/ExtractedAtUtcTimezoneMigration.kt +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2024 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.integrations.destination.snowflake.typing_deduping.migrations - -import com.fasterxml.jackson.databind.JsonNode -import com.fasterxml.jackson.databind.node.ObjectNode -import io.airbyte.cdk.db.jdbc.JdbcDatabase -import io.airbyte.cdk.integrations.base.JavaBaseConstants -import io.airbyte.commons.json.Jsons -import io.airbyte.integrations.base.destination.typing_deduping.* -import io.airbyte.integrations.base.destination.typing_deduping.migrators.Migration -import io.airbyte.protocol.models.v0.DestinationSyncMode -import org.jooq.conf.ParamType -import org.jooq.impl.DSL.* -import org.slf4j.Logger -import org.slf4j.LoggerFactory - -/* -Slightly sketchy to accept a JdbcDatabase here. Migrations should generally prefer to use the -DestinationHandler argument in [migrateIfNecessary] to execute SQL, and DestinationInitialState for -deciding whether a migration is necessary. However, in this case, we need to actually query for data -in the raw table. There's no performance win to doing this via DestinationHandler.gatherInitialState, -since we need to query each table separately anyway. So we just take the database here. However, we -_do_ still use destinationHandler.execute, because that gives us debug SQL logs for when we run the -actual migration. -*/ -class ExtractedAtUtcTimezoneMigration(private val database: JdbcDatabase) : - Migration { - private val logger: Logger = - LoggerFactory.getLogger(ExtractedAtUtcTimezoneMigration::class.java) - - override fun migrateIfNecessary( - destinationHandler: DestinationHandler, - stream: StreamConfig, - state: DestinationInitialStatus - ): Migration.MigrationResult { - if (!state.initialRawTableStatus.rawTableExists) { - // The raw table doesn't exist. No migration necessary. Update the state. 
- logger.info( - "Skipping ExtractedAtUtcTimezoneMigration for ${stream.id.originalNamespace}.${stream.id.originalName} because the raw table doesn't exist" - ) - return Migration.MigrationResult( - state.destinationState.copy(extractedAtInUtc = true), - false - ) - } - if (stream.destinationSyncMode == DestinationSyncMode.OVERWRITE) { - // We're nuking the data for this stream. No migration necessary. Update the state. - logger.info( - "Skipping ExtractedAtUtcTimezoneMigration for ${stream.id.originalNamespace}.${stream.id.originalName} because the sync mode is OVERWRITE." - ) - return Migration.MigrationResult( - state.destinationState.copy(extractedAtInUtc = true), - false - ) - } - if (state.destinationState.extractedAtInUtc) { - logger.info( - "Skipping ExtractedAtUtcTimezoneMigration for ${stream.id.originalNamespace}.${stream.id.originalName}, already done." - ) - return Migration.MigrationResult(state.destinationState, false) - } - val rawRecordTimezone: JsonNode? = - database - .queryJsons( - { connection -> - connection.prepareStatement( - select( - field( - sql( - "extract(timezone_hour from \"_airbyte_extracted_at\")" - ) - ) - .`as`("tzh"), - field( - sql( - "extract(timezone_minute from \"_airbyte_extracted_at\")" - ) - ) - .`as`("tzm") - ) - .from( - table(quotedName(stream.id().rawNamespace, stream.id().rawName)) - ) - .limit(1) - .getSQL(ParamType.INLINED) - ) - }, - { rs -> - (Jsons.emptyObject() as ObjectNode) - .put("tzh", rs.getInt("tzh")) - .put("tzm", rs.getInt("tzm")) - } - ) - .first() - if ( - rawRecordTimezone == null || - (rawRecordTimezone.get("tzh").intValue() == 0 && - rawRecordTimezone.get("tzm").intValue() == 0) - ) { - // There are no raw records, or the raw records are already in UTC. No migration - // necessary. Update the state. - logger.info( - "Skipping ExtractedAtUtcTimezoneMigration for ${stream.id.originalNamespace}.${stream.id.originalName} because the raw table doesn't contain records needing migration." - ) - return Migration.MigrationResult( - state.destinationState.copy(extractedAtInUtc = true), - false - ) - } - - logger.info( - "Executing ExtractedAtUtcTimezoneMigration for ${stream.id.originalNamespace}.${stream.id.originalName} for real." - ) - - destinationHandler.execute( - Sql.of( - update(table(quotedName(stream.id().rawNamespace, stream.id().rawName))) - .set( - field(quotedName(JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT)), - // this is the easiest way to forcibly set the offset on a timestamptz. - // We convert to timestamp_ntz to remove the offset, - // then convert to string and append a 'Z' offset, - // then convert back to timestamp_tz. - // We _could_ go through convert_timezone and manually add a negative offset - // number of hours - // but that's a lot more work for no real benefit. - field( - sql( - """ - to_timestamp_tz( - to_varchar( - cast("_airbyte_extracted_at" as timestampntz), - 'YYYY-MM-DDTHH24:MI:SS.FF9') - || 'Z', 'YYYY-MM-DDTHH24:MI:SS.FF9TZH:TZM') - """.trimIndent() - ) - ) - as Any - ) - .getSQL(ParamType.INLINED) - ) - ) - - // We've executed the migration. Update the state and trigger a soft reset. - // Invalidate the initial state - we've modified all the extracted_at timestamps, so need to - // refetch them. 
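// ---------------------------------------------------------------------------
// Editor's sketch, not part of the deleted file: the UPDATE being removed here
// reinterpreted each stored offset's wall clock as UTC rather than converting
// the instant, e.g. '2024-03-10T02:00:00-08:00' -> '2024-03-10T02:00:00Z'
// (not '2024-03-10T10:00:00Z'). With placeholder table names, the generated
// statement was equivalent to:
//
//   UPDATE "raw_namespace"."raw_table"
//   SET "_airbyte_extracted_at" = to_timestamp_tz(
//         to_varchar(cast("_airbyte_extracted_at" as timestampntz),
//                    'YYYY-MM-DDTHH24:MI:SS.FF9') || 'Z',
//         'YYYY-MM-DDTHH24:MI:SS.FF9TZH:TZM');
//
// This patch drops the rewrite-in-place migration entirely; the next patch
// instead forces the timezone at read time in the typing/deduping SQL.
// ---------------------------------------------------------------------------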
- return Migration.MigrationResult( - state.destinationState.copy(needsSoftReset = true, extractedAtInUtc = true), - true - ) - } -} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/SnowflakeState.kt b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/SnowflakeState.kt index f415b4afba50..f0b8ab35aa31 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/SnowflakeState.kt +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/SnowflakeState.kt @@ -8,7 +8,7 @@ import io.airbyte.integrations.base.destination.typing_deduping.migrators.Minimu // Note the nonnullable fields. Even though the underlying storage medium (a JSON blob) supports // nullability, we don't want to deal with that in our codebase. -data class SnowflakeState(val needsSoftReset: Boolean, val extractedAtInUtc: Boolean) : +data class SnowflakeState(val needsSoftReset: Boolean) : MinimumDestinationState { override fun needsSoftReset(): Boolean { return needsSoftReset From 1fece83ca7fa21c8eadcf4bc6b80162fc910dcae Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Tue, 5 Mar 2024 11:25:58 -0800 Subject: [PATCH 19/33] tsadd dance + fmt --- .../SnowflakeDestinationHandler.java | 47 +- .../SnowflakeSqlGenerator.java | 27 +- .../SnowflakeSqlGeneratorIntegrationTest.java | 553 +++++++++--------- 3 files changed, 334 insertions(+), 293 deletions(-) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java index 1275e7d57dcf..335b5e5f069d 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java @@ -146,15 +146,23 @@ private InitialRawTableStatus getInitialRawTableState(final StreamId id, final D conn -> conn.createStatement().executeQuery(new StringSubstitutor(Map.of( "raw_table", id.rawTableId(SnowflakeSqlGenerator.QUOTE))).replace( """ - SELECT to_varchar( - TIMESTAMPADD(NANOSECOND, -1, MIN("_airbyte_extracted_at")), - 'YYYY-MM-DDTHH24:MI:SS.FF9TZH:TZM' - ) AS MIN_TIMESTAMP - FROM ${raw_table} - WHERE "_airbyte_loaded_at" IS NULL + WITH MIN_TS AS ( + SELECT TIMESTAMPADD(NANOSECOND, -1, MIN("_airbyte_extracted_at")) AS MIN_TIMESTAMP + FROM ${raw_table} + WHERE "_airbyte_loaded_at" IS NULL + ) SELECT TO_VARCHAR( + TIMESTAMPADD( + HOUR, + EXTRACT(timezone_hour from MIN_TIMESTAMP), + TIMESTAMPADD( + MINUTE, + EXTRACT(timezone_minute from MIN_TIMESTAMP), + CONVERT_TIMEZONE('UTC', MIN_TIMESTAMP) + ) + ),'YYYY-MM-DDTHH24:MI:SS.FF9TZH:TZM') as MIN_TIMESTAMP_UTC from MIN_TS; """)), // The query will always return exactly one record, so use .get(0) - record -> record.getString("MIN_TIMESTAMP")).get(0)); + record -> record.getString("MIN_TIMESTAMP_UTC")).get(0)); if (minUnloadedTimestamp.isPresent()) 
{ return new InitialRawTableStatus(true, true, minUnloadedTimestamp.map(Instant::parse)); } @@ -165,13 +173,22 @@ record -> record.getString("MIN_TIMESTAMP")).get(0)); conn -> conn.createStatement().executeQuery(new StringSubstitutor(Map.of( "raw_table", id.rawTableId(SnowflakeSqlGenerator.QUOTE))).replace( """ - SELECT to_varchar( - MAX("_airbyte_extracted_at"), - 'YYYY-MM-DDTHH24:MI:SS.FF9TZH:TZM' - ) AS MIN_TIMESTAMP - FROM ${raw_table} + WITH MAX_TS AS ( + SELECT MAX("_airbyte_extracted_at") + AS MAX_TIMESTAMP + FROM ${raw_table} + ) SELECT TO_VARCHAR( + TIMESTAMPADD( + HOUR, + EXTRACT(timezone_hour from MAX_TIMESTAMP), + TIMESTAMPADD( + MINUTE, + EXTRACT(timezone_minute from MAX_TIMESTAMP), + CONVERT_TIMEZONE('UTC', MAX_TIMESTAMP) + ) + ),'YYYY-MM-DDTHH24:MI:SS.FF9TZH:TZM') as MAX_TIMESTAMP_UTC from MAX_TS; """)), - record -> record.getString("MIN_TIMESTAMP")).get(0)); + record -> record.getString("MAX_TIMESTAMP_UTC")).get(0)); return new InitialRawTableStatus(true, false, maxTimestamp.map(Instant::parse)); } @@ -181,7 +198,7 @@ public void execute(final Sql sql) throws Exception { final UUID queryId = UUID.randomUUID(); for (final String transaction : transactions) { final UUID transactionId = UUID.randomUUID(); - LOGGER.debug("Executing sql {}-{}: {}", queryId, transactionId, transaction); + LOGGER.info("Executing sql {}-{}: {}", queryId, transactionId, transaction); final long startTime = System.currentTimeMillis(); try { @@ -200,7 +217,7 @@ public void execute(final Sql sql) throws Exception { throw new RuntimeException(trimmedMessage, e); } - LOGGER.debug("Sql {}-{} completed in {} ms", queryId, transactionId, System.currentTimeMillis() - startTime); + LOGGER.info("Sql {}-{} completed in {} ms", queryId, transactionId, System.currentTimeMillis() - startTime); } } diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGenerator.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGenerator.java index 37b0bdaefff8..191b9f34cc66 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGenerator.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGenerator.java @@ -227,6 +227,21 @@ WHEN TYPEOF(${expression}) != 'ARRAY' } } + private static String airbyteExtractedAtUtcForced(final String sqlExpression) { + return new StringSubstitutor(Map.of("expression", sqlExpression)).replace( + """ + TIMESTAMPADD( + HOUR, + EXTRACT(timezone_hour from ${expression}), + TIMESTAMPADD( + MINUTE, + EXTRACT(timezone_minute from ${expression}), + CONVERT_TIMEZONE('UTC', ${expression}) + ) + ) + """); + } + @VisibleForTesting String insertNewRecords(final StreamConfig stream, final String finalSuffix, @@ -297,14 +312,15 @@ AND TYPEOF("_airbyte_data":"_ab_cdc_deleted_at") NOT IN ('NULL', 'NULL_VALUE') "extractedAtCondition", extractedAtCondition, "column_list", columnList, "pk_list", pkList, - "cursor_order_clause", cursorOrderClause)).replace( + "cursor_order_clause", cursorOrderClause, + "airbyte_extracted_at_utc", airbyteExtractedAtUtcForced("\"_airbyte_extracted_at\""))).replace( """ WITH intermediate_data AS ( SELECT ${column_casts} ARRAY_CONSTRUCT_COMPACT(${column_errors}) as "_airbyte_cast_errors", 
"_airbyte_raw_id", - "_airbyte_extracted_at" + ${airbyte_extracted_at_utc} as "_airbyte_extracted_at" FROM ${raw_table_id} WHERE ( "_airbyte_loaded_at" IS NULL @@ -356,7 +372,7 @@ WITH intermediate_data AS ( private static String buildExtractedAtCondition(final Optional minRawTimestamp) { return minRawTimestamp - .map(ts -> " AND \"_airbyte_extracted_at\" > '" + ts + "'") + .map(ts -> " AND " + airbyteExtractedAtUtcForced("\"_airbyte_extracted_at\"") + " > '" + ts + "'") .orElse(""); } @@ -373,13 +389,14 @@ String dedupFinalTable(final StreamId id, return new StringSubstitutor(Map.of( "final_table_id", id.finalTableId(QUOTE, finalSuffix.toUpperCase()), "pk_list", pkList, - "cursor_order_clause", cursorOrderClause)).replace( + "cursor_order_clause", cursorOrderClause, + "airbyte_extracted_at_utc", airbyteExtractedAtUtcForced("\"_AIRBYTE_EXTRACTED_AT\""))).replace( """ DELETE FROM ${final_table_id} WHERE "_AIRBYTE_RAW_ID" IN ( SELECT "_AIRBYTE_RAW_ID" FROM ( SELECT "_AIRBYTE_RAW_ID", row_number() OVER ( - PARTITION BY ${pk_list} ORDER BY ${cursor_order_clause} "_AIRBYTE_EXTRACTED_AT" DESC + PARTITION BY ${pk_list} ORDER BY ${cursor_order_clause} ${airbyte_extracted_at_utc} DESC ) as row_number FROM ${final_table_id} ) WHERE row_number != 1 diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java index 0e791e3573ae..91631556c57f 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java @@ -442,161 +442,162 @@ public void dst_test_oldSyncRunsThroughTransition_thenNewSyncRuns() throws Excep this.insertRawTableRecords(this.streamId, List.of( // 2 records written by a sync running on the old version of snowflake Jsons.deserialize(""" - { - "_airbyte_raw_id": "pre-dst local tz 1", - "_airbyte_extracted_at": "2024-03-10T02:00:00-08:00", - "_airbyte_data": { - "id1": 1, - "id2": 100, - "string": "Alice00" - } - } - """), + { + "_airbyte_raw_id": "pre-dst local tz 1", + "_airbyte_extracted_at": "2024-03-10T02:00:00-08:00", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice00" + } + } + """), Jsons.deserialize(""" - { - "_airbyte_raw_id": "post-dst local tz 2", - "_airbyte_extracted_at": "2024-03-10T02:01:00-07:00", - "_airbyte_data": { - "id1": 2, - "id2": 100, - "string": "Bob00" - } - } - """), + { + "_airbyte_raw_id": "post-dst local tz 2", + "_airbyte_extracted_at": "2024-03-10T02:01:00-07:00", + "_airbyte_data": { + "id1": 2, + "id2": 100, + "string": "Bob00" + } + } + """), // and 2 records that got successfully loaded. 
Jsons.deserialize(""" - { - "_airbyte_raw_id": "pre-dst local tz 3", - "_airbyte_extracted_at": "2024-03-10T02:00:00-08:00", - "_airbyte_loaded_at": "1970-01-01T00:00:00Z", - "_airbyte_data": { - "id1": 3, - "id2": 100, - "string": "Charlie00" - } - } - """), + { + "_airbyte_raw_id": "pre-dst local tz 3", + "_airbyte_extracted_at": "2024-03-10T02:00:00-08:00", + "_airbyte_loaded_at": "1970-01-01T00:00:00Z", + "_airbyte_data": { + "id1": 3, + "id2": 100, + "string": "Charlie00" + } + } + """), Jsons.deserialize(""" - { - "_airbyte_raw_id": "post-dst local tz 4", - "_airbyte_extracted_at": "2024-03-10T02:01:00-07:00", - "_airbyte_loaded_at": "1970-01-01T00:00:00Z", - "_airbyte_data": { - "id1": 4, - "id2": 100, - "string": "Dave00" - } - } - """))); + { + "_airbyte_raw_id": "post-dst local tz 4", + "_airbyte_extracted_at": "2024-03-10T02:01:00-07:00", + "_airbyte_loaded_at": "1970-01-01T00:00:00Z", + "_airbyte_data": { + "id1": 4, + "id2": 100, + "string": "Dave00" + } + } + """))); this.insertFinalTableRecords(false, this.streamId, "", List.of( Jsons.deserialize(""" - { - "_AIRBYTE_RAW_ID": "pre-dst local tz 3", - "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:00:00-08:00", - "ID1": 3, - "ID2": 100, - "STRING": "Charlie00" - } - """), + { + "_AIRBYTE_RAW_ID": "pre-dst local tz 3", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:00:00-08:00", + "ID1": 3, + "ID2": 100, + "STRING": "Charlie00" + } + """), Jsons.deserialize(""" - { - "_AIRBYTE_RAW_ID": "post-dst local tz 4", - "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00-07:00", - "ID1": 4, - "ID2": 100, - "STRING": "Dave00" - } - """) - )); + { + "_AIRBYTE_RAW_ID": "post-dst local tz 4", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00-07:00", + "ID1": 4, + "ID2": 100, + "STRING": "Dave00" + } + """))); // Gather initial state at the start of our updated sync - DestinationInitialStatus initialState = this.destinationHandler.gatherInitialState(List.of(this.incrementalDedupStream)).getFirst(); + DestinationInitialStatus initialState = + this.destinationHandler.gatherInitialState(List.of(this.incrementalDedupStream)).getFirst(); this.insertRawTableRecords(this.streamId, List.of( // insert raw records with updates Jsons.deserialize(""" - { - "_airbyte_raw_id": "post-dst utc 1", - "_airbyte_extracted_at": "2024-03-10T02:02:00Z", - "_airbyte_data": { - "id1": 1, - "id2": 100, - "string": "Alice01" - } - } - """), + { + "_airbyte_raw_id": "post-dst utc 1", + "_airbyte_extracted_at": "2024-03-10T02:02:00Z", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice01" + } + } + """), Jsons.deserialize(""" - { - "_airbyte_raw_id": "post-dst utc 2", - "_airbyte_extracted_at": "2024-03-10T02:02:00Z", - "_airbyte_data": { - "id1": 2, - "id2": 100, - "string": "Bob01" - } - } - """), + { + "_airbyte_raw_id": "post-dst utc 2", + "_airbyte_extracted_at": "2024-03-10T02:02:00Z", + "_airbyte_data": { + "id1": 2, + "id2": 100, + "string": "Bob01" + } + } + """), Jsons.deserialize(""" - { - "_airbyte_raw_id": "post-dst utc 3", - "_airbyte_extracted_at": "2024-03-10T02:02:00Z", - "_airbyte_data": { - "id1": 3, - "id2": 100, - "string": "Charlie01" - } - } - """), + { + "_airbyte_raw_id": "post-dst utc 3", + "_airbyte_extracted_at": "2024-03-10T02:02:00Z", + "_airbyte_data": { + "id1": 3, + "id2": 100, + "string": "Charlie01" + } + } + """), Jsons.deserialize(""" - { - "_airbyte_raw_id": "post-dst utc 4", - "_airbyte_extracted_at": "2024-03-10T02:02:00Z", - "_airbyte_data": { - "id1": 4, - "id2": 100, - "string": "Dave01" - } - } - """))); - - 
TypeAndDedupeTransaction.executeTypeAndDedupe(this.generator, this.destinationHandler, this.incrementalDedupStream, initialState.initialRawTableStatus().maxProcessedTimestamp(), ""); + { + "_airbyte_raw_id": "post-dst utc 4", + "_airbyte_extracted_at": "2024-03-10T02:02:00Z", + "_airbyte_data": { + "id1": 4, + "id2": 100, + "string": "Dave01" + } + } + """))); + + TypeAndDedupeTransaction.executeTypeAndDedupe(this.generator, this.destinationHandler, this.incrementalDedupStream, + initialState.initialRawTableStatus().maxProcessedTimestamp(), ""); DIFFER.diffFinalTableRecords( List.of( Jsons.deserialize(""" - { - "_AIRBYTE_RAW_ID": "post-dst utc 1", - "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00Z", - "ID1": 1, - "ID2": 100, - "STRING": "Alice01" - } - """), + { + "_AIRBYTE_RAW_ID": "post-dst utc 1", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00Z", + "ID1": 1, + "ID2": 100, + "STRING": "Alice01" + } + """), Jsons.deserialize(""" - { - "_AIRBYTE_RAW_ID": "post-dst utc 2", - "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00Z", - "ID1": 2, - "ID2": 100, - "STRING": "Bob01" - } - """), + { + "_AIRBYTE_RAW_ID": "post-dst utc 2", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00Z", + "ID1": 2, + "ID2": 100, + "STRING": "Bob01" + } + """), Jsons.deserialize(""" - { - "_AIRBYTE_RAW_ID": "post-dst utc 3", - "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00Z", - "ID1": 3, - "ID2": 100, - "STRING": "Charlie01" - } - """), + { + "_AIRBYTE_RAW_ID": "post-dst utc 3", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00Z", + "ID1": 3, + "ID2": 100, + "STRING": "Charlie01" + } + """), Jsons.deserialize(""" - { - "_AIRBYTE_RAW_ID": "post-dst utc 4", - "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00Z", - "ID1": 4, - "ID2": 100, - "STRING": "Dave01" - } - """)), + { + "_AIRBYTE_RAW_ID": "post-dst utc 4", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00Z", + "ID1": 4, + "ID2": 100, + "STRING": "Dave01" + } + """)), this.dumpFinalTableRecords(this.streamId, "")); } @@ -607,57 +608,59 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsThroughTransitio this.insertRawTableRecords(this.streamId, List.of( // record written by a sync running on the old version of snowflake Jsons.deserialize(""" - { - "_airbyte_raw_id": "pre-dst local tz 1", - "_airbyte_extracted_at": "2024-03-10T01:59:00-08:00", - "_airbyte_data": { - "id1": 1, - "id2": 100, - "string": "Alice00" - } - } - """))); + { + "_airbyte_raw_id": "pre-dst local tz 1", + "_airbyte_extracted_at": "2024-03-10T01:59:00-08:00", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice00" + } + } + """))); // Gather initial state at the start of our updated sync - DestinationInitialStatus initialState = this.destinationHandler.gatherInitialState(List.of(this.incrementalDedupStream)).getFirst(); + DestinationInitialStatus initialState = + this.destinationHandler.gatherInitialState(List.of(this.incrementalDedupStream)).getFirst(); this.insertRawTableRecords(this.streamId, List.of( // update the record twice // this never really happens, but verify that it works Jsons.deserialize(""" - { - "_airbyte_raw_id": "pre-dst utc 1", - "_airbyte_extracted_at": "2024-03-10T02:00:00Z", - "_airbyte_data": { - "id1": 1, - "id2": 100, - "string": "Alice01" - } - } - """), + { + "_airbyte_raw_id": "pre-dst utc 1", + "_airbyte_extracted_at": "2024-03-10T02:00:00Z", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice01" + } + } + """), Jsons.deserialize(""" - { - "_airbyte_raw_id": "post-dst utc 1", - "_airbyte_extracted_at": "2024-03-10T02:01:00Z", - 
"_airbyte_data": { - "id1": 1, - "id2": 100, - "string": "Alice02" - } - } - """))); - - TypeAndDedupeTransaction.executeTypeAndDedupe(this.generator, this.destinationHandler, this.incrementalDedupStream, initialState.initialRawTableStatus().maxProcessedTimestamp(), ""); + { + "_airbyte_raw_id": "post-dst utc 1", + "_airbyte_extracted_at": "2024-03-10T02:01:00Z", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice02" + } + } + """))); + + TypeAndDedupeTransaction.executeTypeAndDedupe(this.generator, this.destinationHandler, this.incrementalDedupStream, + initialState.initialRawTableStatus().maxProcessedTimestamp(), ""); DIFFER.diffFinalTableRecords( List.of( Jsons.deserialize(""" - { - "_AIRBYTE_RAW_ID": "post-dst utc 1", - "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00Z", - "ID1": 1, - "ID2": 100, - "STRING": "Alice02" - } - """)), + { + "_AIRBYTE_RAW_ID": "post-dst utc 1", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00Z", + "ID1": 1, + "ID2": 100, + "STRING": "Alice02" + } + """)), this.dumpFinalTableRecords(this.streamId, "")); } @@ -668,128 +671,132 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsBeforeTransition this.insertRawTableRecords(this.streamId, List.of( // records written by a sync running on the old version of snowflake Jsons.deserialize(""" - { - "_airbyte_raw_id": "pre-dst local tz 1", - "_airbyte_extracted_at": "2024-03-10T01:59:00-08:00", - "_airbyte_data": { - "id1": 1, - "id2": 100, - "string": "Alice00" - } - } - """), + { + "_airbyte_raw_id": "pre-dst local tz 1", + "_airbyte_extracted_at": "2024-03-10T01:59:00-08:00", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice00" + } + } + """), Jsons.deserialize(""" - { - "_airbyte_raw_id": "pre-dst local tz 2", - "_airbyte_extracted_at": "2024-03-10T01:59:00-08:00", - "_airbyte_data": { - "id1": 2, - "id2": 100, - "string": "Bob00" - } - } - """))); + { + "_airbyte_raw_id": "pre-dst local tz 2", + "_airbyte_extracted_at": "2024-03-10T01:59:00-08:00", + "_airbyte_data": { + "id1": 2, + "id2": 100, + "string": "Bob00" + } + } + """))); // Gather initial state at the start of our first new sync - DestinationInitialStatus initialState = this.destinationHandler.gatherInitialState(List.of(this.incrementalDedupStream)).getFirst(); + DestinationInitialStatus initialState = + this.destinationHandler.gatherInitialState(List.of(this.incrementalDedupStream)).getFirst(); this.insertRawTableRecords(this.streamId, List.of( // update the records Jsons.deserialize(""" - { - "_airbyte_raw_id": "pre-dst utc 1", - "_airbyte_extracted_at": "2024-03-10T02:00:00Z", - "_airbyte_data": { - "id1": 1, - "id2": 100, - "string": "Alice01" - } - } - """), + { + "_airbyte_raw_id": "pre-dst utc 1", + "_airbyte_extracted_at": "2024-03-10T02:00:00Z", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice01" + } + } + """), Jsons.deserialize(""" - { - "_airbyte_raw_id": "pre-dst utc 2", - "_airbyte_extracted_at": "2024-03-10T02:00:00Z", - "_airbyte_data": { - "id1": 2, - "id2": 100, - "string": "Bob01" - } - } - """))); - - TypeAndDedupeTransaction.executeTypeAndDedupe(this.generator, this.destinationHandler, this.incrementalDedupStream, initialState.initialRawTableStatus().maxProcessedTimestamp(), ""); + { + "_airbyte_raw_id": "pre-dst utc 2", + "_airbyte_extracted_at": "2024-03-10T02:00:00Z", + "_airbyte_data": { + "id1": 2, + "id2": 100, + "string": "Bob01" + } + } + """))); + + TypeAndDedupeTransaction.executeTypeAndDedupe(this.generator, this.destinationHandler, this.incrementalDedupStream, + 
initialState.initialRawTableStatus().maxProcessedTimestamp(), ""); DIFFER.diffFinalTableRecords( List.of( Jsons.deserialize(""" - { - "_AIRBYTE_RAW_ID": "pre-dst utc 1", - "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:00:00Z", - "ID1": 1, - "ID2": 100, - "STRING": "Alice01" - } - """), + { + "_AIRBYTE_RAW_ID": "pre-dst utc 1", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:00:00Z", + "ID1": 1, + "ID2": 100, + "STRING": "Alice01" + } + """), Jsons.deserialize(""" - { - "_AIRBYTE_RAW_ID": "pre-dst utc 2", - "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:00:00Z", - "ID1": 2, - "ID2": 100, - "STRING": "Bob01" - } - """)), + { + "_AIRBYTE_RAW_ID": "pre-dst utc 2", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:00:00Z", + "ID1": 2, + "ID2": 100, + "STRING": "Bob01" + } + """)), this.dumpFinalTableRecords(this.streamId, "")); // Gather initial state at the start of our second new sync - DestinationInitialStatus initialState2 = this.destinationHandler.gatherInitialState(List.of(this.incrementalDedupStream)).getFirst(); + DestinationInitialStatus initialState2 = + this.destinationHandler.gatherInitialState(List.of(this.incrementalDedupStream)).getFirst(); this.insertRawTableRecords(this.streamId, List.of( // update the records again Jsons.deserialize(""" - { - "_airbyte_raw_id": "post-dst utc 1", - "_airbyte_extracted_at": "2024-03-10T02:01:00Z", - "_airbyte_data": { - "id1": 1, - "id2": 100, - "string": "Alice02" - } - } - """), + { + "_airbyte_raw_id": "post-dst utc 1", + "_airbyte_extracted_at": "2024-03-10T02:01:00Z", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice02" + } + } + """), Jsons.deserialize(""" - { - "_airbyte_raw_id": "post-dst utc 2", - "_airbyte_extracted_at": "2024-03-10T02:01:00Z", - "_airbyte_data": { - "id1": 2, - "id2": 100, - "string": "Bob02" - } - } - """))); - - TypeAndDedupeTransaction.executeTypeAndDedupe(this.generator, this.destinationHandler, this.incrementalDedupStream, initialState2.initialRawTableStatus().maxProcessedTimestamp(), ""); + { + "_airbyte_raw_id": "post-dst utc 2", + "_airbyte_extracted_at": "2024-03-10T02:01:00Z", + "_airbyte_data": { + "id1": 2, + "id2": 100, + "string": "Bob02" + } + } + """))); + + TypeAndDedupeTransaction.executeTypeAndDedupe(this.generator, this.destinationHandler, this.incrementalDedupStream, + initialState2.initialRawTableStatus().maxProcessedTimestamp(), ""); DIFFER.diffFinalTableRecords( List.of( Jsons.deserialize(""" - { - "_AIRBYTE_RAW_ID": "post-dst utc 1", - "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00Z", - "ID1": 1, - "ID2": 100, - "STRING": "Alice02" - } - """), + { + "_AIRBYTE_RAW_ID": "post-dst utc 1", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00Z", + "ID1": 1, + "ID2": 100, + "STRING": "Alice02" + } + """), Jsons.deserialize(""" - { - "_AIRBYTE_RAW_ID": "post-dst utc 2", - "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00Z", - "ID1": 2, - "ID2": 100, - "STRING": "Bob02" - } - """)), + { + "_AIRBYTE_RAW_ID": "post-dst utc 2", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00Z", + "ID1": 2, + "ID2": 100, + "STRING": "Bob02" + } + """)), this.dumpFinalTableRecords(this.streamId, "")); } From dba28d37b855f94b5e3fad158bc134017dacdb84 Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Tue, 5 Mar 2024 12:12:08 -0800 Subject: [PATCH 20/33] test fixes --- .../SnowflakeSqlGeneratorIntegrationTest.java | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git 
a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java index 91631556c57f..1403ecb77cf5 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java @@ -493,6 +493,7 @@ public void dst_test_oldSyncRunsThroughTransition_thenNewSyncRuns() throws Excep { "_AIRBYTE_RAW_ID": "pre-dst local tz 3", "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:00:00-08:00", + "_AIRBYTE_META": {"errors": []}, "ID1": 3, "ID2": 100, "STRING": "Charlie00" @@ -502,6 +503,7 @@ public void dst_test_oldSyncRunsThroughTransition_thenNewSyncRuns() throws Excep { "_AIRBYTE_RAW_ID": "post-dst local tz 4", "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00-07:00", + "_AIRBYTE_META": {"errors": []}, "ID1": 4, "ID2": 100, "STRING": "Dave00" @@ -655,7 +657,8 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsThroughTransitio Jsons.deserialize(""" { "_AIRBYTE_RAW_ID": "post-dst utc 1", - "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00Z", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00.000000000Z", + "_AIRBYTE_META": {"errors": []}, "ID1": 1, "ID2": 100, "STRING": "Alice02" @@ -729,7 +732,8 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsBeforeTransition Jsons.deserialize(""" { "_AIRBYTE_RAW_ID": "pre-dst utc 1", - "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:00:00Z", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:00:00.000000000Z", + "_AIRBYTE_META": {"errors": []}, "ID1": 1, "ID2": 100, "STRING": "Alice01" @@ -738,7 +742,8 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsBeforeTransition Jsons.deserialize(""" { "_AIRBYTE_RAW_ID": "pre-dst utc 2", - "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:00:00Z", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:00:00.000000000Z", + "_AIRBYTE_META": {"errors": []}, "ID1": 2, "ID2": 100, "STRING": "Bob01" @@ -782,7 +787,8 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsBeforeTransition Jsons.deserialize(""" { "_AIRBYTE_RAW_ID": "post-dst utc 1", - "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00Z", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00.000000000Z", + "_AIRBYTE_META": {"errors": []}, "ID1": 1, "ID2": 100, "STRING": "Alice02" @@ -791,7 +797,8 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsBeforeTransition Jsons.deserialize(""" { "_AIRBYTE_RAW_ID": "post-dst utc 2", - "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00Z", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00.000000000Z", + "_AIRBYTE_META": {"errors": []}, "ID1": 2, "ID2": 100, "STRING": "Bob02" From 93531b2633ef52566d01ae5e957fc65acfd549a1 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 5 Mar 2024 12:35:32 -0800 Subject: [PATCH 21/33] try fixing test --- .../SnowflakeSqlGeneratorIntegrationTest.java | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git 
a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java index 1403ecb77cf5..3e803acb6342 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java @@ -491,22 +491,22 @@ public void dst_test_oldSyncRunsThroughTransition_thenNewSyncRuns() throws Excep this.insertFinalTableRecords(false, this.streamId, "", List.of( Jsons.deserialize(""" { - "_AIRBYTE_RAW_ID": "pre-dst local tz 3", - "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:00:00-08:00", - "_AIRBYTE_META": {"errors": []}, - "ID1": 3, - "ID2": 100, - "STRING": "Charlie00" + "_airbyte_raw_id": "pre-dst local tz 3", + "_airbyte_extracted_at": "2024-03-10T02:00:00-08:00", + "_airbyte_meta": {"errors": []}, + "id1": 3, + "id2": 100, + "string": "Charlie00" } """), Jsons.deserialize(""" { - "_AIRBYTE_RAW_ID": "post-dst local tz 4", - "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00-07:00", - "_AIRBYTE_META": {"errors": []}, - "ID1": 4, - "ID2": 100, - "STRING": "Dave00" + "_airbyte_raw_id": "post-dst local tz 4", + "_airbyte_extracted_at": "2024-03-10T02:01:00-07:00", + "_airbyte_meta": {"errors": []}, + "id1": 4, + "id2": 100, + "string": "Dave00" } """))); // Gather initial state at the start of our updated sync From c5898b4fa2d0bbf228638ea61bf98ab4266c5b0c Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Tue, 5 Mar 2024 12:35:37 -0800 Subject: [PATCH 22/33] fmt --- .../snowflake/typing_deduping/migrations/SnowflakeState.kt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/SnowflakeState.kt b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/SnowflakeState.kt index f0b8ab35aa31..d6648acb142b 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/SnowflakeState.kt +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/migrations/SnowflakeState.kt @@ -8,8 +8,7 @@ import io.airbyte.integrations.base.destination.typing_deduping.migrators.Minimu // Note the nonnullable fields. Even though the underlying storage medium (a JSON blob) supports // nullability, we don't want to deal with that in our codebase. 
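// ---------------------------------------------------------------------------
// Editor's sketch, not part of this patch: with extractedAtInUtc gone (patch
// 18 above), the persisted per-stream state blob reduces to
// {"needsSoftReset": <bool>}, and the handler's toDestinationState defaults an
// absent field to false. A minimal restatement of that parse:
import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.commons.json.Jsons;

class StateParseSketch {
  static SnowflakeState toDestinationState(JsonNode json) {
    return new SnowflakeState(
        json.hasNonNull("needsSoftReset") && json.get("needsSoftReset").asBoolean());
  }

  public static void main(String[] args) {
    // Absent field defaults to false rather than null:
    System.out.println(toDestinationState(Jsons.deserialize("{}")).needsSoftReset());
  }
}
// ---------------------------------------------------------------------------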
-data class SnowflakeState(val needsSoftReset: Boolean) : - MinimumDestinationState { +data class SnowflakeState(val needsSoftReset: Boolean) : MinimumDestinationState { override fun needsSoftReset(): Boolean { return needsSoftReset } From 575d3402cfec4acb297302a11fc6a82683f29b18 Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Tue, 5 Mar 2024 12:44:53 -0800 Subject: [PATCH 23/33] fix tests for real --- .../SnowflakeSqlGeneratorIntegrationTest.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java index 3e803acb6342..8c3e7cd62cbe 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java @@ -567,7 +567,8 @@ public void dst_test_oldSyncRunsThroughTransition_thenNewSyncRuns() throws Excep Jsons.deserialize(""" { "_AIRBYTE_RAW_ID": "post-dst utc 1", - "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00Z", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00.000000000Z", + "_AIRBYTE_META": {"errors": []}, "ID1": 1, "ID2": 100, "STRING": "Alice01" @@ -576,7 +577,8 @@ public void dst_test_oldSyncRunsThroughTransition_thenNewSyncRuns() throws Excep Jsons.deserialize(""" { "_AIRBYTE_RAW_ID": "post-dst utc 2", - "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00Z", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00.000000000Z", + "_AIRBYTE_META": {"errors": []}, "ID1": 2, "ID2": 100, "STRING": "Bob01" @@ -585,7 +587,8 @@ public void dst_test_oldSyncRunsThroughTransition_thenNewSyncRuns() throws Excep Jsons.deserialize(""" { "_AIRBYTE_RAW_ID": "post-dst utc 3", - "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00Z", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00.000000000Z", + "_AIRBYTE_META": {"errors": []}, "ID1": 3, "ID2": 100, "STRING": "Charlie01" @@ -594,7 +597,8 @@ public void dst_test_oldSyncRunsThroughTransition_thenNewSyncRuns() throws Excep Jsons.deserialize(""" { "_AIRBYTE_RAW_ID": "post-dst utc 4", - "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00Z", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00.000000000Z", + "_AIRBYTE_META": {"errors": []}, "ID1": 4, "ID2": 100, "STRING": "Dave01" From 839df58537b93168de6341f6a7e3b387e17810f3 Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Tue, 5 Mar 2024 13:17:04 -0800 Subject: [PATCH 24/33] fix append mode --- .../snowflake/typing_deduping/SnowflakeSqlGenerator.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGenerator.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGenerator.java index 191b9f34cc66..ba05a9dd22cf 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGenerator.java 
+++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGenerator.java @@ -348,14 +348,15 @@ WITH intermediate_data AS ( "column_casts", columnCasts, "column_errors", columnErrors, "extractedAtCondition", extractedAtCondition, - "column_list", columnList)).replace( + "column_list", columnList, + "airbyte_extracted_at_utc", airbyteExtractedAtUtcForced("\"_airbyte_extracted_at\""))).replace( """ WITH intermediate_data AS ( SELECT ${column_casts} ARRAY_CONSTRUCT_COMPACT(${column_errors}) as "_airbyte_cast_errors", "_airbyte_raw_id", - "_airbyte_extracted_at" + ${airbyte_extracted_at_utc} as "_airbyte_extracted_at" FROM ${raw_table_id} WHERE "_airbyte_loaded_at" IS NULL From e3eeac732694b759b2cfc385ca3ef97f6d320389 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 5 Mar 2024 14:14:39 -0800 Subject: [PATCH 25/33] add append mode tests --- .../SnowflakeSqlGeneratorIntegrationTest.java | 517 +++++++++++++++++- 1 file changed, 512 insertions(+), 5 deletions(-) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java index 8c3e7cd62cbe..f0666918286d 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java @@ -433,10 +433,8 @@ public void ensurePKsAreIndexedUnique() throws Exception { assertTrue(initialStates.get(0).isSchemaMismatch()); } - // Note the null updated_at values. This forces us to rely on the extracted_at tiebreaker. 
- @Test - public void dst_test_oldSyncRunsThroughTransition_thenNewSyncRuns() throws Exception { + public void dst_test_oldSyncRunsThroughTransition_thenNewSyncRuns_dedup() throws Exception { this.createRawTable(this.streamId); this.createFinalTable(this.incrementalDedupStream, ""); this.insertRawTableRecords(this.streamId, List.of( @@ -608,7 +606,7 @@ public void dst_test_oldSyncRunsThroughTransition_thenNewSyncRuns() throws Excep } @Test - public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsThroughTransition() throws Exception { + public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsThroughTransition_dedup() throws Exception { this.createRawTable(this.streamId); this.createFinalTable(this.incrementalDedupStream, ""); this.insertRawTableRecords(this.streamId, List.of( @@ -672,7 +670,7 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsThroughTransitio } @Test - public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsBeforeTransition_thenNewSyncRunsThroughTransition() throws Exception { + public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsBeforeTransition_thenNewSyncRunsThroughTransition_dedup() throws Exception { this.createRawTable(this.streamId); this.createFinalTable(this.incrementalDedupStream, ""); this.insertRawTableRecords(this.streamId, List.of( @@ -811,4 +809,513 @@ public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsBeforeTransition this.dumpFinalTableRecords(this.streamId, "")); } + @Test + public void dst_test_oldSyncRunsThroughTransition_thenNewSyncRuns_append() throws Exception { + this.createRawTable(this.streamId); + this.createFinalTable(this.incrementalAppendStream, ""); + this.insertRawTableRecords(this.streamId, List.of( + // 2 records written by a sync running on the old version of snowflake + Jsons.deserialize(""" + { + "_airbyte_raw_id": "pre-dst local tz 1", + "_airbyte_extracted_at": "2024-03-10T02:00:00-08:00", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice00" + } + } + """), + Jsons.deserialize(""" + { + "_airbyte_raw_id": "post-dst local tz 2", + "_airbyte_extracted_at": "2024-03-10T02:01:00-07:00", + "_airbyte_data": { + "id1": 2, + "id2": 100, + "string": "Bob00" + } + } + """), + // and 2 records that got successfully loaded with local TZ. 
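+                // ("loaded" meaning "_airbyte_loaded_at" is already set, so the updated sync will not
+                // re-type them; the final-table rows inserted just below mirror what a pre-fix connector
+                // wrote for them, with local-offset "_airbyte_extracted_at" values)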
+ Jsons.deserialize(""" + { + "_airbyte_raw_id": "pre-dst local tz 3", + "_airbyte_extracted_at": "2024-03-10T02:00:00-08:00", + "_airbyte_loaded_at": "1970-01-01T00:00:00Z", + "_airbyte_data": { + "id1": 3, + "id2": 100, + "string": "Charlie00" + } + } + """), + Jsons.deserialize(""" + { + "_airbyte_raw_id": "post-dst local tz 4", + "_airbyte_extracted_at": "2024-03-10T02:01:00-07:00", + "_airbyte_loaded_at": "1970-01-01T00:00:00Z", + "_airbyte_data": { + "id1": 4, + "id2": 100, + "string": "Dave00" + } + } + """))); + this.insertFinalTableRecords(false, this.streamId, "", List.of( + Jsons.deserialize(""" + { + "_airbyte_raw_id": "pre-dst local tz 3", + "_airbyte_extracted_at": "2024-03-10T02:00:00-08:00", + "_airbyte_meta": {"errors": []}, + "id1": 3, + "id2": 100, + "string": "Charlie00" + } + """), + Jsons.deserialize(""" + { + "_airbyte_raw_id": "post-dst local tz 4", + "_airbyte_extracted_at": "2024-03-10T02:01:00-07:00", + "_airbyte_meta": {"errors": []}, + "id1": 4, + "id2": 100, + "string": "Dave00" + } + """))); + // Gather initial state at the start of our updated sync + DestinationInitialStatus initialState = + this.destinationHandler.gatherInitialState(List.of(this.incrementalAppendStream)).getFirst(); + this.insertRawTableRecords(this.streamId, List.of( + // insert raw records with updates + Jsons.deserialize(""" + { + "_airbyte_raw_id": "post-dst utc 1", + "_airbyte_extracted_at": "2024-03-10T02:02:00Z", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice01" + } + } + """), + Jsons.deserialize(""" + { + "_airbyte_raw_id": "post-dst utc 2", + "_airbyte_extracted_at": "2024-03-10T02:02:00Z", + "_airbyte_data": { + "id1": 2, + "id2": 100, + "string": "Bob01" + } + } + """), + Jsons.deserialize(""" + { + "_airbyte_raw_id": "post-dst utc 3", + "_airbyte_extracted_at": "2024-03-10T02:02:00Z", + "_airbyte_data": { + "id1": 3, + "id2": 100, + "string": "Charlie01" + } + } + """), + Jsons.deserialize(""" + { + "_airbyte_raw_id": "post-dst utc 4", + "_airbyte_extracted_at": "2024-03-10T02:02:00Z", + "_airbyte_data": { + "id1": 4, + "id2": 100, + "string": "Dave01" + } + } + """))); + + TypeAndDedupeTransaction.executeTypeAndDedupe(this.generator, this.destinationHandler, this.incrementalAppendStream, + initialState.initialRawTableStatus().maxProcessedTimestamp(), ""); + + DIFFER.diffFinalTableRecords( + List.of( + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "pre-dst local tz 1", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:00:00.000000000Z", + "_AIRBYTE_META": {"errors": []}, + "ID1": 1, + "ID2": 100, + "STRING": "Alice00" + } + """), + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "post-dst utc 1", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00.000000000Z", + "_AIRBYTE_META": {"errors": []}, + "ID1": 1, + "ID2": 100, + "STRING": "Alice01" + } + """), + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "post-dst local tz 2", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00.000000000Z", + "_AIRBYTE_META": { + "errors": [] + }, + "ID1": 2, + "ID2": 100, + "STRING": "Bob00" + }"""), + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "post-dst utc 2", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00.000000000Z", + "_AIRBYTE_META": {"errors": []}, + "ID1": 2, + "ID2": 100, + "STRING": "Bob01" + } + """), + // note local TZ here. This record was loaded by an older version of the connector. 
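+            // The typing query only selects raw rows where "_airbyte_loaded_at" IS NULL (see the
+            // WHERE clause in SnowflakeSqlGenerator), so this already-loaded row is never rewritten
+            // and its pre-fix -08:00 offset survives the updated sync.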
+ Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "pre-dst local tz 3", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:00:00.000000000-08:00", + "_AIRBYTE_META": { + "errors": [] + }, + "ID1": 3, + "ID2": 100, + "STRING": "Charlie00" + }"""), + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "post-dst utc 3", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00.000000000Z", + "_AIRBYTE_META": {"errors": []}, + "ID1": 3, + "ID2": 100, + "STRING": "Charlie01" + } + """), + // note local TZ here. This record was loaded by an older version of the connector. + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "post-dst local tz 4", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00.000000000-07:00", + "_AIRBYTE_META": { + "errors": [] + }, + "ID1": 4, + "ID2": 100, + "STRING": "Dave00" + }"""), + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "post-dst utc 4", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:02:00.000000000Z", + "_AIRBYTE_META": {"errors": []}, + "ID1": 4, + "ID2": 100, + "STRING": "Dave01" + } + """)), + this.dumpFinalTableRecords(this.streamId, "")); + } + + @Test + public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsThroughTransition_append() throws Exception { + this.createRawTable(this.streamId); + this.createFinalTable(this.incrementalAppendStream, ""); + this.insertRawTableRecords(this.streamId, List.of( + // record written by a sync running on the old version of snowflake + Jsons.deserialize(""" + { + "_airbyte_raw_id": "pre-dst local tz 1", + "_airbyte_extracted_at": "2024-03-10T01:59:00-08:00", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice00" + } + } + """))); + // Gather initial state at the start of our updated sync + DestinationInitialStatus initialState = + this.destinationHandler.gatherInitialState(List.of(this.incrementalAppendStream)).getFirst(); + this.insertRawTableRecords(this.streamId, List.of( + // update the record twice + // this never really happens, but verify that it works + Jsons.deserialize(""" + { + "_airbyte_raw_id": "pre-dst utc 1", + "_airbyte_extracted_at": "2024-03-10T02:00:00Z", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice01" + } + } + """), + Jsons.deserialize(""" + { + "_airbyte_raw_id": "post-dst utc 1", + "_airbyte_extracted_at": "2024-03-10T02:01:00Z", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice02" + } + } + """))); + + TypeAndDedupeTransaction.executeTypeAndDedupe(this.generator, this.destinationHandler, this.incrementalAppendStream, + initialState.initialRawTableStatus().maxProcessedTimestamp(), ""); + + DIFFER.diffFinalTableRecords( + List.of( + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "pre-dst local tz 1", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T01:59:00.000000000Z", + "_AIRBYTE_META": { + "errors": [] + }, + "ID1": 1, + "ID2": 100, + "STRING": "Alice00" + }"""), + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "pre-dst utc 1", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:00:00.000000000Z", + "_AIRBYTE_META": { + "errors": [] + }, + "ID1": 1, + "ID2": 100, + "STRING": "Alice01" + }"""), + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "post-dst utc 1", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00.000000000Z", + "_AIRBYTE_META": {"errors": []}, + "ID1": 1, + "ID2": 100, + "STRING": "Alice02" + } + """)), + this.dumpFinalTableRecords(this.streamId, "")); + } + + @Test + public void dst_test_oldSyncRunsBeforeTransition_thenNewSyncRunsBeforeTransition_thenNewSyncRunsThroughTransition_append() throws Exception { + this.createRawTable(this.streamId); + 
this.createFinalTable(this.incrementalAppendStream, ""); + this.insertRawTableRecords(this.streamId, List.of( + // records written by a sync running on the old version of snowflake + Jsons.deserialize(""" + { + "_airbyte_raw_id": "pre-dst local tz 1", + "_airbyte_extracted_at": "2024-03-10T01:59:00-08:00", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice00" + } + } + """), + Jsons.deserialize(""" + { + "_airbyte_raw_id": "pre-dst local tz 2", + "_airbyte_extracted_at": "2024-03-10T01:59:00-08:00", + "_airbyte_data": { + "id1": 2, + "id2": 100, + "string": "Bob00" + } + } + """))); + + // Gather initial state at the start of our first new sync + DestinationInitialStatus initialState = + this.destinationHandler.gatherInitialState(List.of(this.incrementalAppendStream)).getFirst(); + this.insertRawTableRecords(this.streamId, List.of( + // update the records + Jsons.deserialize(""" + { + "_airbyte_raw_id": "pre-dst utc 1", + "_airbyte_extracted_at": "2024-03-10T02:00:00Z", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice01" + } + } + """), + Jsons.deserialize(""" + { + "_airbyte_raw_id": "pre-dst utc 2", + "_airbyte_extracted_at": "2024-03-10T02:00:00Z", + "_airbyte_data": { + "id1": 2, + "id2": 100, + "string": "Bob01" + } + } + """))); + + TypeAndDedupeTransaction.executeTypeAndDedupe(this.generator, this.destinationHandler, this.incrementalAppendStream, + initialState.initialRawTableStatus().maxProcessedTimestamp(), ""); + + DIFFER.diffFinalTableRecords( + List.of( + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "pre-dst local tz 1", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T01:59:00.000000000Z", + "_AIRBYTE_META": { + "errors": [] + }, + "ID1": 1, + "ID2": 100, + "STRING": "Alice00" + }"""), + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "pre-dst utc 1", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:00:00.000000000Z", + "_AIRBYTE_META": {"errors": []}, + "ID1": 1, + "ID2": 100, + "STRING": "Alice01" + } + """), + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "pre-dst local tz 2", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T01:59:00.000000000Z", + "_AIRBYTE_META": { + "errors": [] + }, + "ID1": 2, + "ID2": 100, + "STRING": "Bob00" + }"""), + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "pre-dst utc 2", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:00:00.000000000Z", + "_AIRBYTE_META": {"errors": []}, + "ID1": 2, + "ID2": 100, + "STRING": "Bob01" + } + """)), + this.dumpFinalTableRecords(this.streamId, "")); + + // Gather initial state at the start of our second new sync + DestinationInitialStatus initialState2 = + this.destinationHandler.gatherInitialState(List.of(this.incrementalAppendStream)).getFirst(); + this.insertRawTableRecords(this.streamId, List.of( + // update the records again + Jsons.deserialize(""" + { + "_airbyte_raw_id": "post-dst utc 1", + "_airbyte_extracted_at": "2024-03-10T02:01:00Z", + "_airbyte_data": { + "id1": 1, + "id2": 100, + "string": "Alice02" + } + } + """), + Jsons.deserialize(""" + { + "_airbyte_raw_id": "post-dst utc 2", + "_airbyte_extracted_at": "2024-03-10T02:01:00Z", + "_airbyte_data": { + "id1": 2, + "id2": 100, + "string": "Bob02" + } + } + """))); + + TypeAndDedupeTransaction.executeTypeAndDedupe(this.generator, this.destinationHandler, this.incrementalAppendStream, + initialState2.initialRawTableStatus().maxProcessedTimestamp(), ""); + + DIFFER.diffFinalTableRecords( + List.of( + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "pre-dst local tz 1", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T01:59:00.000000000Z", + "_AIRBYTE_META": { 
+ "errors": [] + }, + "ID1": 1, + "ID2": 100, + "STRING": "Alice00" + }"""), + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "pre-dst utc 1", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:00:00.000000000Z", + "_AIRBYTE_META": { + "errors": [] + }, + "ID1": 1, + "ID2": 100, + "STRING": "Alice01" + }"""), + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "post-dst utc 1", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00.000000000Z", + "_AIRBYTE_META": {"errors": []}, + "ID1": 1, + "ID2": 100, + "STRING": "Alice02" + } + """), + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "pre-dst local tz 2", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T01:59:00.000000000Z", + "_AIRBYTE_META": { + "errors": [] + }, + "ID1": 2, + "ID2": 100, + "STRING": "Bob00" + }"""), + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "pre-dst utc 2", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:00:00.000000000Z", + "_AIRBYTE_META": { + "errors": [] + }, + "ID1": 2, + "ID2": 100, + "STRING": "Bob01" + }"""), + Jsons.deserialize(""" + { + "_AIRBYTE_RAW_ID": "post-dst utc 2", + "_AIRBYTE_EXTRACTED_AT": "2024-03-10T02:01:00.000000000Z", + "_AIRBYTE_META": {"errors": []}, + "ID1": 2, + "ID2": 100, + "STRING": "Bob02" + } + """)), + this.dumpFinalTableRecords(this.streamId, "")); + } + } From 5ced57c30d869035d9fb34aa99a85ec67f095b5c Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 5 Mar 2024 14:42:05 -0800 Subject: [PATCH 26/33] move timestamp crap into min/max call --- .../SnowflakeDestinationHandler.java | 43 +++++++++---------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java index 335b5e5f069d..625168ce6d47 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java @@ -147,19 +147,19 @@ private InitialRawTableStatus getInitialRawTableState(final StreamId id, final D "raw_table", id.rawTableId(SnowflakeSqlGenerator.QUOTE))).replace( """ WITH MIN_TS AS ( - SELECT TIMESTAMPADD(NANOSECOND, -1, MIN("_airbyte_extracted_at")) AS MIN_TIMESTAMP + SELECT TIMESTAMPADD(NANOSECOND, -1, + MIN(TIMESTAMPADD( + HOUR, + EXTRACT(timezone_hour from "_airbyte_extracted_at"), + TIMESTAMPADD( + MINUTE, + EXTRACT(timezone_minute from "_airbyte_extracted_at"), + CONVERT_TIMEZONE('UTC', "_airbyte_extracted_at") + ) + ))) AS MIN_TIMESTAMP FROM ${raw_table} WHERE "_airbyte_loaded_at" IS NULL - ) SELECT TO_VARCHAR( - TIMESTAMPADD( - HOUR, - EXTRACT(timezone_hour from MIN_TIMESTAMP), - TIMESTAMPADD( - MINUTE, - EXTRACT(timezone_minute from MIN_TIMESTAMP), - CONVERT_TIMEZONE('UTC', MIN_TIMESTAMP) - ) - ),'YYYY-MM-DDTHH24:MI:SS.FF9TZH:TZM') as MIN_TIMESTAMP_UTC from MIN_TS; + ) SELECT TO_VARCHAR(MIN_TIMESTAMP,'YYYY-MM-DDTHH24:MI:SS.FF9TZH:TZM') as MIN_TIMESTAMP_UTC from MIN_TS; """)), // The query will always return exactly one record, so use .get(0) record -> record.getString("MIN_TIMESTAMP_UTC")).get(0)); @@ -174,19 +174,18 @@ record -> record.getString("MIN_TIMESTAMP_UTC")).get(0)); "raw_table", 
id.rawTableId(SnowflakeSqlGenerator.QUOTE))).replace( """ WITH MAX_TS AS ( - SELECT MAX("_airbyte_extracted_at") + SELECT MAX(TIMESTAMPADD( + HOUR, + EXTRACT(timezone_hour from "_airbyte_extracted_at"), + TIMESTAMPADD( + MINUTE, + EXTRACT(timezone_minute from "_airbyte_extracted_at"), + CONVERT_TIMEZONE('UTC', "_airbyte_extracted_at") + ) + )) AS MAX_TIMESTAMP FROM ${raw_table} - ) SELECT TO_VARCHAR( - TIMESTAMPADD( - HOUR, - EXTRACT(timezone_hour from MAX_TIMESTAMP), - TIMESTAMPADD( - MINUTE, - EXTRACT(timezone_minute from MAX_TIMESTAMP), - CONVERT_TIMEZONE('UTC', MAX_TIMESTAMP) - ) - ),'YYYY-MM-DDTHH24:MI:SS.FF9TZH:TZM') as MAX_TIMESTAMP_UTC from MAX_TS; + ) SELECT TO_VARCHAR(MAX_TIMESTAMP,'YYYY-MM-DDTHH24:MI:SS.FF9TZH:TZM') as MAX_TIMESTAMP_UTC from MAX_TS; """)), record -> record.getString("MAX_TIMESTAMP_UTC")).get(0)); return new InitialRawTableStatus(true, false, maxTimestamp.map(Instant::parse)); From a067fc5ce11230043bb035b0c76aeb2a26f76c3e Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 5 Mar 2024 15:34:53 -0800 Subject: [PATCH 27/33] move timestamp fiddling back out of cte for max timestamp --- .../SnowflakeDestinationHandler.java | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java index 625168ce6d47..6cb443af7ab3 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java @@ -169,23 +169,29 @@ record -> record.getString("MIN_TIMESTAMP_UTC")).get(0)); // If there are no unloaded raw records, then we can safely skip all existing raw records. // This second query just finds the newest raw record. + + // This is _technically_ wrong, because during the DST transition we might select + // the wrong max timestamp. We _should_ do the UTC conversion inside the CTE, but that's a lot + // of work for a very small edge case. + // We released the fix to write extracted_at in UTC before DST changed, so this is fine. 
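+    // Worked example with the fixtures above: "2024-03-10T02:00:00-08:00" is the later *instant*
+    // (10:00Z vs 09:01Z), but "2024-03-10T02:01:00-07:00" has the later wall-clock reading. So a
+    // plain MAX over the raw TIMESTAMP_TZ values (compared as instants, per Snowflake's default)
+    // can pick the "wrong" record inside that one-hour window. Hence "technically wrong" above.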
final Optional maxTimestamp = Optional.ofNullable(database.queryStrings( conn -> conn.createStatement().executeQuery(new StringSubstitutor(Map.of( "raw_table", id.rawTableId(SnowflakeSqlGenerator.QUOTE))).replace( """ WITH MAX_TS AS ( - SELECT MAX(TIMESTAMPADD( - HOUR, - EXTRACT(timezone_hour from "_airbyte_extracted_at"), - TIMESTAMPADD( - MINUTE, - EXTRACT(timezone_minute from "_airbyte_extracted_at"), - CONVERT_TIMEZONE('UTC', "_airbyte_extracted_at") - ) - )) + SELECT MAX("_airbyte_extracted_at") AS MAX_TIMESTAMP FROM ${raw_table} - ) SELECT TO_VARCHAR(MAX_TIMESTAMP,'YYYY-MM-DDTHH24:MI:SS.FF9TZH:TZM') as MAX_TIMESTAMP_UTC from MAX_TS; + ) SELECT TO_VARCHAR( + TIMESTAMPADD( + HOUR, + EXTRACT(timezone_hour from MAX_TIMESTAMP), + TIMESTAMPADD( + MINUTE, + EXTRACT(timezone_minute from MAX_TIMESTAMP), + CONVERT_TIMEZONE('UTC', MAX_TIMESTAMP) + ) + ),'YYYY-MM-DDTHH24:MI:SS.FF9TZH:TZM') as MAX_TIMESTAMP_UTC from MAX_TS; """)), record -> record.getString("MAX_TIMESTAMP_UTC")).get(0)); return new InitialRawTableStatus(true, false, maxTimestamp.map(Instant::parse)); From 840116d7bdd48e32d4682b58d4b656d7e2fa3b3f Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 5 Mar 2024 15:49:57 -0800 Subject: [PATCH 28/33] format --- .../snowflake/typing_deduping/SnowflakeDestinationHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java index 6cb443af7ab3..61b500ffccdf 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java @@ -171,7 +171,7 @@ record -> record.getString("MIN_TIMESTAMP_UTC")).get(0)); // This second query just finds the newest raw record. // This is _technically_ wrong, because during the DST transition we might select - // the wrong max timestamp. We _should_ do the UTC conversion inside the CTE, but that's a lot + // the wrong max timestamp. We _should_ do the UTC conversion inside the CTE, but that's a lot // of work for a very small edge case. // We released the fix to write extracted_at in UTC before DST changed, so this is fine. 
final Optional maxTimestamp = Optional.ofNullable(database.queryStrings( From a46a655efc83f5441799ecad1f36302a78e8f782 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 5 Mar 2024 15:50:15 -0800 Subject: [PATCH 29/33] stop filtering on extractedAt in commitRawTable --- .../snowflake/typing_deduping/SnowflakeSqlGenerator.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGenerator.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGenerator.java index ba05a9dd22cf..9c87733e6611 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGenerator.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGenerator.java @@ -142,7 +142,7 @@ public Sql updateTable(final StreamConfig stream, dedupFinalTable = dedupFinalTable(stream.id(), finalSuffix, stream.primaryKey(), stream.cursor()); cdcDeletes = cdcDeletes(stream, finalSuffix); } - final String commitRawTable = commitRawTable(stream.id(), minRawTimestamp); + final String commitRawTable = commitRawTable(stream.id()); return transactionally(insertNewRecords, dedupFinalTable, cdcDeletes, commitRawTable); } @@ -424,15 +424,13 @@ private String cdcDeletes(final StreamConfig stream, final String finalSuffix) { } @VisibleForTesting - String commitRawTable(final StreamId id, final Optional minRawTimestamp) { + String commitRawTable(final StreamId id) { return new StringSubstitutor(Map.of( - "raw_table_id", id.rawTableId(QUOTE), - "extractedAtCondition", buildExtractedAtCondition(minRawTimestamp))).replace( + "raw_table_id", id.rawTableId(QUOTE))).replace( """ UPDATE ${raw_table_id} SET "_airbyte_loaded_at" = CURRENT_TIMESTAMP() WHERE "_airbyte_loaded_at" IS NULL - ${extractedAtCondition} ;"""); } From 075cf8a6e9fd5a3e52be720f4f9427d024bee4c3 Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Tue, 5 Mar 2024 17:00:55 -0800 Subject: [PATCH 30/33] migration fixtures with mixed TZs --- .../AbstractSnowflakeTypingDedupingTest.java | 6 +++--- .../SnowflakeSqlGeneratorIntegrationTest.java | 7 +++++++ ...ctedrecords_incremental_dedup_final_mixed_tzs.jsonl | 4 ++++ .../dat/sync2_expectedrecords_raw_mixed_tzs.jsonl | 10 ++++++++++ 4 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final_mixed_tzs.jsonl create mode 100644 airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_raw_mixed_tzs.jsonl diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/AbstractSnowflakeTypingDedupingTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/AbstractSnowflakeTypingDedupingTest.java index 84ea109c25d5..de6f4f849868 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/AbstractSnowflakeTypingDedupingTest.java +++ 
b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/AbstractSnowflakeTypingDedupingTest.java @@ -149,7 +149,7 @@ public void testFinalTableUppercasingMigration_append() throws Exception { runSync(catalog, messages2); - final List expectedRawRecords2 = readRecords("dat/sync2_expectedrecords_raw.jsonl"); + final List expectedRawRecords2 = readRecords("dat/sync2_expectedrecords_raw_mixed_tzs.jsonl"); final List expectedFinalRecords2 = readRecords("dat/sync2_expectedrecords_fullrefresh_append_final.jsonl"); verifySyncResult(expectedRawRecords2, expectedFinalRecords2, disableFinalTableComparison()); } finally { @@ -244,8 +244,8 @@ public void testExtractedAtUtcTimezoneMigration() throws Exception { runSync(catalog, messages2); - final List expectedRawRecords2 = readRecords("dat/sync2_expectedrecords_raw.jsonl"); - final List expectedFinalRecords2 = readRecords("dat/sync2_expectedrecords_incremental_dedup_final.jsonl"); + final List expectedRawRecords2 = readRecords("dat/sync2_expectedrecords_raw_mixed_tzs.jsonl"); + final List expectedFinalRecords2 = readRecords("dat/sync2_expectedrecords_incremental_dedup_final_mixed_tzs.jsonl"); verifySyncResult(expectedRawRecords2, expectedFinalRecords2, disableFinalTableComparison()); } diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java index f0666918286d..23afc2b95b53 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java @@ -45,6 +45,7 @@ import org.apache.commons.text.StringSubstitutor; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; public class SnowflakeSqlGeneratorIntegrationTest extends BaseSqlGeneratorIntegrationTest { @@ -389,6 +390,12 @@ record -> record.get(JavaBaseConstants.COLUMN_NAME_AB_RAW_ID).asText(), }); } + @Disabled("We removed the optimization to only set the loaded_at column for new records after certain _extracted_at") + @Test + @Override + public void ignoreOldRawRecords() { + } + /** * Verify that the final table does not include NON-NULL PKs (after * https://github.com/airbytehq/airbyte/pull/31082) diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final_mixed_tzs.jsonl b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final_mixed_tzs.jsonl new file mode 100644 index 000000000000..ed5dacba150e --- /dev/null +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final_mixed_tzs.jsonl @@ -0,0 +1,4 @@ +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:02.000000000Z", "_AIRBYTE_META":{"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": 
"2000-01-02T00:00:00.000000000Z", "NAME": "Alice", "ADDRESS": {"city": "Seattle", "state": "WA"}} +// Delete Bob, keep Charlie +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "ID1": 2, "ID2": 200, "UPDATED_AT": "2000-01-01T00:03:00.000000000Z", "NAME": "Charlie"} +{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":[]}, "ID1": 3, "ID2": 200, "UPDATED_AT": "2000-01-01T00:04:00.000000000Z", "NAME": "a\bb\fc\nd\re\tf`~!@#$%^&*()_+-=[]\\{}|'\",./<>?"} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_raw_mixed_tzs.jsonl b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_raw_mixed_tzs.jsonl new file mode 100644 index 000000000000..8de05b528bb9 --- /dev/null +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_raw_mixed_tzs.jsonl @@ -0,0 +1,10 @@ +// We keep the records from the first sync +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00Z", "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 3, "id2": 200, "updated_at": "2000-01-01T00:04:00Z", "name": "a\bb\fc\nd\re\tf`~!@#$%^&*()_+-=[]\\{}|'\",./<>?"}} +// And append the records from the second sync +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Bob", "address": {"city": "New York", "state": "NY"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01T00:00:00Z"}} From 61b2d99703f5f32a2e45c274f4958e971fa9a659 Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Tue, 5 Mar 2024 17:37:13 -0800 Subject: [PATCH 31/33] minor fixes --- .../typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java | 3 ++- ...nc2_expectedrecords_incremental_dedup_final_mixed_tzs.jsonl | 2 +- .../resources/dat/sync2_expectedrecords_raw_mixed_tzs.jsonl | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git 
a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java index 23afc2b95b53..7277f5991957 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeSqlGeneratorIntegrationTest.java @@ -393,7 +393,8 @@ record -> record.get(JavaBaseConstants.COLUMN_NAME_AB_RAW_ID).asText(), @Disabled("We removed the optimization to only set the loaded_at column for new records after certain _extracted_at") @Test @Override - public void ignoreOldRawRecords() { + public void ignoreOldRawRecords() throws Exception { + super.ignoreOldRawRecords(); } /** diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final_mixed_tzs.jsonl b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final_mixed_tzs.jsonl index ed5dacba150e..686793ed026b 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final_mixed_tzs.jsonl +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final_mixed_tzs.jsonl @@ -1,4 +1,4 @@ {"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:02.000000000Z", "_AIRBYTE_META":{"errors":[]}, "ID1": 1, "ID2": 200, "UPDATED_AT": "2000-01-02T00:00:00.000000000Z", "NAME": "Alice", "ADDRESS": {"city": "Seattle", "state": "WA"}} -// Delete Bob, keep Charlie +// Delete Bob, keep Charlie. 
We continue to keep old records in PST
{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "ID1": 2, "ID2": 200, "UPDATED_AT": "2000-01-01T00:03:00.000000000Z", "NAME": "Charlie"}
{"_AIRBYTE_EXTRACTED_AT": "1970-01-01T00:00:01.000000000-08:00", "_AIRBYTE_META": {"errors":[]}, "ID1": 3, "ID2": 200, "UPDATED_AT": "2000-01-01T00:04:00.000000000Z", "NAME": "a\bb\fc\nd\re\tf`~!@#$%^&*()_+-=[]\\{}|'\",./<>?"}
diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_raw_mixed_tzs.jsonl b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_raw_mixed_tzs.jsonl
index 8de05b528bb9..8bd778660427 100644
--- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_raw_mixed_tzs.jsonl
+++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/resources/dat/sync2_expectedrecords_raw_mixed_tzs.jsonl
@@ -1,4 +1,4 @@
-// We keep the records from the first sync
+// We keep the records from the first sync, which the old connector wrote with a PST offset
{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}}
{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}}
{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000000-08:00", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}}}

From f3c9b3b54e0a30c749e3e7887ea04d5cf37275bd Mon Sep 17 00:00:00 2001
From: Gireesh Sreepathi
Date: Tue, 5 Mar 2024 19:42:12 -0800
Subject: [PATCH 32/33] remove commitDestinationState temporarily

---
 .../typing_deduping/SnowflakeDestinationHandler.java | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java
index 61b500ffccdf..76b3861fb976 100644
--- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java
+++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java
@@ -226,6 +226,14 @@ public void execute(final Sql sql) throws Exception {
     }
   }

+  @Override
+  public void commitDestinationStates(Map destinationStates) throws Exception {
+    // Skip the state table for the time being since we aren't doing UtcMigration.
+    // JdbcDestinationHandler#commitDestinationStates
+    // seems very contentious in snowflake, and tests are taking a long time holding a lock on this transaction.
+    // Revisit this after the UtcMigration fix is done.
+  }
+
   private Set getPks(final StreamConfig stream) {
     return stream.primaryKey() != null ?
stream.primaryKey().stream().map(ColumnId::name).collect(Collectors.toSet()) : Collections.emptySet();
  }

From 690283019a05ccf44f88ec2a9d7000903349dc0e Mon Sep 17 00:00:00 2001
From: Edward Gao
Date: Wed, 6 Mar 2024 07:34:33 -0800
Subject: [PATCH 33/33] Revert "remove commitDestinationState temporarily"

This reverts commit f3c9b3b54e0a30c749e3e7887ea04d5cf37275bd.

---
 .../typing_deduping/SnowflakeDestinationHandler.java | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java
index 76b3861fb976..61b500ffccdf 100644
--- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java
+++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/typing_deduping/SnowflakeDestinationHandler.java
@@ -226,14 +226,6 @@ public void execute(final Sql sql) throws Exception {
     }
   }

-  @Override
-  public void commitDestinationStates(Map destinationStates) throws Exception {
-    // Skip the state table for the time being since we aren't doing UtcMigration.
-    // JdbcDestinationHandler#commitDestinationStates
-    // seems very contentious in snowflake, and tests are taking a long time holding a lock on this transaction.
-    // Revisit this after the UtcMigration fix is done.
-  }
-
   private Set getPks(final StreamConfig stream) {
     return stream.primaryKey() != null ? stream.primaryKey().stream().map(ColumnId::name).collect(Collectors.toSet()) : Collections.emptySet();
  }
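// For orientation: a minimal sketch of what the airbyteExtractedAtUtcForced helper referenced in
// PATCH 24 plausibly emits. Only the helper's name and call site appear in the patches above; this
// body is an illustrative assumption, not the connector's actual implementation. The SQL pattern is
// lifted verbatim from the min/max timestamp queries in PATCH 26, and StringSubstitutor/Map are
// already imported in these classes.
private String airbyteExtractedAtUtcForced(final String sqlExpression) {
  // Reinterpret the wall-clock reading of a TIMESTAMP_TZ as UTC: convert to UTC, then add the
  // original offset back in, e.g. "2024-03-10T02:00:00-08:00" -> "2024-03-10T02:00:00Z".
  return new StringSubstitutor(Map.of("expression", sqlExpression)).replace(
      """
      TIMESTAMPADD(
        HOUR,
        EXTRACT(timezone_hour from ${expression}),
        TIMESTAMPADD(
          MINUTE,
          EXTRACT(timezone_minute from ${expression}),
          CONVERT_TIMEZONE('UTC', ${expression})
        )
      )""");
}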