Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MySql Source: Fix data processing #6093

Merged
merged 26 commits into from
Oct 12, 2021
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
88d2191
move logic from static JdbcUtils to JdbcSourceOperations
DoNotPanicUA Sep 1, 2021
f240e89
format
DoNotPanicUA Sep 2, 2021
2695b29
Add methods for all types in order to have possibility rewrite them a…
DoNotPanicUA Sep 2, 2021
a6722d6
Make BigQuery in line impl with JDBC changes
DoNotPanicUA Sep 2, 2021
19edc8f
fix binary type
DoNotPanicUA Sep 2, 2021
df74b34
Merge remote-tracking branch 'origin/master' into aleonets/5704-jdbcu…
DoNotPanicUA Sep 7, 2021
c4c204c
add database creations methods with sourceOperations input
DoNotPanicUA Sep 7, 2021
a0670ac
add MySqlSourceOperations
DoNotPanicUA Sep 14, 2021
e9c96eb
Merge remote-tracking branch 'origin/master' into aleonets/3931-3932-…
DoNotPanicUA Sep 14, 2021
534dbac
Merge remote-tracking branch 'origin/master' into aleonets/3931-3932-…
DoNotPanicUA Sep 15, 2021
7703193
fix processing
DoNotPanicUA Sep 15, 2021
dece86e
Fix CDC processing
DoNotPanicUA Sep 17, 2021
86df40c
format
DoNotPanicUA Sep 17, 2021
35c5781
add some tests for double and float
DoNotPanicUA Sep 17, 2021
ea4e2cf
Merge remote-tracking branch 'origin/master' into aleonets/3931-3932-…
DoNotPanicUA Sep 23, 2021
c4c4e85
incr version
DoNotPanicUA Sep 23, 2021
f1d3b7c
Merge remote-tracking branch 'origin/master' into aleonets/3931-3932-…
alexandr-shegeda Oct 4, 2021
0f0883b
#3931 #3932 added zero-date converting to null param "zeroDateTimeBeh…
alexandr-shegeda Oct 6, 2021
1d6b02a
Merge remote-tracking branch 'origin/master' into aleonets/3931-3932-…
DoNotPanicUA Oct 11, 2021
ad88abc
remove old tests covered by data type tests + incr ver
DoNotPanicUA Oct 11, 2021
11005ce
Update airbyte-integrations/connectors/destination-mysql/src/main/jav…
DoNotPanicUA Oct 12, 2021
8d51497
Update docs/integrations/sources/mysql.md
DoNotPanicUA Oct 12, 2021
b7a4d28
Merge remote-tracking branch 'origin/master' into aleonets/3931-3932-…
DoNotPanicUA Oct 12, 2021
342b8f1
add back comments to the data type mapping
DoNotPanicUA Oct 12, 2021
b73787c
Merge remote-tracking branch 'origin/aleonets/3931-3932-fix-mysql' in…
DoNotPanicUA Oct 12, 2021
a166c6a
incr config version
DoNotPanicUA Oct 12, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion airbyte-db/lib/src/main/java/io/airbyte/db/DataTypeUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,17 @@
import java.sql.SQLException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.time.Duration;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.function.Function;

public class DataTypeUtils {

public static final DateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); // Quoted "Z" to indicate UTC, no timezone offset
public static final String DATE_FORMAT_PATTERN = "yyyy-MM-dd'T'HH:mm:ss'Z'";
public static final DateFormat DATE_FORMAT = new SimpleDateFormat(DATE_FORMAT_PATTERN); // Quoted "Z" to indicate UTC, no timezone offset

public static <T> T returnNullIfInvalid(DataTypeSupplier<T> valueProducer) {
return returnNullIfInvalid(valueProducer, ignored -> true);
Expand All @@ -60,4 +65,16 @@ public static String toISO8601String(java.util.Date date) {
return DATE_FORMAT.format(date);
}

/**
 * Formats a calendar date as an ISO 8601 timestamp string, using midnight at the start of that day
 * as the time component.
 *
 * @param date the date to format
 * @return the string produced by {@link #toISO8601String(LocalDateTime)} for the start of {@code date}
 */
public static String toISO8601String(LocalDate date) {
  final LocalDateTime startOfDay = date.atStartOfDay();
  return toISO8601String(startOfDay);
}

/**
 * Formats a date-time with {@code DATE_FORMAT_PATTERN}. The trailing "Z" in that pattern is a quoted
 * literal, so the value is written as-is with no time zone conversion.
 *
 * @param date the date-time to format
 * @return the formatted string
 */
public static String toISO8601String(LocalDateTime date) {
  final DateTimeFormatter formatter = DateTimeFormatter.ofPattern(DATE_FORMAT_PATTERN);
  return date.format(formatter);
}

/**
 * Formats the magnitude of a duration as an ISO 8601 timestamp counted from 1970-01-01T00:00:00.
 *
 * <p>The sign of the duration is discarded. The value is built with {@code java.time} on the local
 * time line, so the output does not depend on the JVM default time zone and no shared mutable
 * formatter is involved.
 *
 * @param duration the duration to format; its absolute value is used
 * @return the epoch start plus {@code |duration|}, formatted by {@link #toISO8601String(LocalDateTime)}
 * @throws ArithmeticException if the absolute value of the duration overflows
 * @throws java.time.DateTimeException if the resulting date-time is outside the supported range
 */
public static String toISO8601String(Duration duration) {
  // Duration#abs negates seconds and nanos as one quantity; taking Math.abs of each part separately
  // would turn e.g. -1.5s (seconds=-2, nanos=500_000_000) into 2.5s.
  final LocalDateTime sinceEpoch = LocalDate.ofEpochDay(0).atStartOfDay().plus(duration.abs());
  return toISO8601String(sinceEpoch);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public class StreamingJdbcDatabase extends JdbcDatabase {
private final JdbcStreamingQueryConfiguration jdbcStreamingQueryConfiguration;

public StreamingJdbcDatabase(DataSource dataSource, JdbcDatabase database, JdbcStreamingQueryConfiguration jdbcStreamingQueryConfiguration) {
this(dataSource, database, jdbcStreamingQueryConfiguration, JdbcUtils.getDefaultSourceOperations());
this(dataSource, database, jdbcStreamingQueryConfiguration, database.sourceOperations);
}

public StreamingJdbcDatabase(DataSource dataSource,
Expand Down
1 change: 1 addition & 0 deletions airbyte-integrations/bases/debezium/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ project.configurations {
}
dependencies {
implementation project(':airbyte-protocol:models')
implementation project(':airbyte-db:lib')

implementation 'io.debezium:debezium-api:1.4.2.Final'
implementation 'io.debezium:debezium-embedded:1.4.2.Final'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,15 @@

package io.airbyte.integrations.debezium.internals;

import io.airbyte.db.DataTypeUtils;
import io.debezium.spi.converter.CustomConverter;
import io.debezium.spi.converter.RelationalColumn;
import java.nio.charset.StandardCharsets;
import java.sql.Timestamp;
import java.time.Duration;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.format.DateTimeParseException;
import java.util.Arrays;
import java.util.Properties;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.slf4j.Logger;
Expand All @@ -41,22 +45,48 @@
* https://debezium.io/documentation/reference/1.4/development/converters.html This is built from
* reference with {@link io.debezium.connector.mysql.converters.TinyIntOneToBooleanConverter} If you
* rename this class then remember to rename the datetime.type property value in
* {@link io.airbyte.integrations.source.mysql.MySqlCdcProperties#getDebeziumProperties()} (If you
* {@link io.airbyte.integrations.source.mysql.MySqlCdcProperties#getDebeziumProperties()} (If you
* don't rename, a test would still fail but it might be tricky to figure out where to change the
* property name)
*/
public class MySQLDateTimeConverter implements CustomConverter<SchemaBuilder, RelationalColumn> {
public class MySQLConverter implements CustomConverter<SchemaBuilder, RelationalColumn> {

private static final Logger LOGGER = LoggerFactory.getLogger(MySQLDateTimeConverter.class);
private static final Logger LOGGER = LoggerFactory.getLogger(MySQLConverter.class);

// Column type names routed to registerDate; converterFor compares them to field.typeName() ignoring case.
private final String[] DATE_TYPES = {"DATE", "DATETIME", "TIME"};
// Column type names routed to registerText; converterFor compares them to field.typeName() ignoring case.
private final String[] TEXT_TYPES = {"VARCHAR", "VARBINARY", "BLOB", "TEXT", "LONGTEXT", "TINYTEXT", "MEDIUMTEXT"};

@Override
public void configure(Properties props) {}

@Override
public void converterFor(RelationalColumn field, ConverterRegistration<SchemaBuilder> registration) {
if (!"DATETIME".equalsIgnoreCase(field.typeName())) {
return;
if (Arrays.stream(DATE_TYPES).anyMatch(s -> s.equalsIgnoreCase(field.typeName()))) {
registerDate(field, registration);
} else if (Arrays.stream(TEXT_TYPES).anyMatch(s -> s.equalsIgnoreCase(field.typeName()))) {
registerText(field, registration);
}
}

/**
 * Registers a converter that emits the column's values with a string schema.
 *
 * <p>Null handling: a non-optional column that declares a default value yields that default; every
 * other null stays null. Byte arrays are decoded as UTF-8 text; any other value is rendered with
 * {@code toString()}.
 *
 * @param field the column the converter is registered for
 * @param registration the registration callback that receives the schema and converter
 */
private void registerText(RelationalColumn field, ConverterRegistration<SchemaBuilder> registration) {
  registration.register(SchemaBuilder.string(), x -> {
    if (x == null) {
      if (!field.isOptional() && field.hasDefaultValue()) {
        return field.defaultValue();
      }
      return null;
    }

    if (x instanceof byte[]) {
      // Decode with an explicit charset so the result does not vary with the JVM's default encoding.
      return new String((byte[]) x, StandardCharsets.UTF_8);
    }
    return x.toString();
  });
}

private void registerDate(RelationalColumn field, ConverterRegistration<SchemaBuilder> registration) {
registration.register(SchemaBuilder.string(), x -> {
if (x == null) {
if (field.isOptional()) {
Expand All @@ -75,11 +105,15 @@ public void converterFor(RelationalColumn field, ConverterRegistration<SchemaBui
* Secondly, we use LocalDateTime to handle this because it represents the DATETIME data type in Java
*/
if (x instanceof LocalDateTime) {
return x.toString();
return DataTypeUtils.toISO8601String((LocalDateTime) x);
} else if (x instanceof LocalDate) {
return DataTypeUtils.toISO8601String((LocalDate) x);
} else if (x instanceof Duration) {
return DataTypeUtils.toISO8601String((Duration) x);
} else if (x instanceof Timestamp) {
return ((Timestamp) x).toLocalDateTime().toString();
return DataTypeUtils.toISO8601String(((Timestamp) x).toLocalDateTime());
} else if (x instanceof Number) {
return new Timestamp(((Number) x).longValue()).toLocalDateTime().toString();
return DataTypeUtils.toISO8601String(new Timestamp(((Number) x).longValue()).toLocalDateTime());
} else if (x instanceof String) {
try {
return LocalDateTime.parse((String) x).toString();
Expand All @@ -88,7 +122,7 @@ public void converterFor(RelationalColumn field, ConverterRegistration<SchemaBui
return x.toString();
}
}
LOGGER.warn("Cannot convert value '{}' to LocalDateTime", x);
LOGGER.warn("Uncovered date class type '{}'. Use default converter", x.getClass().getName());
return x.toString();
});
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@
import io.airbyte.protocol.models.Field;
import io.airbyte.protocol.models.JsonSchemaPrimitive;
import io.airbyte.protocol.models.SyncMode;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -119,28 +121,30 @@ public void testDataTypes() throws Exception {
expectedValues.put(testDataHolder.getNameWithTestPrefix(), testDataHolder.getExpectedValues());
});

recordMessages.forEach(msg -> {
for (AirbyteMessage msg : recordMessages) {
String streamName = msg.getRecord().getStream();
List<String> expectedValuesForStream = expectedValues.get(streamName);
if (expectedValuesForStream != null) {
var a = msg.getRecord().getData().get(getTestColumnName());
String value = getValueFromJsonNode(msg.getRecord().getData().get(getTestColumnName()));
assertTrue(expectedValuesForStream.contains(value),
"Returned value '" + value + "' by streamer " + streamName + " should be in the expected list: " + expectedValuesForStream);
"Returned value '" + value + "' by streamer " + streamName
+ " should be in the expected list: " + expectedValuesForStream);
expectedValuesForStream.remove(value);
}
});
}

expectedValues.forEach((streamName, values) -> assertTrue(values.isEmpty(),
"The streamer " + streamName + " should return all expected values. Missing values: " + values));
}

protected String getValueFromJsonNode(JsonNode jsonNode) {
protected String getValueFromJsonNode(JsonNode jsonNode) throws IOException {
if (jsonNode != null) {
if (jsonNode.isArray()) {
return jsonNode.toString();
}

String value = jsonNode.asText();
String value = (jsonNode.isBinary() ? Arrays.toString(jsonNode.binaryValue()) : jsonNode.asText());
value = (value != null && value.equals("null") ? null : value);
return value;
}
Expand Down Expand Up @@ -176,8 +180,6 @@ private void setupDatabaseInternal() throws Exception {
* @return configured catalog
*/
private ConfiguredAirbyteCatalog getConfiguredCatalog() throws Exception {
final JsonNode config = getConfig();

return new ConfiguredAirbyteCatalog().withStreams(
testDataHolders
.stream()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,8 @@ public JdbcDatabase createDatabase(JsonNode config) throws SQLException {
jdbcConfig.get("jdbc_url").asText(),
driverClass,
jdbcStreamingQueryConfiguration,
jdbcConfig.has("connection_properties") ? jdbcConfig.get("connection_properties").asText() : null);
jdbcConfig.has("connection_properties") ? jdbcConfig.get("connection_properties").asText() : null,
getSourceOperations());

quoteString = (quoteString == null ? database.getMetaData().getIdentifierQuoteString() : quoteString);

Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-mysql/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar

RUN tar xf ${APPLICATION}.tar --strip-components=1

LABEL io.airbyte.version=0.4.4
LABEL io.airbyte.version=0.4.5

LABEL io.airbyte.name=airbyte/source-mysql
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,11 @@ static Properties getDebeziumProperties() {
// https://debezium.io/documentation/reference/1.4/development/converters.html
/**
* {@link io.debezium.connector.mysql.converters.TinyIntOneToBooleanConverter}
* {@link io.airbyte.integrations.debezium.internals.MySQLDateTimeConverter}
* {@link MySQLConverter}
*/
props.setProperty("converters", "boolean, datetime");
props.setProperty("boolean.type", "io.debezium.connector.mysql.converters.TinyIntOneToBooleanConverter");
props.setProperty("datetime.type", "io.airbyte.integrations.debezium.internals.MySQLDateTimeConverter");
props.setProperty("datetime.type", "io.airbyte.integrations.debezium.internals.MySQLConverter");

// snapshot config
// https://debezium.io/documentation/reference/1.4/connectors/mysql.html#mysql-property-snapshot-mode
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import io.airbyte.commons.json.Jsons;
import io.airbyte.commons.util.AutoCloseableIterator;
import io.airbyte.db.jdbc.JdbcDatabase;
import io.airbyte.db.jdbc.JdbcSourceOperations;
import io.airbyte.integrations.base.IntegrationRunner;
import io.airbyte.integrations.base.Source;
import io.airbyte.integrations.debezium.AirbyteDebeziumHandler;
Expand Down Expand Up @@ -69,7 +70,7 @@ public class MySqlSource extends AbstractJdbcSource implements Source {
public static final String CDC_LOG_POS = "_ab_cdc_log_pos";

public MySqlSource() {
super(DRIVER_CLASS, new MySqlJdbcStreamingQueryConfiguration());
super(DRIVER_CLASS, new MySqlJdbcStreamingQueryConfiguration(), new MySqlSourceOperations());
}

private static AirbyteStream removeIncrementalWithoutPk(AirbyteStream stream) {
Expand Down Expand Up @@ -256,4 +257,9 @@ public enum ReplicationMethod {
CDC
}

/**
 * Supplies the source operations used by this MySQL source.
 *
 * @return a newly created {@link MySqlSourceOperations}
 */
@Override
protected JdbcSourceOperations getSourceOperations() {
  final JdbcSourceOperations mySqlOperations = new MySqlSourceOperations();
  return mySqlOperations;
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* MIT License
*
* Copyright (c) 2020 Airbyte
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

package io.airbyte.integrations.source.mysql;

import com.fasterxml.jackson.databind.node.ObjectNode;
import io.airbyte.db.jdbc.JdbcSourceOperations;
import java.sql.ResultSet;
import java.sql.SQLException;

/**
 * {@link JdbcSourceOperations} variant that overrides how boolean columns are written to the
 * output node.
 */
public class MySqlSourceOperations extends JdbcSourceOperations {

  /**
   * Writes the column as a JSON boolean that is {@code true} only when the column's integer value
   * is exactly 1.
   *
   * @param node the JSON object being populated
   * @param columnName the key to write under
   * @param resultSet the result set positioned on the current row
   * @param index the 1-based column index to read
   * @throws SQLException if reading the column fails
   */
  @Override
  protected void putBoolean(ObjectNode node, String columnName, ResultSet resultSet, int index)
      throws SQLException {
    final int rawValue = resultSet.getInt(index);
    final boolean isTrue = rawValue == 1;
    node.put(columnName, isTrue);
  }

}
Loading