From 54c4ea3d7c4925c6520f2935d51a4fabb86d0741 Mon Sep 17 00:00:00 2001 From: PhongChuong Date: Thu, 27 Jun 2024 09:51:19 -0400 Subject: [PATCH 1/4] feat: add additional parameters to CsvOptions and ParquetOptions --- .../com/google/cloud/bigquery/CsvOptions.java | 29 +++++++++++++- .../google/cloud/bigquery/ParquetOptions.java | 38 +++++++++++++++++-- .../google/cloud/bigquery/CsvOptionsTest.java | 4 ++ .../cloud/bigquery/ParquetOptionsTest.java | 14 ++++++- 4 files changed, 79 insertions(+), 6 deletions(-) diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/CsvOptions.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/CsvOptions.java index cbcce2173..eaf4d5023 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/CsvOptions.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/CsvOptions.java @@ -26,12 +26,13 @@ */ public final class CsvOptions extends FormatOptions { - private static final long serialVersionUID = 2193570529308612708L; + private static final long serialVersionUID = 2193570529308612709L; private final Boolean allowJaggedRows; private final Boolean allowQuotedNewLines; private final String encoding; private final String fieldDelimiter; + private final String nullMarker; private final String quote; private final Long skipLeadingRows; private final Boolean preserveAsciiControlCharacters; @@ -42,6 +43,7 @@ public static final class Builder { private Boolean allowQuotedNewLines; private String encoding; private String fieldDelimiter; + private String nullMarker; private String quote; private Long skipLeadingRows; private Boolean preserveAsciiControlCharacters; @@ -53,6 +55,7 @@ private Builder(CsvOptions csvOptions) { this.allowQuotedNewLines = csvOptions.allowQuotedNewLines; this.encoding = csvOptions.encoding; this.fieldDelimiter = csvOptions.fieldDelimiter; + this.nullMarker = csvOptions.getNullMarker(); this.quote = csvOptions.quote; this.skipLeadingRows = 
csvOptions.skipLeadingRows; this.preserveAsciiControlCharacters = csvOptions.preserveAsciiControlCharacters; @@ -110,6 +113,18 @@ public Builder setFieldDelimiter(String fieldDelimiter) { return this; } + /** + * [Optional] Specifies a string that represents a null value in a CSV file. For example, if you + * specify \"\\N\", BigQuery interprets \"\\N\" as a null value when querying a CSV file. The + * default value is the empty string. If you set this property to a custom value, BigQuery + * throws an error if an empty string is present for all data types except for STRING and BYTE. + * For STRING and BYTE columns, BigQuery interprets the empty string as an empty value. + */ + public Builder setNullMarker(String nullMarker) { + this.nullMarker = nullMarker; + return this; + } + /** * Sets the value that is used to quote data sections in a CSV file. BigQuery converts the * string to ISO-8859-1 encoding, and then uses the first byte of the encoded string to split @@ -154,6 +169,7 @@ private CsvOptions(Builder builder) { this.allowQuotedNewLines = builder.allowQuotedNewLines; this.encoding = builder.encoding; this.fieldDelimiter = builder.fieldDelimiter; + this.nullMarker = builder.nullMarker; this.quote = builder.quote; this.skipLeadingRows = builder.skipLeadingRows; this.preserveAsciiControlCharacters = builder.preserveAsciiControlCharacters; @@ -192,6 +208,11 @@ public String getFieldDelimiter() { return fieldDelimiter; } + /** Returns the string that represents a null value in a CSV file. */ + public String getNullMarker() { + return nullMarker; + } + /** Returns the value that is used to quote data sections in a CSV file. 
*/ public String getQuote() { return quote; @@ -226,6 +247,7 @@ public String toString() { .add("allowQuotedNewLines", allowQuotedNewLines) .add("encoding", encoding) .add("fieldDelimiter", fieldDelimiter) + .add("nullMarker", nullMarker) .add("quote", quote) .add("skipLeadingRows", skipLeadingRows) .add("preserveAsciiControlCharacters", preserveAsciiControlCharacters) @@ -240,6 +262,7 @@ public int hashCode() { allowQuotedNewLines, encoding, fieldDelimiter, + nullMarker, quote, skipLeadingRows, preserveAsciiControlCharacters); @@ -258,6 +281,7 @@ com.google.api.services.bigquery.model.CsvOptions toPb() { csvOptions.setAllowQuotedNewlines(allowQuotedNewLines); csvOptions.setEncoding(encoding); csvOptions.setFieldDelimiter(fieldDelimiter); + csvOptions.setNullMarker(nullMarker); csvOptions.setQuote(quote); csvOptions.setSkipLeadingRows(skipLeadingRows); csvOptions.setPreserveAsciiControlCharacters(preserveAsciiControlCharacters); @@ -283,6 +307,9 @@ static CsvOptions fromPb(com.google.api.services.bigquery.model.CsvOptions csvOp if (csvOptions.getFieldDelimiter() != null) { builder.setFieldDelimiter(csvOptions.getFieldDelimiter()); } + if (csvOptions.getNullMarker() != null) { + builder.setNullMarker(csvOptions.getNullMarker()); + } if (csvOptions.getQuote() != null) { builder.setQuote(csvOptions.getQuote()); } diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ParquetOptions.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ParquetOptions.java index 174da41d8..840c60297 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ParquetOptions.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ParquetOptions.java @@ -21,10 +21,11 @@ public class ParquetOptions extends FormatOptions { - private static final long serialVersionUID = 1992L; + private static final long serialVersionUID = 1993L; private final Boolean enableListInference; private final Boolean enumAsString; + private final 
String mapTargetType; public Boolean getEnableListInference() { return enableListInference; @@ -34,16 +35,24 @@ public Boolean getEnumAsString() { return enumAsString; } + + /** Returns how the Parquet map is represented. */ + public String getMapTargetType() { + return mapTargetType; + } + /** A builder for {@code ParquetOptions} objects. */ public static final class Builder { private Boolean enableListInference; private Boolean enumAsString; + private String mapTargetType; private Builder() {} private Builder(ParquetOptions parquetOptions) { this.enableListInference = parquetOptions.enableListInference; this.enumAsString = parquetOptions.enumAsString; + this.mapTargetType = parquetOptions.mapTargetType; } public Builder setEnableListInference(Boolean enableListInference) { @@ -56,6 +65,19 @@ public Builder setEnumAsString(Boolean enumAsString) { return this; } + /** + * [Optional] Indicates how to represent a Parquet map if present. + * + *

<p>When MapTargetType is MAP_TARGET_TYPE_UNSPECIFIED, the map will have the following schema: + * struct map_field_name { repeated struct key_value { key value } }. When it is + * ARRAY_OF_STRUCT, the map will have the following schema: repeated struct map_field_name { key + * value }. + */ + public Builder setMapTargetType(String mapTargetType) { + this.mapTargetType = mapTargetType; + return this; + } + public ParquetOptions build() { return new ParquetOptions(this); } @@ -69,6 +91,7 @@ public Builder toBuilder() { super(FormatOptions.PARQUET); enableListInference = builder.enableListInference; enumAsString = builder.enumAsString; + mapTargetType = builder.mapTargetType; } @Override @@ -76,12 +99,13 @@ public String toString() { return MoreObjects.toStringHelper(this) .add("enableListInference", enableListInference) .add("enumAsString", enumAsString) + .add("mapTargetType", mapTargetType) .toString(); } @Override public final int hashCode() { - return Objects.hash(enableListInference, enumAsString); + return Objects.hash(enableListInference, enumAsString, mapTargetType); } @Override @@ -93,7 +117,9 @@ public final boolean equals(Object obj) { return false; } ParquetOptions other = (ParquetOptions) obj; - return enableListInference == other.enableListInference && enumAsString == other.enumAsString; + return enableListInference == other.enableListInference + && enumAsString == other.enumAsString + && Objects.equals(mapTargetType, ((ParquetOptions) obj).getMapTargetType()); } /** Returns a builder for a {@link ParquetOptions} object. 
*/ @@ -110,6 +136,9 @@ static ParquetOptions fromPb( if (parquetOptions.getEnumAsString() != null) { builder.setEnumAsString(parquetOptions.getEnumAsString()); } + if (parquetOptions.getMapTargetType() != null) { + builder.setMapTargetType(parquetOptions.getMapTargetType()); + } return builder.build(); } @@ -122,6 +151,9 @@ com.google.api.services.bigquery.model.ParquetOptions toPb() { if (enumAsString != null) { parquetOptions.setEnumAsString(enumAsString); } + if (mapTargetType != null) { + parquetOptions.setMapTargetType(mapTargetType); + } return parquetOptions; } } diff --git a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/CsvOptionsTest.java b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/CsvOptionsTest.java index fa05cddc5..fb0293a97 100644 --- a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/CsvOptionsTest.java +++ b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/CsvOptionsTest.java @@ -28,6 +28,7 @@ public class CsvOptionsTest { private static final Boolean ALLOW_QUOTED_NEWLINE = true; private static final Charset ENCODING = StandardCharsets.UTF_8; private static final String FIELD_DELIMITER = ","; + private static final String NULL_MARKER = "\\N"; private static final String QUOTE = "\""; private static final long SKIP_LEADING_ROWS = 42L; @@ -38,6 +39,7 @@ public class CsvOptionsTest { .setAllowQuotedNewLines(ALLOW_QUOTED_NEWLINE) .setEncoding(ENCODING) .setFieldDelimiter(FIELD_DELIMITER) + .setNullMarker(NULL_MARKER) .setQuote(QUOTE) .setSkipLeadingRows(SKIP_LEADING_ROWS) .setPreserveAsciiControlCharacters(PRESERVE_ASCII_CONTROL_CHARACTERS) @@ -65,6 +67,7 @@ public void testBuilder() { assertEquals(ALLOW_QUOTED_NEWLINE, CSV_OPTIONS.allowQuotedNewLines()); assertEquals(ENCODING.name(), CSV_OPTIONS.getEncoding()); assertEquals(FIELD_DELIMITER, CSV_OPTIONS.getFieldDelimiter()); + assertEquals(NULL_MARKER, CSV_OPTIONS.getNullMarker()); assertEquals(QUOTE, CSV_OPTIONS.getQuote()); 
assertEquals(SKIP_LEADING_ROWS, (long) CSV_OPTIONS.getSkipLeadingRows()); assertEquals( @@ -84,6 +87,7 @@ private void compareCsvOptions(CsvOptions expected, CsvOptions value) { assertEquals(expected.allowQuotedNewLines(), value.allowQuotedNewLines()); assertEquals(expected.getEncoding(), value.getEncoding()); assertEquals(expected.getFieldDelimiter(), value.getFieldDelimiter()); + assertEquals(expected.getNullMarker(), value.getNullMarker()); assertEquals(expected.getQuote(), value.getQuote()); assertEquals(expected.getSkipLeadingRows(), value.getSkipLeadingRows()); } diff --git a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ParquetOptionsTest.java b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ParquetOptionsTest.java index 8812b2e27..ca21d805c 100644 --- a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ParquetOptionsTest.java +++ b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ParquetOptionsTest.java @@ -24,14 +24,23 @@ public class ParquetOptionsTest { private static final ParquetOptions OPTIONS = - ParquetOptions.newBuilder().setEnableListInference(true).setEnumAsString(true).build(); + ParquetOptions.newBuilder() + .setEnableListInference(true) + .setEnumAsString(true) + .setMapTargetType("ARRAY_OF_STRUCT") + .build(); @Test public void testToBuilder() { compareParquetOptions(OPTIONS, OPTIONS.toBuilder().build()); ParquetOptions parquetOptions = OPTIONS.toBuilder().setEnableListInference(true).build(); assertEquals(true, parquetOptions.getEnableListInference()); - parquetOptions = parquetOptions.toBuilder().setEnumAsString(true).build(); + parquetOptions = + parquetOptions + .toBuilder() + .setEnumAsString(true) + .setMapTargetType("ARRAY_OF_STRUCT") + .build(); compareParquetOptions(OPTIONS, parquetOptions); } @@ -47,6 +56,7 @@ public void testBuilder() { assertEquals(FormatOptions.PARQUET, OPTIONS.getType()); assertEquals(true, OPTIONS.getEnableListInference()); assertEquals(true, 
OPTIONS.getEnumAsString()); + assertEquals("ARRAY_OF_STRUCT", OPTIONS.getMapTargetType()); } @Test From 19292cbc6e1876907b20e38242aeb7c2f8cb7e64 Mon Sep 17 00:00:00 2001 From: PhongChuong Date: Thu, 27 Jun 2024 10:01:20 -0400 Subject: [PATCH 2/4] fix lint --- .../src/main/java/com/google/cloud/bigquery/ParquetOptions.java | 1 - 1 file changed, 1 deletion(-) diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ParquetOptions.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ParquetOptions.java index 840c60297..301287603 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ParquetOptions.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ParquetOptions.java @@ -35,7 +35,6 @@ public Boolean getEnumAsString() { return enumAsString; } - /** Returns how the Parquet map is represented. */ public String getMapTargetType() { return mapTargetType; From 2d9e8c5e67a52be76d429783cd58a99ac3560f9e Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Thu, 27 Jun 2024 14:52:46 +0000 Subject: [PATCH 3/4] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?= =?UTF-8?q?st-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 3a77fbac9..03cd6681d 100644 --- a/README.md +++ b/README.md @@ -60,13 +60,13 @@ implementation 'com.google.cloud:google-cloud-bigquery' If you are using Gradle without BOM, add this to your dependencies: ```Groovy -implementation 'com.google.cloud:google-cloud-bigquery:2.40.3' +implementation 'com.google.cloud:google-cloud-bigquery:2.41.0' ``` If you are using SBT, add this to your dependencies: ```Scala -libraryDependencies += "com.google.cloud" % "google-cloud-bigquery" % "2.40.3" +libraryDependencies += "com.google.cloud" % 
"google-cloud-bigquery" % "2.41.0" ``` @@ -351,7 +351,7 @@ Java is a registered trademark of Oracle and/or its affiliates. [kokoro-badge-link-5]: http://storage.googleapis.com/cloud-devrel-public/java/badges/java-bigquery/java11.html [stability-image]: https://img.shields.io/badge/stability-stable-green [maven-version-image]: https://img.shields.io/maven-central/v/com.google.cloud/google-cloud-bigquery.svg -[maven-version-link]: https://central.sonatype.com/artifact/com.google.cloud/google-cloud-bigquery/2.40.3 +[maven-version-link]: https://central.sonatype.com/artifact/com.google.cloud/google-cloud-bigquery/2.41.0 [authentication]: https://github.com/googleapis/google-cloud-java#authentication [auth-scopes]: https://developers.google.com/identity/protocols/oauth2/scopes [predefined-iam-roles]: https://cloud.google.com/iam/docs/understanding-roles#predefined_roles From 9fdf1573c0576140b91eed6402113fe04a9e6318 Mon Sep 17 00:00:00 2001 From: PhongChuong Date: Thu, 27 Jun 2024 16:40:48 -0400 Subject: [PATCH 4/4] Addressed review comments --- .../src/main/java/com/google/cloud/bigquery/CsvOptions.java | 2 +- .../main/java/com/google/cloud/bigquery/ParquetOptions.java | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/CsvOptions.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/CsvOptions.java index eaf4d5023..b39c82a7e 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/CsvOptions.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/CsvOptions.java @@ -55,7 +55,7 @@ private Builder(CsvOptions csvOptions) { this.allowQuotedNewLines = csvOptions.allowQuotedNewLines; this.encoding = csvOptions.encoding; this.fieldDelimiter = csvOptions.fieldDelimiter; - this.nullMarker = csvOptions.getNullMarker(); + this.nullMarker = csvOptions.nullMarker; this.quote = csvOptions.quote; this.skipLeadingRows = csvOptions.skipLeadingRows; 
this.preserveAsciiControlCharacters = csvOptions.preserveAsciiControlCharacters; diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ParquetOptions.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ParquetOptions.java index 301287603..b150f3b06 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ParquetOptions.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ParquetOptions.java @@ -67,10 +67,8 @@ public Builder setEnumAsString(Boolean enumAsString) { /** * [Optional] Indicates how to represent a Parquet map if present. * - *

<p>When MapTargetType is MAP_TARGET_TYPE_UNSPECIFIED, the map will have the following schema: - * struct map_field_name { repeated struct key_value { key value } }. When it is - * ARRAY_OF_STRUCT, the map will have the following schema: repeated struct map_field_name { key - * value }. + * @see + * MapTargetType */ public Builder setMapTargetType(String mapTargetType) { this.mapTargetType = mapTargetType;