diff --git a/README.md b/README.md index c0f895d92e..9f208a2b0b 100644 --- a/README.md +++ b/README.md @@ -110,6 +110,7 @@ Samples are in the [`samples/`](https://github.com/googleapis/java-bigquerystora | Sample | Source Code | Try it | | --------------------------- | --------------------------------- | ------ | +| Json Writer Stream Cdc | [source code](https://github.com/googleapis/java-bigquerystorage/blob/main/samples/snippets/src/main/java/com/example/bigquerystorage/JsonWriterStreamCdc.java) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-bigquerystorage&page=editor&open_in_editor=samples/snippets/src/main/java/com/example/bigquerystorage/JsonWriterStreamCdc.java) | | Parallel Write Committed Stream | [source code](https://github.com/googleapis/java-bigquerystorage/blob/main/samples/snippets/src/main/java/com/example/bigquerystorage/ParallelWriteCommittedStream.java) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-bigquerystorage&page=editor&open_in_editor=samples/snippets/src/main/java/com/example/bigquerystorage/ParallelWriteCommittedStream.java) | | Storage Arrow Sample | [source code](https://github.com/googleapis/java-bigquerystorage/blob/main/samples/snippets/src/main/java/com/example/bigquerystorage/StorageArrowSample.java) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-bigquerystorage&page=editor&open_in_editor=samples/snippets/src/main/java/com/example/bigquerystorage/StorageArrowSample.java) | | Storage Sample | [source code](https://github.com/googleapis/java-bigquerystorage/blob/main/samples/snippets/src/main/java/com/example/bigquerystorage/StorageSample.java) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-bigquerystorage&page=editor&open_in_editor=samples/snippets/src/main/java/com/example/bigquerystorage/StorageSample.java) | diff --git a/samples/snippets/src/main/java/com/example/bigquerystorage/JsonWriterStreamCdc.java b/samples/snippets/src/main/java/com/example/bigquerystorage/JsonWriterStreamCdc.java new file mode 100644 index 0000000000..fef48095a2 --- /dev/null +++ b/samples/snippets/src/main/java/com/example/bigquerystorage/JsonWriterStreamCdc.java @@ -0,0 +1,205 @@ +/* + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.example.bigquerystorage; + +import com.google.api.core.ApiFuture; +import com.google.api.core.ApiFutureCallback; +import com.google.api.core.ApiFutures; +import com.google.cloud.bigquery.BigQuery; +import com.google.cloud.bigquery.BigQueryException; +import com.google.cloud.bigquery.BigQueryOptions; +import com.google.cloud.bigquery.QueryJobConfiguration; +import com.google.cloud.bigquery.storage.v1.AppendRowsResponse; +import com.google.cloud.bigquery.storage.v1.JsonStreamWriter; +import com.google.cloud.bigquery.storage.v1.TableFieldSchema; +import com.google.cloud.bigquery.storage.v1.TableFieldSchema.Mode; +import com.google.cloud.bigquery.storage.v1.TableName; +import com.google.cloud.bigquery.storage.v1.TableSchema; +import com.google.common.util.concurrent.MoreExecutors; +import com.google.protobuf.Descriptors.DescriptorValidationException; +import java.io.BufferedReader; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import org.json.JSONArray; +import org.json.JSONObject; + +public class JsonWriterStreamCdc { + + private static final String CHANGE_TYPE_PSEUDO_COLUMN = "_change_type"; + + private static final String CREATE_TABLE_QUERY = + "CREATE TABLE `%s.%s` (\n" + + " Customer_ID INT64 PRIMARY KEY NOT ENFORCED,\n" + + " Customer_Enrollment_Date DATE,\n" + + " Customer_Name STRING,\n" + + " Customer_Address STRING,\n" + + " Customer_Tier STRING,\n" + + " Active_Subscriptions JSON)\n" + + "OPTIONS(max_staleness = INTERVAL 15 MINUTE);"; + + private static final String ALTER_TABLE_QUERY = + "ALTER TABLE `%s.%s`\n" + "SET OPTIONS (\n" + " max_staleness = INTERVAL 0 MINUTE);\n"; + + public static void main(String[] args) throws Exception { + // This sample follows the BigQuery change data capture (CDC) blog post that can be found at: + // https://cloud.google.com/blog/products/data-analytics/bigquery-gains-change-data-capture-functionality + if (args.length != 5) { + System.out.println( + "Arguments: project, dataset, table, new_customers_data_file, " + + "modified_customers_data_file"); + return; + } + + final String projectId = args[0]; + final String datasetName = args[1]; + final String tableName = args[2]; + final String newCustomersDataFile = args[3]; + final String modifiedCustomersDataFile = args[4]; + + // Creates a destination table with (max_staleness = INTERVAL 15 MINUTE). + createDestinationTable(datasetName, tableName); + + // Write new customer records to the destination table using UPSERT. + JSONArray newCustomersRecords = getRecordsFromDataFile(newCustomersDataFile); + writeToDefaultStream(projectId, datasetName, tableName, newCustomersRecords); + + // Alter the destination table so that (max_staleness = INTERVAL 0 MINUTE). + alterDestinationTable(datasetName, tableName); + + // Modify the customer records in the destination table using UPSERT. 
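+    // Note: the modified data file also contains DELETE operations; those records carry only
+    // the primary key (Customer_ID) of the row to remove.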
+    JSONArray modifiedCustomersRecords = getRecordsFromDataFile(modifiedCustomersDataFile);
+    writeToDefaultStream(projectId, datasetName, tableName, modifiedCustomersRecords);
+  }
+
+  public static void createDestinationTable(String datasetName, String tableName) {
+    query(String.format(CREATE_TABLE_QUERY, datasetName, tableName));
+  }
+
+  public static void alterDestinationTable(String datasetName, String tableName) {
+    query(String.format(ALTER_TABLE_QUERY, datasetName, tableName));
+  }
+
+  private static void query(String query) {
+    BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
+    QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder(query).build();
+    try {
+      bigquery.query(queryConfig);
+    } catch (BigQueryException | InterruptedException e) {
+      System.out.println("Query did not run \n" + e.toString());
+    }
+  }
+
+  // Writes the given records to the destination table through the default stream.
+  public static void writeToDefaultStream(
+      String projectId, String datasetName, String tableName, JSONArray data)
+      throws DescriptorValidationException, InterruptedException, IOException {
+    // To use UPSERT and DELETE operations, the writer schema must include the additional
+    // "_change_type" pseudo column.
+    TableSchema tableSchema =
+        TableSchema.newBuilder()
+            .addFields(
+                TableFieldSchema.newBuilder()
+                    .setName("Customer_ID")
+                    .setType(TableFieldSchema.Type.INT64)
+                    .setMode(Mode.NULLABLE)
+                    .build())
+            .addFields(
+                TableFieldSchema.newBuilder()
+                    .setName("Customer_Enrollment_Date")
+                    .setType(TableFieldSchema.Type.DATE)
+                    .setMode(Mode.NULLABLE)
+                    .build())
+            .addFields(
+                TableFieldSchema.newBuilder()
+                    .setName("Customer_Name")
+                    .setType(TableFieldSchema.Type.STRING)
+                    .setMode(Mode.NULLABLE)
+                    .build())
+            .addFields(
+                TableFieldSchema.newBuilder()
+                    .setName("Customer_Address")
+                    .setType(TableFieldSchema.Type.STRING)
+                    .setMode(Mode.NULLABLE)
+                    .build())
+            .addFields(
+                TableFieldSchema.newBuilder()
+                    .setName("Customer_Tier")
+                    .setType(TableFieldSchema.Type.STRING)
+                    .setMode(Mode.NULLABLE)
+                    .build())
+            .addFields(
+                TableFieldSchema.newBuilder()
+                    .setName("Active_Subscriptions")
+                    .setType(TableFieldSchema.Type.JSON)
+                    .setMode(Mode.NULLABLE)
+                    .build())
+            .addFields(
+                TableFieldSchema.newBuilder()
+                    .setName(CHANGE_TYPE_PSEUDO_COLUMN)
+                    .setType(TableFieldSchema.Type.STRING)
+                    .setMode(Mode.NULLABLE)
+                    .build())
+            .build();
+
+    // Use the JSON stream writer to send records in JSON format.
+    TableName parentTable = TableName.of(projectId, datasetName, tableName);
+    try (JsonStreamWriter writer =
+        JsonStreamWriter.newBuilder(parentTable.toString(), tableSchema).build()) {
+
+      ApiFuture<AppendRowsResponse> future = writer.append(data);
+      // The append method is asynchronous. Rather than waiting for the method to complete,
+      // which can hurt performance, register a completion callback and continue streaming.
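+      // The callback below only counts and logs completed batches; a production pipeline
+      // would typically also examine row-level errors in the response and retry failed
+      // appends.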
+      ApiFutures.addCallback(future, new AppendCompleteCallback(), MoreExecutors.directExecutor());
+    }
+  }
+
+  public static JSONArray getRecordsFromDataFile(String dataFile)
+      throws FileNotFoundException, IOException {
+    JSONArray result = new JSONArray();
+
+    // Read one JSON record per line; close the reader even if parsing fails.
+    try (BufferedReader reader = new BufferedReader(new FileReader(dataFile))) {
+      String line = reader.readLine();
+      while (line != null) {
+        result.put(new JSONObject(line));
+        line = reader.readLine();
+      }
+    }
+
+    return result;
+  }
+
+  static class AppendCompleteCallback implements ApiFutureCallback<AppendRowsResponse> {
+    private static final Object lock = new Object();
+    private static int batchCount = 0;
+
+    @Override
+    public void onSuccess(AppendRowsResponse response) {
+      synchronized (lock) {
+        if (response.hasError()) {
+          System.out.format("Error: %s\n", response.getError());
+        } else {
+          ++batchCount;
+          System.out.format("Wrote batch %d\n", batchCount);
+        }
+      }
+    }
+
+    @Override
+    public void onFailure(Throwable throwable) {
+      System.out.format("Error: %s\n", throwable.toString());
+    }
+  }
+}
diff --git a/samples/snippets/src/test/java/com/example/bigquerystorage/JsonWriterStreamCdcIT.java b/samples/snippets/src/test/java/com/example/bigquerystorage/JsonWriterStreamCdcIT.java
new file mode 100644
index 0000000000..e7b64cc7bc
--- /dev/null
+++ b/samples/snippets/src/test/java/com/example/bigquerystorage/JsonWriterStreamCdcIT.java
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2023 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.bigquerystorage;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import com.google.cloud.bigquery.BigQuery;
+import com.google.cloud.bigquery.BigQuery.DatasetDeleteOption;
+import com.google.cloud.bigquery.BigQueryOptions;
+import com.google.cloud.bigquery.DatasetId;
+import com.google.cloud.bigquery.DatasetInfo;
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+import java.nio.file.FileSystems;
+import java.nio.file.Path;
+import java.util.UUID;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class JsonWriterStreamCdcIT {
+
+  private static final String GOOGLE_CLOUD_PROJECT = System.getenv("GOOGLE_CLOUD_PROJECT");
+
+  private ByteArrayOutputStream bout;
+  private PrintStream out;
+  private BigQuery bigquery;
+  private String datasetName;
+
+  @BeforeClass
+  public static void beforeClass() {}
+
+  @Before
+  public void setUp() {
+    bout = new ByteArrayOutputStream();
+    out = new PrintStream(bout);
+    System.setOut(out);
+
+    bigquery = BigQueryOptions.getDefaultInstance().getService();
+
+    // Create a new dataset for each test.
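+    // The random suffix keeps concurrent test runs from colliding with each other.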
+ datasetName = "JAVA_WRITER_STREAM_CDC_TEST" + UUID.randomUUID().toString().substring(0, 8); + bigquery.create(DatasetInfo.newBuilder(datasetName).build()); + } + + @Test + public void testJsonWriterStreamCdc() throws Exception { + Path newCustomersDataFilePath = + FileSystems.getDefault().getPath("../snippets/src/test/resources", "NewCustomers.json"); + Path modifiedCustomersDataFilePath = + FileSystems.getDefault() + .getPath("../snippets/src/test/resources", "ModifiedCustomers.json"); + String[] args = { + GOOGLE_CLOUD_PROJECT, + datasetName, + "customers", + newCustomersDataFilePath.toAbsolutePath().toString(), + modifiedCustomersDataFilePath.toAbsolutePath().toString() + }; + + JsonWriterStreamCdc.main(args); + assertThat(bout.toString()).contains("Wrote batch"); + } + + @After + public void tearDown() { + bigquery.delete( + DatasetId.of(GOOGLE_CLOUD_PROJECT, datasetName), DatasetDeleteOption.deleteContents()); + System.setOut(null); + } +} diff --git a/samples/snippets/src/test/resources/ModifiedCustomers.json b/samples/snippets/src/test/resources/ModifiedCustomers.json new file mode 100644 index 0000000000..a415545483 --- /dev/null +++ b/samples/snippets/src/test/resources/ModifiedCustomers.json @@ -0,0 +1,5 @@ +{"Customer_ID":1,"Customer_Enrollment_Date":19301,"Customer_Name":"Nick_2.0","Customer_Address":"1600AmphitheatrePkwy,MountainView,CA","Customer_Tier":"Platinum","Active_Subscriptions":"{\"Internet_Subscription\":\"Paid\",\"Music_Subscription\":\"Paid\"}","_CHANGE_TYPE":"UPSERT"} +{"Customer_ID":2,"Customer_Enrollment_Date":19318,"Customer_Name":"Heather","Customer_Address":"285FultonSt,NewYork,NY","Customer_Tier":"Commercial","Active_Subscriptions":"{\"TV_Subscription\":\"Free\"}","_CHANGE_TYPE":"UPSERT"} +{"Customer_ID":7,"_CHANGE_TYPE":"DELETE"} +{"Customer_ID":8,"_CHANGE_TYPE":"DELETE"} +{"Customer_ID":10,"Customer_Enrollment_Date":19410,"Customer_Name":"Melody","Customer_Address":"345SpearSt,SanFrancisco,CA","Customer_Tier":"Commercial","Active_Subscriptions":"{\"Music_Subscription\":\"Free\"}","_CHANGE_TYPE":"UPSERT"} \ No newline at end of file diff --git a/samples/snippets/src/test/resources/NewCustomers.json b/samples/snippets/src/test/resources/NewCustomers.json new file mode 100644 index 0000000000..50bfe34cd3 --- /dev/null +++ b/samples/snippets/src/test/resources/NewCustomers.json @@ -0,0 +1,9 @@ +{"Customer_ID":1,"Customer_Enrollment_Date":19301,"Customer_Name":"Nick","Customer_Address":"1600AmphitheatrePkwy,MountainView,CA","Customer_Tier":"Commercial","Active_Subscriptions":"{\"Internet_Subscription\":\"Trial\",\"Music_Subscription\":\"Free\"}","_CHANGE_TYPE":"UPSERT"} +{"Customer_ID":2,"Customer_Enrollment_Date":19318,"Customer_Name":"Heather","Customer_Address":"350FifthAvenue,NewYork,NY","Customer_Tier":"Commercial","Active_Subscriptions":"{}","_CHANGE_TYPE":"UPSERT"} +{"Customer_ID":3,"Customer_Enrollment_Date":19250,"Customer_Name":"Lyle","Customer_Address":"10DowningStreet,London,England","Customer_Tier":"Enterprise","Active_Subscriptions":"{\"Internet_Subscription\":\"Paid\",\"Music_Subscription\":\"Paid\"}","_CHANGE_TYPE":"UPSERT"} +{"Customer_ID":4,"Customer_Enrollment_Date":19140,"Customer_Name":"Heidi","Customer_Address":"4059MtLeeDr.,Hollywood,CA","Customer_Tier":"Commercial","Active_Subscriptions":"{\"TV_Subscription\":\"Free\"}","_CHANGE_TYPE":"UPSERT"} 
+{"Customer_ID":5,"Customer_Enrollment_Date":19299,"Customer_Name":"Paul","Customer_Address":"221BBakerSt,London,England","Customer_Tier":"Commercial","Active_Subscriptions":"{\"Music_Subscription\":\"Free\"}","_CHANGE_TYPE":"UPSERT"} +{"Customer_ID":6,"Customer_Enrollment_Date":19329,"Customer_Name":"Dylan","Customer_Address":"1DrCarltonBGoodlettPl,SanFrancisco,CA","Customer_Tier":"Commercial","Active_Subscriptions":"{\"TV_Subscription\":\"Trial\"}","_CHANGE_TYPE":"UPSERT"} +{"Customer_ID":7,"Customer_Enrollment_Date":19400,"Customer_Name":"Monica","Customer_Address":"PiazzadelColosseo,1,00184RomaRM,Italy","Customer_Tier":"Commercial","Active_Subscriptions":"{\"Internet_Subscription\":\"Paid\"}","_CHANGE_TYPE":"UPSERT"} +{"Customer_ID":8,"Customer_Enrollment_Date":19377,"Customer_Name":"Katie","Customer_Address":"11WallStreet,NewYork,NY","Customer_Tier":"Enterprise","Active_Subscriptions":"{\"Music_Subscription\":\"Paid\"}","_CHANGE_TYPE":"UPSERT"} +{"Customer_ID":9,"Customer_Enrollment_Date":19410,"Customer_Name":"Jeremy","Customer_Address":"1600PennsylvaniaAvenue,WashingtonDC","Customer_Tier":"Enterprise","Active_Subscriptions":"{\"Internet_Subscription\":\"Paid\",\"TV_Subscription\":\"Paid\",\"Music_Subscription\":\"Trial\"}","_CHANGE_TYPE":"UPSERT"} \ No newline at end of file