diff --git a/dlp/README.md b/dlp/README.md new file mode 100644 index 00000000000..8715bb5770c --- /dev/null +++ b/dlp/README.md @@ -0,0 +1,115 @@ +# Cloud Data Loss Prevention (DLP) API Samples +The [Data Loss Prevention API](https://cloud.google.com/dlp/docs/) provides programmatic access to +a powerful detection engine for personally identifiable information and other privacy-sensitive data + in unstructured data streams. + +## Setup +- A Google Cloud project with billing enabled +- [Enable](https://console.cloud.google.com/launcher/details/google/dlp.googleapis.com) the DLP API. +- (Local testing)[Create a service account](https://cloud.google.com/docs/authentication/getting-started) +and set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable pointing to the downloaded credentials file. + +## Build +This project uses the [Assembly Plugin](https://maven.apache.org/plugins/maven-assembly-plugin/usage.html) to build an uber jar. +Run: +``` + mvn clean package +``` + +## Retrieve InfoTypes +An [InfoType identifier](https://cloud.google.com/dlp/docs/infotypes-categories) represents an element of sensitive data. + +[Info types](https://cloud.google.com/dlp/docs/infotypes-reference#global) are updated periodically. Use the API to retrieve the most current +info types for a given category. eg. HEALTH or GOVERNMENT. + ``` + java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Metadata -category GOVERNMENT + ``` + +## Retrieve Categories +[Categories](https://cloud.google.com/dlp/docs/infotypes-categories) provide a way to easily access a group of related InfoTypes. +``` + java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Metadata +``` + +## Inspect data for sensitive elements +Inspect strings, files locally and on Google Cloud Storage and Cloud Datastore kinds with the DLP API. + +Note: image scanning is not currently supported on Google Cloud Storage. 
+For more information, refer to the [API documentation](https://cloud.google.com/dlp/docs). +Optional flags are explained in [this resource](https://cloud.google.com/dlp/docs/reference/rest/v2beta1/content/inspect#InspectConfig). +``` +Commands: + -s Inspects a string using the Data Loss Prevention API. + -f Inspects a local text, PNG, or JPEG file using the Data Loss Prevention API. + -gcs -bucketName -fileName Inspects a text file stored on Google Cloud Storage using the Data Loss + Prevention API. + -ds -projectId [projectId] -namespace [namespace] -kind [kind] Inspects a Datastore kind using the Data Loss Prevention API. + +Options: + --help Show help + -minLikelihood [string] [choices: "LIKELIHOOD_UNSPECIFIED", "VERY_UNLIKELY", "UNLIKELY", "POSSIBLE", "LIKELY", "VERY_LIKELY"] + [default: "LIKELIHOOD_UNSPECIFIED"] + specifies the minimum reporting likelihood threshold. + -maxFindings [number] [default: 0] + maximum number of results to retrieve + -includeQuote [boolean] [default: true] include matching string in results + -infoTypes restrict to limited set of infoTypes [ default: []] + [ eg. 
PHONE_NUMBER US_PASSPORT] +``` +### Examples + - Inspect a string: + ``` + java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -s "My phone number is (123) 456-7890 and my email address is me@somedomain.com" + ``` + - Inspect a local file (text / image): + ``` + java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -f src/test/resources/test.txt + java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -f src/test/resources/test.png + ``` +- Inspect a file on Google Cloud Storage: + ``` + java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -gcs -bucketName my-bucket -fileName my-file.txt + ``` +- Inspect a Google Cloud Datastore kind: + ``` + java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Inspect -ds -kind my-kind + ``` + +## Automatic redaction of sensitive data +[Automatic redaction](https://cloud.google.com/dlp/docs/classification-redaction) produces an output with sensitive data matches removed. + +``` +Commands: + -s Source input string + -r String to replace detected info types + Options: + --help Show help + -minLikelihood [choices: "LIKELIHOOD_UNSPECIFIED", "VERY_UNLIKELY", "UNLIKELY", "POSSIBLE", "LIKELY", "VERY_LIKELY"] + [default: "LIKELIHOOD_UNSPECIFIED"] + specifies the minimum reporting likelihood threshold. + + -infoTypes restrict operation to limited set of info types [ default: []] + [ eg. PHONE_NUMBER US_PASSPORT] +``` + +### Example +- Replace sensitive data in text with `_REDACTED_`: + ``` + java -cp target/dlp-samples-1.0-jar-with-dependencies.jar com.example.dlp.Redact -s "My phone number is (123) 456-7890 and my email address is me@somedomain.com" -r "_REDACTED_" + ``` + +## Integration tests +### Setup +- [Create a Google Cloud Storage bucket](https://console.cloud.google.com/storage) and upload [test.txt](src/test/resources/test.txt). 
+- [Create a Google Cloud Datastore](https://console.cloud.google.com/datastore) kind and add an entity with properties: + - `property1` : john@doe.com + - `property2` : 343-343-3435 +- Update the Google Cloud Storage path and Datastore kind in [InspectIT.java](src/test/java/com/example/dlp/InspectIT.java). +- Ensure that `GOOGLE_APPLICATION_CREDENTIALS` points to authorized service account credentials file. + +## Run +Run all tests: + ``` + mvn clean verify + ``` + diff --git a/dlp/pom.xml b/dlp/pom.xml new file mode 100644 index 00000000000..95f15f638f3 --- /dev/null +++ b/dlp/pom.xml @@ -0,0 +1,101 @@ + + + + + 4.0.0 + jar + com.example + dlp-samples + 1.0 + + + + doc-samples + com.google.cloud + 1.0.0 + .. + + + + 1.8 + 1.8 + 0.7.0 + UTF-8 + + + + + + + com.google.auth + google-auth-library-credentials + ${google.auth.version} + + + com.google.auth + google-auth-library-oauth2-http + ${google.auth.version} + + + + + + + + + com.google.cloud + google-cloud-dlp + 0.20.2-alpha + + + + commons-cli + commons-cli + 1.4 + + + + junit + junit + 4.12 + + + + + + + maven-assembly-plugin + 3.0.0 + + + jar-with-dependencies + + + + + make-assembly + package + + single + + + + + + + + diff --git a/dlp/src/main/java/com/example/dlp/Inspect.java b/dlp/src/main/java/com/example/dlp/Inspect.java new file mode 100644 index 00000000000..4b8750f3e76 --- /dev/null +++ b/dlp/src/main/java/com/example/dlp/Inspect.java @@ -0,0 +1,444 @@ +/** + * Copyright 2017, Google, Inc. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.dlp; + +import com.google.api.gax.grpc.OperationFuture; +import com.google.cloud.ServiceOptions; +import com.google.cloud.dlp.v2beta1.DlpServiceClient; +import com.google.privacy.dlp.v2beta1.CloudStorageOptions; +import com.google.privacy.dlp.v2beta1.CloudStorageOptions.FileSet; +import com.google.privacy.dlp.v2beta1.ContentItem; +import com.google.privacy.dlp.v2beta1.DatastoreOptions; +import com.google.privacy.dlp.v2beta1.Finding; +import com.google.privacy.dlp.v2beta1.InfoType; +import com.google.privacy.dlp.v2beta1.InspectConfig; +import com.google.privacy.dlp.v2beta1.InspectContentRequest; +import com.google.privacy.dlp.v2beta1.InspectContentResponse; +import com.google.privacy.dlp.v2beta1.InspectOperationMetadata; +import com.google.privacy.dlp.v2beta1.InspectOperationResult; +import com.google.privacy.dlp.v2beta1.InspectResult; +import com.google.privacy.dlp.v2beta1.KindExpression; +import com.google.privacy.dlp.v2beta1.Likelihood; +import com.google.privacy.dlp.v2beta1.OutputStorageConfig; +import com.google.privacy.dlp.v2beta1.PartitionId; +import com.google.privacy.dlp.v2beta1.ResultName; +import com.google.privacy.dlp.v2beta1.StorageConfig; +import com.google.protobuf.ByteString; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionGroup; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; + +import java.net.URLConnection; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import javax.activation.MimetypesFileTypeMap; + +public class Inspect { + + private static 
void inspectString(String string, Likelihood minLikelihood, int maxFindings, + List infoTypes, boolean includeQuote) { + // [START dlp_inspect_string] + // instantiate a client + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + + // The minimum likelihood required before returning a match + // minLikelihood = LIKELIHOOD_UNSPECIFIED; + + // The maximum number of findings to report (0 = server maximum) + // maxFindings = 0; + + // The infoTypes of information to match + // infoTypes = ['US_MALE_NAME', 'US_FEMALE_NAME']; + + // Whether to include the matching string + // includeQuote = true; + InspectConfig inspectConfig = InspectConfig.newBuilder() + .addAllInfoTypes(infoTypes) + .setMinLikelihood(minLikelihood) + .setMaxFindings(maxFindings) + .setIncludeQuote(includeQuote) + .build(); + + // The string to inspect + // string = 'My name is Gary and my email is gary@example.com'; + ContentItem contentItem = ContentItem.newBuilder() + .setType("text/plain") + .setValue(string) + .build(); + + InspectContentRequest request = InspectContentRequest.newBuilder() + .setInspectConfig(inspectConfig) + .addItems(contentItem) + .build(); + InspectContentResponse response = dlpServiceClient.inspectContent(request); + + for (InspectResult result : response.getResultsList()) { + if (result.getFindingsCount() > 0) { + System.out.println("Findings: "); + for (Finding finding : result.getFindingsList()) { + if (includeQuote) { + System.out.print("Quote: " + finding.getQuote()); + } + System.out.print("\tInfo type: " + finding.getInfoType().getName()); + System.out.println("\tLikelihood: " + finding.getLikelihood()); + } + } else { + System.out.println("No findings."); + } + } + } catch (Exception e) { + System.out.println("Error in inspectString: " + e.getMessage()); + } + // [END dlp_inspect_string] + } + + private static void inspectFile(String filePath, Likelihood minLikelihood, int maxFindings, + List infoTypes, boolean includeQuote) { + // [START 
dlp_inspect_file] + // Instantiates a client + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + // The path to a local file to inspect. Can be a text, JPG, or PNG file. + // fileName = 'path/to/image.png'; + + // The minimum likelihood required before returning a match + // minLikelihood = LIKELIHOOD_UNSPECIFIED; + + // The maximum number of findings to report (0 = server maximum) + // maxFindings = 0; + + // The infoTypes of information to match + // infoTypes = ['US_MALE_NAME', 'US_FEMALE_NAME']; + + // Whether to include the matching string + // includeQuote = true; + Path path = Paths.get(filePath); + + // detect file mime type, default to application/octet-stream + String mimeType = URLConnection.guessContentTypeFromName(filePath); + if (mimeType == null) { + mimeType = MimetypesFileTypeMap.getDefaultFileTypeMap().getContentType(filePath); + } + if (mimeType == null) { + mimeType = "application/octet-stream"; + } + + byte[] data = Files.readAllBytes(path); + ContentItem contentItem = ContentItem.newBuilder() + .setType(mimeType) + .setData(ByteString.copyFrom(data)) + .build(); + + InspectConfig inspectConfig = InspectConfig.newBuilder() + .addAllInfoTypes(infoTypes) + .setMinLikelihood(minLikelihood) + .setMaxFindings(maxFindings) + .setIncludeQuote(includeQuote) + .build(); + + InspectContentRequest request = InspectContentRequest.newBuilder() + .setInspectConfig(inspectConfig) + .addItems(contentItem) + .build(); + InspectContentResponse response = dlpServiceClient.inspectContent(request); + + for (InspectResult result : response.getResultsList()) { + if (result.getFindingsCount() > 0) { + System.out.println("Findings: "); + for (Finding finding : result.getFindingsList()) { + if (includeQuote) { + System.out.print("Quote: " + finding.getQuote()); + } + System.out.print("\tInfo type: " + finding.getInfoType().getName()); + System.out.println("\tLikelihood: " + finding.getLikelihood()); + } + } else { + System.out.println("No 
findings."); + } + } + } catch (Exception e) { + e.printStackTrace(); + System.out.println("Error in inspectFile: " + e.getMessage()); + } + // [END dlp_inspect_file] + } + + private static void inspectGcsFile(String bucketName, String fileName, + Likelihood minLikelihood, List infoTypes) + throws Exception { + // [START dlp_inspect_gcs] + // Instantiates a client + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { +// The name of the bucket where the file resides. + // bucketName = 'YOUR-BUCKET'; + + // The path to the file within the bucket to inspect. + // Can contain wildcards, e.g. "my-image.*" + // fileName = 'my-image.png'; + + // The minimum likelihood required before returning a match + // minLikelihood = LIKELIHOOD_UNSPECIFIED; + + // The maximum number of findings to report (0 = server maximum) + // maxFindings = 0; + + // The infoTypes of information to match + // infoTypes = ['US_MALE_NAME', 'US_FEMALE_NAME']; + + CloudStorageOptions cloudStorageOptions = CloudStorageOptions + .newBuilder() + .setFileSet(FileSet.newBuilder().setUrl( + "gs://" + bucketName + "/" + fileName + )) + .build(); + + StorageConfig storageConfig = StorageConfig.newBuilder() + .setCloudStorageOptions(cloudStorageOptions) + .build(); + + InspectConfig inspectConfig = InspectConfig.newBuilder() + .addAllInfoTypes(infoTypes) + .setMinLikelihood(minLikelihood) + .build(); + + // optionally provide an output configuration to store results, default : none + OutputStorageConfig outputConfig = OutputStorageConfig.getDefaultInstance(); + + // asynchronously submit an inspect operation + OperationFuture responseFuture = + dlpServiceClient.createInspectOperationAsync(inspectConfig, storageConfig, outputConfig); + + // ... 
+ // block on response, returning job id of the operation + InspectOperationResult inspectOperationResult = responseFuture.get(); + ResultName resultName = inspectOperationResult.getNameAsResultName(); + InspectResult inspectResult = dlpServiceClient.listInspectFindings(resultName).getResult(); + + if (inspectResult.getFindingsCount() > 0) { + System.out.println("Findings: "); + for (Finding finding : inspectResult.getFindingsList()) { + System.out.print("\tInfo type: " + finding.getInfoType().getName()); + System.out.println("\tLikelihood: " + finding.getLikelihood()); + } + } else { + System.out.println("No findings."); + } + } catch (Exception e) { + e.printStackTrace(); + System.out.println("Error in inspectGCSFileAsync: " + e.getMessage()); + } + // [END dlp_inspect_gcs] + } + + private static void inspectDatastore(String projectId, String namespaceId, String kind, + Likelihood minLikelihood, List infoTypes) { + // [START dlp_inspect_datastore] + // Instantiates a client + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + + // (Optional) The project ID containing the target Datastore + // projectId = my-project-id + + // (Optional) The ID namespace of the Datastore document to inspect. + // To ignore Datastore namespaces, set this to an empty string ('') + // namespaceId = ''; + + // The kind of the Datastore entity to inspect. 
+ // kind = 'Person'; + + // The minimum likelihood required before returning a match + // minLikelihood = LIKELIHOOD_UNSPECIFIED; + + // The infoTypes of information to match + // infoTypes = ['US_MALE_NAME', 'US_FEMALE_NAME']; + + // Get reference to the file to be inspected + PartitionId partitionId = PartitionId.newBuilder().setProjectId(projectId) + .setNamespaceId(namespaceId).build(); + KindExpression kindExpression = KindExpression.newBuilder().setName(kind).build(); + DatastoreOptions datastoreOptions = DatastoreOptions.newBuilder() + .setKind(kindExpression).setPartitionId(partitionId).build(); + StorageConfig storageConfig = StorageConfig.newBuilder() + .setDatastoreOptions(datastoreOptions).build(); + + InspectConfig inspectConfig = InspectConfig.newBuilder() + .addAllInfoTypes(infoTypes) + .setMinLikelihood(minLikelihood) + .build(); + + // optionally provide an output configuration to store results, default : none + OutputStorageConfig outputConfig = OutputStorageConfig.getDefaultInstance(); + + // asynchronously submit an inspect operation + OperationFuture responseFuture = + dlpServiceClient.createInspectOperationAsync(inspectConfig, storageConfig, outputConfig); + + // ... 
+ // block on response, returning job id of the operation + InspectOperationResult inspectOperationResult = responseFuture.get(); + ResultName resultName = inspectOperationResult.getNameAsResultName(); + InspectResult inspectResult = dlpServiceClient.listInspectFindings(resultName).getResult(); + + if (inspectResult.getFindingsCount() > 0) { + System.out.println("Findings: "); + for (Finding finding : inspectResult.getFindingsList()) { + System.out.print("\tInfo type: " + finding.getInfoType().getName()); + System.out.println("\tLikelihood: " + finding.getLikelihood()); + } + } else { + System.out.println("No findings."); + } + } catch (Exception e) { + e.printStackTrace(); + System.out.println("Error in inspectDatastore: " + e.getMessage()); + } + // [END dlp_inspect_datastore] + } + + public static void main(String[] args) throws Exception { + + OptionGroup optionsGroup = new OptionGroup(); + optionsGroup.setRequired(true); + Option stringOption = new Option("s", "string", true, "inspect string"); + optionsGroup.addOption(stringOption); + + Option fileOption = new Option("f", "file path", true, "inspect input file path"); + optionsGroup.addOption(fileOption); + + Option gcsOption = new Option("gcs", "Google Cloud Storage", false, "inspect GCS file"); + optionsGroup.addOption(gcsOption); + + Option datastoreOption = new Option("ds", "Google Datastore", false, "inspect Datastore kind"); + optionsGroup.addOption(datastoreOption); + + Options commandLineOptions = new Options(); + commandLineOptions.addOptionGroup(optionsGroup); + + Option minLikelihoodOption = Option.builder("minLikelihood") + .hasArg(true) + .required(false) + .build(); + + commandLineOptions.addOption(minLikelihoodOption); + + Option maxFindingsOption = Option.builder("maxFindings") + .hasArg(true) + .required(false) + .build(); + + commandLineOptions.addOption(maxFindingsOption); + + Option infoTypesOption = Option.builder("infoTypes") + .hasArg(true) + .required(false) + .build(); + 
infoTypesOption.setArgs(Option.UNLIMITED_VALUES); + commandLineOptions.addOption(infoTypesOption); + + Option includeQuoteOption = Option.builder("includeQuote") + .hasArg(true) + .required(false) + .build(); + commandLineOptions.addOption(includeQuoteOption); + + Option bucketNameOption = Option.builder("bucketName") + .hasArg(true) + .required(false) + .build(); + commandLineOptions.addOption(bucketNameOption); + + Option gcsFileNameOption = Option.builder("fileName") + .hasArg(true) + .required(false) + .build(); + commandLineOptions.addOption(gcsFileNameOption); + + Option datastoreProjectIdOption = Option.builder("projectId") + .hasArg(true) + .required(false) + .build(); + commandLineOptions.addOption(datastoreProjectIdOption); + + Option datastoreNamespaceOption = Option.builder("namespace") + .hasArg(true) + .required(false) + .build(); + commandLineOptions.addOption(datastoreNamespaceOption); + + Option datastoreKindOption = Option.builder("kind") + .hasArg(true) + .required(false) + .build(); + commandLineOptions.addOption(datastoreKindOption); + + CommandLineParser parser = new DefaultParser(); + HelpFormatter formatter = new HelpFormatter(); + CommandLine cmd; + + try { + cmd = parser.parse(commandLineOptions, args); + } catch (ParseException e) { + System.out.println(e.getMessage()); + formatter.printHelp(Inspect.class.getName(), commandLineOptions); + System.exit(1); + return; + } + + Likelihood minLikelihood = Likelihood.valueOf(cmd.getOptionValue(minLikelihoodOption.getOpt(), + Likelihood.LIKELIHOOD_UNSPECIFIED.name())); + int maxFindings = Integer.parseInt(cmd.getOptionValue(maxFindingsOption.getOpt(), "0")); + boolean includeQuote = Boolean + .parseBoolean(cmd.getOptionValue(includeQuoteOption.getOpt(), "true")); + + List infoTypesList = Collections.emptyList(); + if (cmd.hasOption(infoTypesOption.getOpt())) { + infoTypesList = new ArrayList<>(); + String[] infoTypes = cmd.getOptionValues(infoTypesOption.getOpt()); + for (String infoType : 
infoTypes) { + infoTypesList.add(InfoType.newBuilder().setName(infoType).build()); + } + } + // string inspection + if (cmd.hasOption("s")) { + String val = cmd.getOptionValue(stringOption.getOpt()); + inspectString(val, minLikelihood, maxFindings, infoTypesList, includeQuote); + } else if (cmd.hasOption("f")) { + String filePath = cmd.getOptionValue(fileOption.getOpt()); + inspectFile(filePath, minLikelihood, maxFindings, infoTypesList, includeQuote); + // gcs file inspection + } else if (cmd.hasOption("gcs")) { + String bucketName = cmd.getOptionValue(bucketNameOption.getOpt()); + String fileName = cmd.getOptionValue(gcsFileNameOption.getOpt()); + inspectGcsFile(bucketName, fileName, minLikelihood, infoTypesList); + // datastore kind inspection + } else if (cmd.hasOption("ds")) { + String namespaceId = cmd.getOptionValue(datastoreNamespaceOption.getOpt(), ""); + String kind = cmd.getOptionValue(datastoreKindOption.getOpt()); + // use default project id when project id is not specified + String projectId = cmd.getOptionValue(datastoreProjectIdOption.getOpt(), + ServiceOptions.getDefaultProjectId()); + inspectDatastore(projectId, namespaceId, kind, minLikelihood, infoTypesList); + } + } +} diff --git a/dlp/src/main/java/com/example/dlp/Metadata.java b/dlp/src/main/java/com/example/dlp/Metadata.java new file mode 100644 index 00000000000..8045a22ef35 --- /dev/null +++ b/dlp/src/main/java/com/example/dlp/Metadata.java @@ -0,0 +1,96 @@ +/** + * Copyright 2017, Google, Inc. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.dlp; + +import com.google.cloud.dlp.v2beta1.DlpServiceClient; +import com.google.privacy.dlp.v2beta1.CategoryDescription; +import com.google.privacy.dlp.v2beta1.InfoTypeDescription; +import com.google.privacy.dlp.v2beta1.ListInfoTypesResponse; +import com.google.privacy.dlp.v2beta1.ListRootCategoriesResponse; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; + +import java.util.List; + +public class Metadata { + + private static void listInfoTypes(String category, String languageCode) throws Exception { + // [START dlp_list_info_types] + // Instantiate a DLP client + try (DlpServiceClient dlpClient = DlpServiceClient.create()) { + // The category of info types to list, e.g. category = 'GOVERNMENT'; + // Optional BCP-47 language code for localized info type friendly names, e.g. 'en-US' + ListInfoTypesResponse infoTypesResponse = dlpClient.listInfoTypes(category, languageCode); + List infoTypeDescriptions = infoTypesResponse.getInfoTypesList(); + for (InfoTypeDescription infoTypeDescription : infoTypeDescriptions) { + System.out.println("Name : " + infoTypeDescription.getName()); + System.out.println("Display name : " + infoTypeDescription.getDisplayName()); + } + } + // [END dlp_list_info_types] + } + + private static void listRootCategories(String languageCode) throws Exception { + // [START dlp_list_root_categories] + // Instantiate a DLP client + try (DlpServiceClient dlpClient = DlpServiceClient.create()) { + // The BCP-47 language code to use, e.g. 
'en-US' + // languageCode = 'en-US' + ListRootCategoriesResponse rootCategoriesResponse = dlpClient + .listRootCategories(languageCode); + for (CategoryDescription categoryDescription : rootCategoriesResponse.getCategoriesList()) { + System.out.println("Name : " + categoryDescription.getName()); + System.out.println("Display name : " + categoryDescription.getDisplayName()); + } + } + // [END dlp_list_root_categories] + } + + public static void main(String[] args) throws Exception { + Options options = new Options(); + Option languageCodeOption = new Option("language", null, true, "BCP-47 language code"); + languageCodeOption.setRequired(false); + options.addOption(languageCodeOption); + + Option categoryOption = new Option("category", null, true, "Category of info types to list."); + categoryOption.setRequired(false); + options.addOption(categoryOption); + + CommandLineParser parser = new DefaultParser(); + HelpFormatter formatter = new HelpFormatter(); + CommandLine cmd; + try { + cmd = parser.parse(options, args); + } catch (ParseException e) { + System.out.println(e.getMessage()); + formatter.printHelp(Metadata.class.getName(), options); + System.exit(1); + return; + } + String languageCode = cmd.getOptionValue(languageCodeOption.getOpt(), "en-US"); + if (cmd.hasOption(categoryOption.getOpt())) { + String category = cmd.getOptionValue(categoryOption.getOpt()); + listInfoTypes(category, languageCode); + } else { + listRootCategories(languageCode); + } + } +} diff --git a/dlp/src/main/java/com/example/dlp/Redact.java b/dlp/src/main/java/com/example/dlp/Redact.java new file mode 100644 index 00000000000..780c34dd3ff --- /dev/null +++ b/dlp/src/main/java/com/example/dlp/Redact.java @@ -0,0 +1,142 @@ +/** + * Copyright 2017, Google, Inc. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.dlp; + +import com.google.cloud.dlp.v2beta1.DlpServiceClient; +import com.google.privacy.dlp.v2beta1.ContentItem; +import com.google.privacy.dlp.v2beta1.InfoType; +import com.google.privacy.dlp.v2beta1.InspectConfig; +import com.google.privacy.dlp.v2beta1.Likelihood; +import com.google.privacy.dlp.v2beta1.RedactContentRequest.ReplaceConfig; +import com.google.privacy.dlp.v2beta1.RedactContentResponse; +import com.google.protobuf.ByteString; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +public class Redact { + + private static void redactString(String string, String replacement, Likelihood minLikelihood, + List infoTypes) throws Exception { + // [START dlp_redact_string] + // Instantiate the DLP client + try (DlpServiceClient dlpClient = DlpServiceClient.create()) { + // The minimum likelihood required before returning a match + // eg.minLikelihood = LIKELIHOOD_VERY_LIKELY; + InspectConfig inspectConfig = InspectConfig.newBuilder() + .addAllInfoTypes(infoTypes) + .setMinLikelihood(minLikelihood) + .build(); + + ContentItem contentItem = ContentItem.newBuilder() + .setType("text/plain") + .setData(ByteString.copyFrom(string.getBytes())) + .build(); + + List replaceConfigs = 
new ArrayList<>(); + + if (infoTypes.isEmpty()) { + // replace all detected sensitive elements with replacement string + replaceConfigs.add( + ReplaceConfig.newBuilder() + .setReplaceWith(replacement) + .build()); + } else { + // Replace select info types with chosen replacement string + for (InfoType infoType : infoTypes) { + replaceConfigs.add( + ReplaceConfig.newBuilder() + .setInfoType(infoType) + .setReplaceWith(replacement) + .build()); + } + } + + RedactContentResponse contentResponse = dlpClient.redactContent( + inspectConfig, Collections.singletonList(contentItem), replaceConfigs); + for (ContentItem responseItem : contentResponse.getItemsList()) { + // print out string with redacted content + System.out.println(responseItem.getData().toStringUtf8()); + } + } + // [END dlp_redact_string] + } + + // Command line application to redact strings using the Data Loss Prevention API + public static void main(String[] args) throws Exception { + Options commandLineOptions = new Options(); + + Option stringOption = Option.builder("s") + .longOpt("source string") + .hasArg(true) + .required(true) + .build(); + commandLineOptions.addOption(stringOption); + + Option replaceOption = Option.builder("r") + .longOpt("replace string") + .hasArg(true) + .required(true) + .build(); + commandLineOptions.addOption(replaceOption); + + Option minLikelihoodOption = Option.builder("minLikelihood") + .hasArg(true) + .required(false) + .build(); + commandLineOptions.addOption(minLikelihoodOption); + + Option infoTypesOption = Option.builder("infoTypes") + .hasArg(true) + .required(false) + .build(); + infoTypesOption.setArgs(Option.UNLIMITED_VALUES); + commandLineOptions.addOption(infoTypesOption); + + CommandLineParser parser = new DefaultParser(); + HelpFormatter formatter = new HelpFormatter(); + CommandLine cmd; + + try { + cmd = parser.parse(commandLineOptions, args); + } catch (ParseException e) { + System.out.println(e.getMessage()); + 
formatter.printHelp(Redact.class.getName(), commandLineOptions); + System.exit(1); + return; + } + + String source = cmd.getOptionValue(stringOption.getOpt()); + String replacement = cmd.getOptionValue(replaceOption.getOpt()); + + List infoTypesList = new ArrayList<>(); + String[] infoTypes = cmd.getOptionValues(infoTypesOption.getOpt()); + if (infoTypes != null) { + for (String infoType : infoTypes) { + infoTypesList.add(InfoType.newBuilder().setName(infoType).build()); + } + } + redactString(source, replacement, Likelihood.LIKELIHOOD_UNSPECIFIED, infoTypesList); + } +} diff --git a/dlp/src/test/java/com/example/dlp/InspectIT.java b/dlp/src/test/java/com/example/dlp/InspectIT.java new file mode 100644 index 00000000000..fff56b734b0 --- /dev/null +++ b/dlp/src/test/java/com/example/dlp/InspectIT.java @@ -0,0 +1,103 @@ +/** + * Copyright 2017, Google, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may obtain a copy of the License + * at + * + *

http://www.apache.org/licenses/LICENSE-2.0 + * + *

Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.example.dlp; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.PrintStream; + +@RunWith(JUnit4.class) +@SuppressWarnings("checkstyle:abbreviationaswordinname") +public class InspectIT { + private ByteArrayOutputStream bout; + private PrintStream out; + + // Update to Google Cloud Storage path containing test.txt + private String bucketName = System.getenv("GOOGLE_CLOUD_PROJECT") + "/dlp"; + + // Update to Google Cloud Datastore Kind containing an entity + // with phone number and email address properties. 
+ private String datastoreKind = "dlp"; + + @Before + public void setUp() { + bout = new ByteArrayOutputStream(); + out = new PrintStream(bout); + System.setOut(out); + assertNotNull(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")); + } + + @Test + public void testStringInspectionReturnsInfoTypes() throws Exception { + String text = + "\"My phone number is (234) 456-7890 and my email address is gary@somedomain.com\""; + Inspect.main(new String[] {"-s", text}); + String output = bout.toString(); + assertTrue(output.contains("PHONE_NUMBER")); + assertTrue(output.contains("EMAIL_ADDRESS")); + } + + @Test + public void testTextFileInspectionReturnsInfoTypes() throws Exception { + ClassLoader classLoader = getClass().getClassLoader(); + File file = new File(classLoader.getResource("test.txt").getFile()); + Inspect.main(new String[] {"-f", file.getAbsolutePath()}); + String output = bout.toString(); + assertTrue(output.contains("PHONE_NUMBER")); + assertTrue(output.contains("EMAIL_ADDRESS")); + } + + @Test + public void testImageFileInspectionReturnsInfoTypes() throws Exception { + ClassLoader classLoader = getClass().getClassLoader(); + File file = new File(classLoader.getResource("test.png").getFile()); + Inspect.main(new String[] {"-f", file.getAbsolutePath()}); + String output = bout.toString(); + assertTrue(output.contains("PHONE_NUMBER")); + assertTrue(output.contains("EMAIL_ADDRESS")); + } + + // Requires that bucket by the specified name exists + @Test + public void testGcsFileInspectionReturnsInfoTypes() throws Exception { + Inspect.main(new String[] {"-gcs", "-bucketName", bucketName, "-fileName", "test.txt"}); + String output = bout.toString(); + assertTrue(output.contains("PHONE_NUMBER")); + assertTrue(output.contains("EMAIL_ADDRESS")); + } + + // Requires a Datastore kind containing an entity + // with phone number and email address properties. 
+ @Test + public void testDatastoreInspectionReturnsInfoTypes() throws Exception { + Inspect.main(new String[] {"-ds", "-kind", datastoreKind}); + String output = bout.toString(); + assertTrue(output.contains("PHONE_NUMBER")); + assertTrue(output.contains("EMAIL_ADDRESS")); + } + + @After + public void tearDown() { + System.setOut(null); + bout.reset(); + } +} diff --git a/dlp/src/test/java/com/example/dlp/MetadataIT.java b/dlp/src/test/java/com/example/dlp/MetadataIT.java new file mode 100644 index 00000000000..ebd0d1a2e9e --- /dev/null +++ b/dlp/src/test/java/com/example/dlp/MetadataIT.java @@ -0,0 +1,61 @@ +/** + * Copyright 2017, Google, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may obtain a copy of the License + * at + * + *

http://www.apache.org/licenses/LICENSE-2.0 + * + *

Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.example.dlp; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; + +@RunWith(JUnit4.class) +@SuppressWarnings("checkstyle:abbreviationaswordinname") +public class MetadataIT { + + private ByteArrayOutputStream bout; + private PrintStream out; + + @Before + public void setUp() { + bout = new ByteArrayOutputStream(); + out = new PrintStream(bout); + System.setOut(out); + assertNotNull(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")); + } + + @Test + public void testRootCategoriesAreRetrieved() throws Exception { + Metadata.main(new String[] {}); + String output = bout.toString(); + assertTrue(output.contains("GOVERNMENT")); + assertTrue(output.contains("HEALTH")); + } + + @Test + public void testInfoTypesAreRetrieved() throws Exception { + Metadata.main(new String[] {"-category", "GOVERNMENT"}); + String output = bout.toString(); + assertTrue(output.contains("AUSTRALIA_TAX_FILE_NUMBER")); + } + + @After + public void tearDown() { + System.setOut(null); + } +} diff --git a/dlp/src/test/java/com/example/dlp/RedactIT.java b/dlp/src/test/java/com/example/dlp/RedactIT.java new file mode 100644 index 00000000000..6e768a1cedf --- /dev/null +++ b/dlp/src/test/java/com/example/dlp/RedactIT.java @@ -0,0 +1,55 @@ +/** + * Copyright 2017, Google, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. 
You may obtain a copy of the License + * at + * + *

http://www.apache.org/licenses/LICENSE-2.0 + * + *

Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.example.dlp; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; + +@RunWith(JUnit4.class) +@SuppressWarnings("checkstyle:abbreviationaswordinname") +public class RedactIT { + private ByteArrayOutputStream bout; + private PrintStream out; + + @Before + public void setUp() { + bout = new ByteArrayOutputStream(); + out = new PrintStream(bout); + System.setOut(out); + assertNotNull(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")); + } + + @Test + public void testInfoTypesInStringAreReplaced() throws Exception { + String text = + "\"My phone number is (234) 456-7890 and my email address is gary@somedomain.com\""; + Redact.main(new String[] {"-s", text, "-r", "_REDACTED_"}); + String output = bout.toString(); + assertTrue(output.contains("My phone number is _REDACTED_ and my email address is _REDACTED_")); + } + + @After + public void tearDown() { + System.setOut(null); + bout.reset(); + } +} diff --git a/dlp/src/test/resources/test.png b/dlp/src/test/resources/test.png new file mode 100644 index 00000000000..8f32c825884 Binary files /dev/null and b/dlp/src/test/resources/test.png differ diff --git a/dlp/src/test/resources/test.txt b/dlp/src/test/resources/test.txt new file mode 100644 index 00000000000..c2ee3815bc9 --- /dev/null +++ b/dlp/src/test/resources/test.txt @@ -0,0 +1 @@ +My phone number is (223) 456-7890 and my email address is gary@somedomain.com. 
\ No newline at end of file diff --git a/pom.xml b/pom.xml index f2a4b975147..6042874f607 100644 --- a/pom.xml +++ b/pom.xml @@ -78,6 +78,7 @@ compute/sendgrid datastore datastore/cloud-client + dlp iap kms language/analysis