From 69d3cc1a5277658e105eb6de7d03d2b2feaa2320 Mon Sep 17 00:00:00 2001
From: Jessica
Date: Tue, 9 Jun 2020 23:44:37 +0000
Subject: [PATCH 1/2] Add code sample and tests for redaction

---
 dlp/deid.py      | 57 ++++++++++++++++++++++++++++++++++++++++++++++++
 dlp/deid_test.py |  6 +++++
 2 files changed, 63 insertions(+)

diff --git a/dlp/deid.py b/dlp/deid.py
index d6afea08b430..072be4d44085 100644
--- a/dlp/deid.py
+++ b/dlp/deid.py
@@ -83,6 +83,63 @@ def deidentify_with_mask(
 
 # [END dlp_deidentify_masking]
 
+# [START dlp_deidentify_redact]
+def deidentify_with_redact(
+    project,
+    input_str,
+    info_types,
+):
+    """Uses the Data Loss Prevention API to deidentify sensitive data in a
+    string by redacting matched input values.
+    Args:
+        project: The Google Cloud project id to use as a parent resource.
+        input_str: The string to deidentify (will be treated as text).
+        info_types: A list of strings representing info types to look for.
+    Returns:
+        None; the response from the API is printed to the terminal.
+    """
+    import google.cloud.dlp
+
+    # Instantiate a client
+    dlp = google.cloud.dlp_v2.DlpServiceClient()
+
+    # Convert the project id into a full resource id.
+    parent = dlp.project_path(project)
+
+    # Construct inspect configuration dictionary
+    inspect_config = {
+        "info_types": [{"name": info_type} for info_type in info_types]
+    }
+
+    # Construct deidentify configuration dictionary
+    deidentify_config = {
+        "info_type_transformations": {
+            "transformations": [
+                {
+                    "primitive_transformation": {
+                        "redact_config": {}
+                    }
+                }
+            ]
+        }
+    }
+
+    # Construct item
+    item = {"value": input_str}
+
+    # Call the API
+    response = dlp.deidentify_content(
+        parent,
+        inspect_config=inspect_config,
+        deidentify_config=deidentify_config,
+        item=item,
+    )
+
+    # Print out the results.
+    print(response.item.value)
+
+# [END dlp_deidentify_redact]
+
 # [START dlp_deidentify_replace]
 def deidentify_with_replace(
     project,
diff --git a/dlp/deid_test.py b/dlp/deid_test.py
index 0a2c53829613..e273c3c49770 100644
--- a/dlp/deid_test.py
+++ b/dlp/deid_test.py
@@ -87,6 +87,12 @@ def test_deidentify_with_mask_masking_number_specified(capsys):
     out, _ = capsys.readouterr()
     assert "My SSN is *******27" in out
 
+def test_deidentify_with_redact(capsys):
+    deid.deidentify_with_redact(
+        GCLOUD_PROJECT, HARMFUL_STRING + "!", ["US_SOCIAL_SECURITY_NUMBER"]
+    )
+    out, _ = capsys.readouterr()
+    assert "My SSN is !" in out
 
 def test_deidentify_with_replace(capsys):
     deid.deidentify_with_replace(

From 908f5d483cb7acbfac7c927847840ef7e80bc206 Mon Sep 17 00:00:00 2001
From: Jessica
Date: Wed, 10 Jun 2020 01:04:23 +0000
Subject: [PATCH 2/2] Fix lint errors

---
 dlp/deid.py      | 1 +
 dlp/deid_test.py | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/dlp/deid.py b/dlp/deid.py
index 072be4d44085..af5213256962 100644
--- a/dlp/deid.py
+++ b/dlp/deid.py
@@ -138,6 +138,7 @@ def deidentify_with_redact(
     # Print out the results.
     print(response.item.value)
 
+
 # [END dlp_deidentify_redact]
 
 # [START dlp_deidentify_replace]
diff --git a/dlp/deid_test.py b/dlp/deid_test.py
index e273c3c49770..a407fff97fc7 100644
--- a/dlp/deid_test.py
+++ b/dlp/deid_test.py
@@ -87,6 +87,7 @@ def test_deidentify_with_mask_masking_number_specified(capsys):
     out, _ = capsys.readouterr()
     assert "My SSN is *******27" in out
 
+
 def test_deidentify_with_redact(capsys):
     deid.deidentify_with_redact(
         GCLOUD_PROJECT, HARMFUL_STRING + "!", ["US_SOCIAL_SECURITY_NUMBER"]
@@ -94,6 +95,7 @@ def test_deidentify_with_redact(capsys):
     out, _ = capsys.readouterr()
     assert "My SSN is !" in out
 
+
 def test_deidentify_with_replace(capsys):
     deid.deidentify_with_replace(
         GCLOUD_PROJECT, HARMFUL_STRING, ["US_SOCIAL_SECURITY_NUMBER"],