From d6438a2e0736f9109114bf80fb625efde4bfc70d Mon Sep 17 00:00:00 2001
From: Franklin Nunez <69214580+b-loved-dreamer@users.noreply.github.com>
Date: Mon, 15 Mar 2021 10:10:28 -0700
Subject: [PATCH] feat: adds model adaptation sample (#121)

* I updated the comment on the transcribe_async file to reflect time limitations on local files for the long_running_recognize

* docs: I updated the comment on the transcribe_async file to reflect time limitations on local files for the long_running_recognize

* chore: I updated the comments on the transcribe_async file to reflect time limitations on local files for the long_running_recognize

* fix: resolved conflicts

    pick f510e8f chore: I updated the comments on the transcribe_async file to reflect time limitations on local files for the long_running_recognize

* chore: added a profanity filter sample

* feat: adds new multi-region sample

* fix: migrated to speech 2.0.0

* fix: fixed lint issues

* fix: deleted a duplicate line that calls the recognizer

* docs: repaired region tag mismatch

* chore: formatting

* chore: added ]

* docs: updated documentation to point to python-speech instead of python-docs-samples

* docs: updated documentation to point to python-speech instead of python-docs-samples

* docs: updated documentation to point to python-speech instead of python-docs-samples

* fix: applied suggested changes

* fix: applied suggested changes

* feat: adds model adaptation sample

* feat: fix issues

* feat: fix region tag

* feat: fixed region tag

* chore: applied suggested changes

* chore: applied suggested changes

* chore: applied suggested changes

* chore: applied suggested changes

* chore: applied suggested changes

* chore: applied suggested changes

* chore: applied suggested changes
---
 speech/microphone/README.rst                  |  6 +-
 speech/microphone/noxfile.py                  | 38 ++++----
 speech/snippets/README.rst                    | 45 ++-------
 speech/snippets/multi_region.py               |  1 +
 speech/snippets/noxfile.py                    | 38 ++++----
 .../snippets/speech_model_adaptation_beta.py  | 93 +++++++++++++++++++
 .../speech_model_adaptation_beta_test.py      | 60 ++++++++++++
 speech/snippets/transcribe_async.py           |  1 -
 8 files changed, 205 insertions(+), 77 deletions(-)
 create mode 100644 speech/snippets/speech_model_adaptation_beta.py
 create mode 100644 speech/snippets/speech_model_adaptation_beta_test.py

diff --git a/speech/microphone/README.rst b/speech/microphone/README.rst
index ef75b54181b5..e3185b312d41 100644
--- a/speech/microphone/README.rst
+++ b/speech/microphone/README.rst
@@ -18,6 +18,10 @@ This directory contains samples for Google Cloud Speech API. The `Google Cloud S
 
 .. _Google Cloud Speech API: https://cloud.google.com/speech/docs/
 
+
+
+
+
 Setup
 -------------------------------------------------------------------------------
 
@@ -39,7 +43,7 @@ Install Dependencies
 
     .. code-block:: bash
 
-        $ git clone https://github.com/googleapis/python-speech.git   
+        $ git clone https://github.com/googleapis/python-speech.git
 
 #. Install `pip`_ and `virtualenv`_ if you do not already have them. You may want to refer to the `Python Development Environment Setup Guide`_ for Google Cloud Platform for instructions.
 
diff --git a/speech/microphone/noxfile.py b/speech/microphone/noxfile.py
index 97bf7da80e39..f2320ea0001c 100644
--- a/speech/microphone/noxfile.py
+++ b/speech/microphone/noxfile.py
@@ -38,28 +38,25 @@
 
 TEST_CONFIG = {
     # You can opt out from the test for specific Python versions.
-    'ignored_versions': ["2.7"],
-
+    "ignored_versions": ["2.7"],
     # Old samples are opted out of enforcing Python type hints
     # All new samples should feature them
-    'enforce_type_hints': False,
-
+    "enforce_type_hints": False,
     # An envvar key for determining the project id to use. Change it
     # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a
     # build specific Cloud project. You can also use your own string
     # to use your own Cloud project.
-    'gcloud_project_env': 'GOOGLE_CLOUD_PROJECT',
+    "gcloud_project_env": "GOOGLE_CLOUD_PROJECT",
     # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT',
-
     # A dictionary you want to inject into your test. Don't put any
     # secrets here. These values will override predefined values.
-    'envs': {},
+    "envs": {},
 }
 
 
 try:
     # Ensure we can import noxfile_config in the project's directory.
-    sys.path.append('.')
+    sys.path.append(".")
     from noxfile_config import TEST_CONFIG_OVERRIDE
 except ImportError as e:
     print("No user noxfile_config found: detail: {}".format(e))
@@ -74,12 +71,12 @@ def get_pytest_env_vars() -> Dict[str, str]:
     ret = {}
 
     # Override the GCLOUD_PROJECT and the alias.
-    env_key = TEST_CONFIG['gcloud_project_env']
+    env_key = TEST_CONFIG["gcloud_project_env"]
     # This should error out if not set.
-    ret['GOOGLE_CLOUD_PROJECT'] = os.environ[env_key]
+    ret["GOOGLE_CLOUD_PROJECT"] = os.environ[env_key]
 
     # Apply user supplied envs.
-    ret.update(TEST_CONFIG['envs'])
+    ret.update(TEST_CONFIG["envs"])
     return ret
 
 
@@ -88,7 +85,7 @@ def get_pytest_env_vars() -> Dict[str, str]:
 ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"]
 
 # Any default versions that should be ignored.
-IGNORED_VERSIONS = TEST_CONFIG['ignored_versions']
+IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"]
 
 TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS])
 
@@ -137,7 +134,7 @@ def _determine_local_import_names(start_dir: str) -> List[str]:
 
 @nox.session
 def lint(session: nox.sessions.Session) -> None:
-    if not TEST_CONFIG['enforce_type_hints']:
+    if not TEST_CONFIG["enforce_type_hints"]:
         session.install("flake8", "flake8-import-order")
     else:
         session.install("flake8", "flake8-import-order", "flake8-annotations")
@@ -146,9 +143,11 @@ def lint(session: nox.sessions.Session) -> None:
     args = FLAKE8_COMMON_ARGS + [
         "--application-import-names",
         ",".join(local_names),
-        "."
+        ".",
     ]
     session.run("flake8", *args)
+
+
 #
 # Black
 #
@@ -161,6 +160,7 @@ def blacken(session: nox.sessions.Session) -> None:
 
     session.run("black", *python_files)
 
+
 #
 # Sample Tests
 #
@@ -169,7 +169,9 @@ def blacken(session: nox.sessions.Session) -> None:
 PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"]
 
 
-def _session_tests(session: nox.sessions.Session, post_install: Callable = None) -> None:
+def _session_tests(
+    session: nox.sessions.Session, post_install: Callable = None
+) -> None:
     """Runs py.test for a particular project."""
     if os.path.exists("requirements.txt"):
         session.install("-r", "requirements.txt")
@@ -200,9 +202,9 @@ def py(session: nox.sessions.Session) -> None:
     if session.python in TESTED_VERSIONS:
         _session_tests(session)
     else:
-        session.skip("SKIPPED: {} tests are disabled for this sample.".format(
-            session.python
-        ))
+        session.skip(
+            "SKIPPED: {} tests are disabled for this sample.".format(session.python)
+        )
 
 
 #
diff --git a/speech/snippets/README.rst b/speech/snippets/README.rst
index e0e235a2cd52..692fc77a354c 100644
--- a/speech/snippets/README.rst
+++ b/speech/snippets/README.rst
@@ -18,6 +18,10 @@ This directory contains samples for Google Cloud Speech API. The `Google Cloud S
 
 .. _Google Cloud Speech API: https://cloud.google.com/speech/docs/
 
+
+
+
+
 Setup
 -------------------------------------------------------------------------------
 
@@ -39,7 +43,7 @@ Install Dependencies
 
     .. code-block:: bash
 
-        $ git clone https://github.com/googleapis/python-speech.git 
+        $ git clone https://github.com/googleapis/python-speech.git
 
 #. Install `pip`_ and `virtualenv`_ if you do not already have them. You may want to refer to the `Python Development Environment Setup Guide`_ for Google Cloud Platform for instructions.
 
@@ -132,7 +136,6 @@ To run this sample:
 
     Google Cloud Speech API sample application using the REST API for async
     batch processing.
-
     Example usage:
         python transcribe_async.py resources/audio.raw
         python transcribe_async.py gs://cloud-samples-tests/speech/vr.flac
@@ -268,42 +271,6 @@ To run this sample:
 
 
 
-Transcribe with Model Selection
-+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-
-.. image:: https://gstatic.com/cloudssh/images/open-btn.png
-   :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=speech/cloud-client/transcribe_auto_punctuation.py,speech/cloud-client/README.rst
-
-
-
-
-To run this sample:
-
-.. code-block:: bash
-
-    $ python transcribe_model_selection.py
-    
-      usage: transcribe_model_selection.py [-h]
-                                           [--model {command_and_search,phone_call,video,default}]
-                                           path
-
-      Google Cloud Speech API sample that demonstrates how to select the model
-      used for speech recognition.
-
-      Example usage:
-          python transcribe_model_selection.py resources/Google_Gnome.wav --model video
-          python transcribe_model_selection.py gs://cloud-samples-tests/speech/Google_Gnome.wav --model video
-
-      positional arguments:
-        path                  File or GCS path for audio file to be recognized
-
-      optional arguments:
-        -h, --help            show this help message and exit
-        --model {command_and_search,phone_call,video,default}
-                              The speech recognition model to use
-
-
-
 Beta Samples
 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 
@@ -358,4 +325,4 @@ to `browse the source`_ and  `report issues`_.
     https://github.com/GoogleCloudPlatform/google-cloud-python/issues
 
 
-.. _Google Cloud SDK: https://cloud.google.com/sdk/
+.. _Google Cloud SDK: https://cloud.google.com/sdk/
\ No newline at end of file
diff --git a/speech/snippets/multi_region.py b/speech/snippets/multi_region.py
index 0027912adc9f..57d4db1bb8d0 100644
--- a/speech/snippets/multi_region.py
+++ b/speech/snippets/multi_region.py
@@ -16,6 +16,7 @@
 
 
 def sync_recognize_with_multi_region_gcs():
+
     # [START speech_multi_region]
 
     # Imports the Google Cloud client library
diff --git a/speech/snippets/noxfile.py b/speech/snippets/noxfile.py
index 97bf7da80e39..f2320ea0001c 100644
--- a/speech/snippets/noxfile.py
+++ b/speech/snippets/noxfile.py
@@ -38,28 +38,25 @@
 
 TEST_CONFIG = {
     # You can opt out from the test for specific Python versions.
-    'ignored_versions': ["2.7"],
-
+    "ignored_versions": ["2.7"],
     # Old samples are opted out of enforcing Python type hints
     # All new samples should feature them
-    'enforce_type_hints': False,
-
+    "enforce_type_hints": False,
     # An envvar key for determining the project id to use. Change it
     # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a
     # build specific Cloud project. You can also use your own string
     # to use your own Cloud project.
-    'gcloud_project_env': 'GOOGLE_CLOUD_PROJECT',
+    "gcloud_project_env": "GOOGLE_CLOUD_PROJECT",
     # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT',
-
     # A dictionary you want to inject into your test. Don't put any
     # secrets here. These values will override predefined values.
-    'envs': {},
+    "envs": {},
 }
 
 
 try:
     # Ensure we can import noxfile_config in the project's directory.
-    sys.path.append('.')
+    sys.path.append(".")
     from noxfile_config import TEST_CONFIG_OVERRIDE
 except ImportError as e:
     print("No user noxfile_config found: detail: {}".format(e))
@@ -74,12 +71,12 @@ def get_pytest_env_vars() -> Dict[str, str]:
     ret = {}
 
     # Override the GCLOUD_PROJECT and the alias.
-    env_key = TEST_CONFIG['gcloud_project_env']
+    env_key = TEST_CONFIG["gcloud_project_env"]
     # This should error out if not set.
-    ret['GOOGLE_CLOUD_PROJECT'] = os.environ[env_key]
+    ret["GOOGLE_CLOUD_PROJECT"] = os.environ[env_key]
 
     # Apply user supplied envs.
-    ret.update(TEST_CONFIG['envs'])
+    ret.update(TEST_CONFIG["envs"])
     return ret
 
 
@@ -88,7 +85,7 @@ def get_pytest_env_vars() -> Dict[str, str]:
 ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"]
 
 # Any default versions that should be ignored.
-IGNORED_VERSIONS = TEST_CONFIG['ignored_versions']
+IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"]
 
 TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS])
 
@@ -137,7 +134,7 @@ def _determine_local_import_names(start_dir: str) -> List[str]:
 
 @nox.session
 def lint(session: nox.sessions.Session) -> None:
-    if not TEST_CONFIG['enforce_type_hints']:
+    if not TEST_CONFIG["enforce_type_hints"]:
         session.install("flake8", "flake8-import-order")
     else:
         session.install("flake8", "flake8-import-order", "flake8-annotations")
@@ -146,9 +143,11 @@ def lint(session: nox.sessions.Session) -> None:
     args = FLAKE8_COMMON_ARGS + [
         "--application-import-names",
         ",".join(local_names),
-        "."
+        ".",
     ]
     session.run("flake8", *args)
+
+
 #
 # Black
 #
@@ -161,6 +160,7 @@ def blacken(session: nox.sessions.Session) -> None:
 
     session.run("black", *python_files)
 
+
 #
 # Sample Tests
 #
@@ -169,7 +169,9 @@ def blacken(session: nox.sessions.Session) -> None:
 PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"]
 
 
-def _session_tests(session: nox.sessions.Session, post_install: Callable = None) -> None:
+def _session_tests(
+    session: nox.sessions.Session, post_install: Callable = None
+) -> None:
     """Runs py.test for a particular project."""
     if os.path.exists("requirements.txt"):
         session.install("-r", "requirements.txt")
@@ -200,9 +202,9 @@ def py(session: nox.sessions.Session) -> None:
     if session.python in TESTED_VERSIONS:
         _session_tests(session)
     else:
-        session.skip("SKIPPED: {} tests are disabled for this sample.".format(
-            session.python
-        ))
+        session.skip(
+            "SKIPPED: {} tests are disabled for this sample.".format(session.python)
+        )
 
 
 #
diff --git a/speech/snippets/speech_model_adaptation_beta.py b/speech/snippets/speech_model_adaptation_beta.py
new file mode 100644
index 000000000000..f821ae467868
--- /dev/null
+++ b/speech/snippets/speech_model_adaptation_beta.py
@@ -0,0 +1,93 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# [START speech_transcribe_with_model_adaptation]
+
+from google.cloud import speech_v1p1beta1 as speech
+
+
+def transcribe_with_model_adaptation(
+    project_id, location, storage_uri, custom_class_id, phrase_set_id
+):
+    """Transcribe audio using a newly created `CustomClass` and `PhraseSet`.
+
+    Args:
+        project_id: Project under which the adaptation resources are created.
+        location: Location of those resources, used in the parent path.
+        storage_uri: Cloud Storage URI of the audio, e.g. gs://[BUCKET]/[FILE]
+        custom_class_id: ID for the custom class to create.
+        phrase_set_id: ID for the phrase set to create.
+
+    Returns:
+        The transcript of the first alternative of the first result.
+    """
+    # Create the adaptation client
+    adaptation_client = speech.AdaptationClient()
+
+    # The parent resource where the custom class and phrase set will be created.
+    parent = f"projects/{project_id}/locations/{location}"
+
+    # Create the custom class
+    custom_class_response = adaptation_client.create_custom_class(
+        {
+            "parent": parent,
+            "custom_class_id": custom_class_id,
+            "custom_class": {
+                "items": [
+                    {"value": "sushido"},
+                    {"value": "altura"},
+                    {"value": "taneda"},
+                ]
+            },
+        }
+    )
+
+    # Create the phrase set; "${{{...}}}" renders as ${<custom_class_id>}
+    phrase_set_response = adaptation_client.create_phrase_set(
+        {
+            "parent": parent,
+            "phrase_set_id": phrase_set_id,
+            "phrase_set": {
+                "boost": 10,
+                "phrases": [
+                    {"value": f"Visit restaurants like ${{{custom_class_id}}}"}
+                ],
+            },
+        }
+    )
+
+    # Use the newly created custom class and phrase set for speech adaptation
+    speech_adaptation = speech.SpeechAdaptation(
+        phrase_sets=[phrase_set_response], custom_classes=[custom_class_response]
+    )
+
+    # Speech configuration object
+    config = speech.RecognitionConfig(
+        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
+        sample_rate_hertz=24000,
+        language_code="en-US",
+        adaptation=speech_adaptation,
+    )
+
+    # Create the speech client and transcribe the audio stored at storage_uri
+    audio = speech.RecognitionAudio(uri=storage_uri)
+    speech_client = speech.SpeechClient()
+    response = speech_client.recognize(config=config, audio=audio)
+
+    for result in response.results:
+        print("Transcript: {}".format(result.alternatives[0].transcript))
+
+    # [END speech_transcribe_with_model_adaptation]
+    return response.results[0].alternatives[0].transcript
diff --git a/speech/snippets/speech_model_adaptation_beta_test.py b/speech/snippets/speech_model_adaptation_beta_test.py
new file mode 100644
index 000000000000..437487a8cd39
--- /dev/null
+++ b/speech/snippets/speech_model_adaptation_beta_test.py
@@ -0,0 +1,60 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import uuid
+
+import google.auth
+
+from google.cloud import speech_v1p1beta1 as speech
+
+import pytest
+
+import speech_model_adaptation_beta
+
+
+STORAGE_URI = "gs://cloud-samples-data/speech/brooklyn_bridge.raw"  # audio under test
+_, PROJECT_ID = google.auth.default()  # keep only the second returned value
+LOCATION = "us-west1"  # location segment of the resource paths built below
+client = speech.AdaptationClient()  # shared by the fixtures for cleanup
+
+
+def test_model_adaptation_beta(custom_class_id, phrase_set_id, capsys):
+    """Transcribe the sample audio with freshly created adaptation resources."""
+    # capsys is not read here; the parameter is kept as in the signature.
+    transcript = speech_model_adaptation_beta.transcribe_with_model_adaptation(
+        PROJECT_ID, LOCATION, STORAGE_URI, custom_class_id, phrase_set_id
+    )
+    assert "how long is the Brooklyn Bridge" in transcript
+
+
+@pytest.fixture
+def custom_class_id():
+    """Yield a unique custom class ID, then delete that class on teardown."""
+    class_id = f"customClassId{uuid.uuid4()}"
+    yield class_id
+    # Clean up: remove the custom class that was created under this ID.
+    client.delete_custom_class(
+        name=f"projects/{PROJECT_ID}/locations/{LOCATION}/customClasses/{class_id}"
+    )
+
+
+@pytest.fixture
+def phrase_set_id():
+    """Yield a unique phrase set ID, then delete that phrase set on teardown."""
+    set_id = f"phraseSetId{uuid.uuid4()}"
+    yield set_id
+    # Clean up: remove the phrase set that was created under this ID.
+    client.delete_phrase_set(
+        name=f"projects/{PROJECT_ID}/locations/{LOCATION}/phraseSets/{set_id}"
+    )
diff --git a/speech/snippets/transcribe_async.py b/speech/snippets/transcribe_async.py
index b98d5516880d..5ab85d624883 100644
--- a/speech/snippets/transcribe_async.py
+++ b/speech/snippets/transcribe_async.py
@@ -16,7 +16,6 @@
 
 """Google Cloud Speech API sample application using the REST API for async
 batch processing.
-
 Example usage:
     python transcribe_async.py resources/audio.raw
     python transcribe_async.py gs://cloud-samples-tests/speech/vr.flac