diff --git a/packages/google-cloud-python-speech/samples/snippets/profanity_filter.py b/packages/google-cloud-python-speech/samples/snippets/profanity_filter.py new file mode 100644 index 000000000000..ca8a9a97af00 --- /dev/null +++ b/packages/google-cloud-python-speech/samples/snippets/profanity_filter.py @@ -0,0 +1,55 @@ +# Copyright 2020 Google LLC +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" Google Cloud Speech API sample application that transcribes an audio +file stored in Cloud Storage with the profanity filter enabled. + +Example usage: + python profanity_filter.py gs://cloud-samples-tests/speech/brooklyn.flac +""" + +import argparse + + +# [START speech_recognize_with_profanity_filter_gcs] +def sync_recognize_with_profanity_filter_gcs(storage_uri): + + from google.cloud import speech + + client = speech.SpeechClient() + + audio = {"uri": storage_uri} + + config = speech.RecognitionConfig( + encoding=speech.RecognitionConfig.AudioEncoding.FLAC, + sample_rate_hertz=16000, + language_code="en-US", + profanity_filter=True, + ) + + response = client.recognize(config=config, audio=audio) + + for i, result in enumerate(response.results): + alternative = result.alternatives[0] + print(u"Transcript: {}".format(alternative.transcript)) + + +# [END speech_recognize_with_profanity_filter_gcs] + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument("path", help="GCS path for audio file to be recognized") + 
args = parser.parse_args() + sync_recognize_with_profanity_filter_gcs(args.path) diff --git a/packages/google-cloud-python-speech/samples/snippets/profanity_filter_test.py b/packages/google-cloud-python-speech/samples/snippets/profanity_filter_test.py new file mode 100644 index 000000000000..0176c3b3602a --- /dev/null +++ b/packages/google-cloud-python-speech/samples/snippets/profanity_filter_test.py @@ -0,0 +1,24 @@ +# Copyright 2020 Google LLC +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re + +import profanity_filter + + +def test_profanity_filter(capsys): + profanity_filter.sync_recognize_with_profanity_filter_gcs( + "gs://cloud-samples-tests/speech/brooklyn.flac" + ) + out, err = capsys.readouterr() + assert re.search(r"how old is the Brooklyn Bridge", out, re.DOTALL | re.I) diff --git a/packages/google-cloud-python-speech/samples/snippets/quickstart.py b/packages/google-cloud-python-speech/samples/snippets/quickstart.py index d050c68585d2..748598860c50 100644 --- a/packages/google-cloud-python-speech/samples/snippets/quickstart.py +++ b/packages/google-cloud-python-speech/samples/snippets/quickstart.py @@ -17,8 +17,6 @@ def run_quickstart(): # [START speech_quickstart] - import io - import os # Imports the Google Cloud client library # [START speech_python_migration_imports] @@ -32,12 +30,9 @@ def run_quickstart(): # [END speech_python_migration_client] # The name of the audio file to transcribe - file_name = os.path.join(os.path.dirname(__file__), 
"resources", "audio.raw") + gcs_uri = "gs://cloud-samples-data/speech/brooklyn_bridge.raw" - # Loads the audio into memory - with io.open(file_name, "rb") as audio_file: - content = audio_file.read() - audio = speech.RecognitionAudio(content=content) + audio = speech.RecognitionAudio(uri=gcs_uri) config = speech.RecognitionConfig( encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16, diff --git a/packages/google-cloud-python-speech/samples/snippets/transcribe_async.py b/packages/google-cloud-python-speech/samples/snippets/transcribe_async.py index 51d5a1328553..b3a0fc342faf 100644 --- a/packages/google-cloud-python-speech/samples/snippets/transcribe_async.py +++ b/packages/google-cloud-python-speech/samples/snippets/transcribe_async.py @@ -38,7 +38,7 @@ def transcribe_file(speech_file): content = audio_file.read() """ - Note that transcription is limited to 60 seconds audio. + Note that transcription is limited to a 60-second audio file. Use a GCS file for audio longer than 1 minute. """ audio = speech.RecognitionAudio(content=content)