-
Notifications
You must be signed in to change notification settings - Fork 6.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'migration/main' into python-texttospeec…
…h-migration
- Loading branch information
Showing
17 changed files
with
730 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
#!/usr/bin/env python | ||
|
||
# Copyright 2018 Google LLC. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
"""Google Cloud Text-To-Speech API sample application for audio profile. | ||
Example usage: | ||
python audio_profile.py --text "hello" --effects_profile_id | ||
"telephony-class-application" --output "output.mp3" | ||
""" | ||
|
||
import argparse | ||
|
||
|
||
# [START tts_synthesize_text_audio_profile] | ||
# [START tts_synthesize_text_audio_profile_beta] | ||
def synthesize_text_with_audio_profile(text, output, effects_profile_id):
    """Synthesize `text` to MP3 with an audio effects profile applied.

    Args:
        text: Plain text to convert to speech.
        output: Path of the file that receives the MP3 bytes.
        effects_profile_id: Identifier of the audio effects profile to
            apply, e.g. "telephony-class-application".
    """
    from google.cloud import texttospeech

    tts_client = texttospeech.TextToSpeechClient()

    # The voice is selected by language only; a specific voice name (as
    # returned by client.list_voices()) could be given instead.
    voice_params = texttospeech.VoiceSelectionParams(language_code="en-US")

    # effects_profile_id takes a list: several profiles may be supplied and
    # they are applied in the order given.
    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
        effects_profile_id=[effects_profile_id],
    )

    result = tts_client.synthesize_speech(
        input=texttospeech.SynthesisInput(text=text),
        voice=voice_params,
        audio_config=mp3_config,
    )

    # audio_content is binary, hence the "wb" mode.
    with open(output, "wb") as audio_file:
        audio_file.write(result.audio_content)
        print('Audio content written to file "%s"' % output)
|
||
|
||
# [END tts_synthesize_text_audio_profile_beta] | ||
# [END tts_synthesize_text_audio_profile] | ||
|
||
|
||
if __name__ == "__main__":
    # Command-line entry point: parse the options and run the sample.
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    # Every option is required: an omitted option would otherwise reach the
    # sample as None and fail later with a far less helpful error.
    parser.add_argument("--output", required=True, help="The output mp3 file.")
    parser.add_argument(
        "--text", required=True, help="The text from which to synthesize speech."
    )
    parser.add_argument(
        "--effects_profile_id",
        required=True,
        help="The audio effects profile id to be applied.",
    )

    args = parser.parse_args()

    synthesize_text_with_audio_profile(args.text, args.output, args.effects_profile_id)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# Copyright 2018, Google, LLC. | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import os | ||
import os.path | ||
|
||
import audio_profile | ||
|
||
# Inputs shared by the test below.
TEXT = "hello"
OUTPUT = "output.mp3"
EFFECTS_PROFILE_ID = "telephony-class-application"


def test_audio_profile(capsys):
    """Check that the sample writes the MP3 file and reports it on stdout."""
    # Start from a clean slate so a stale file cannot satisfy the assertions.
    if os.path.exists(OUTPUT):
        os.remove(OUTPUT)
    assert not os.path.exists(OUTPUT)

    try:
        audio_profile.synthesize_text_with_audio_profile(
            TEXT, OUTPUT, EFFECTS_PROFILE_ID
        )
        out, _ = capsys.readouterr()

        assert ('Audio content written to file "%s"' % OUTPUT) in out
        assert os.path.exists(OUTPUT)
    finally:
        # Remove the generated file even when a step above fails, so a
        # failed run does not leave output.mp3 behind.
        if os.path.exists(OUTPUT):
            os.remove(OUTPUT)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#!/usr/bin/env python | ||
|
||
# Copyright 2018 Google Inc. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
"""Google Cloud Text-To-Speech API sample application. | ||
Example usage: | ||
python list_voices.py | ||
""" | ||
|
||
|
||
# [START tts_list_voices] | ||
def list_voices():
    """Print every voice returned by the Text-to-Speech list_voices call.

    For each voice this prints its name, each supported language code, its
    SSML gender, and its natural sample rate in hertz.
    """
    from google.cloud import texttospeech

    tts_client = texttospeech.TextToSpeechClient()

    # Ask the service for the list of voices.
    listing = tts_client.list_voices()

    for entry in listing.voices:
        # Voice name, e.g. tpc-vocoded
        print(f"Name: {entry.name}")

        # One line per supported language code, e.g. "en-US"
        for code in entry.language_codes:
            print(f"Supported language: {code}")

        # Wrap the gender value in the enum so its symbolic name is printed.
        gender = texttospeech.SsmlVoiceGender(entry.ssml_gender)
        print(f"SSML Voice Gender: {gender.name}")

        # Natural sample rate in hertz, e.g. 24000; the trailing "\n"
        # leaves a blank line between voices.
        print(f"Natural Sample Rate Hertz: {entry.natural_sample_rate_hertz}\n")
|
||
|
||
# [END tts_list_voices] | ||
|
||
|
||
if __name__ == "__main__":
    # Executed as a script: print the available voices.
    list_voices()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Copyright 2018, Google, Inc. | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import list_voices | ||
|
||
|
||
def test_list_voices(capsys):
    """Check the listing mentions en-US and both MALE and FEMALE voices."""
    list_voices.list_voices()
    stdout, _ = capsys.readouterr()

    expected_fragments = (
        "en-US",
        "SSML Voice Gender: MALE",
        "SSML Voice Gender: FEMALE",
    )
    for fragment in expected_fragments:
        assert fragment in stdout
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
#!/usr/bin/env python | ||
|
||
# Copyright 2018 Google Inc. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
"""Google Cloud Text-To-Speech API sample application. | ||
Example usage: | ||
python quickstart.py | ||
""" | ||
|
||
|
||
def run_quickstart():
    # [START tts_quickstart]
    """Synthesize "Hello, World!" and save the audio as output.mp3.

    Make sure to be working in a virtual environment.
    Note: if SSML is used as input, it must be well-formed according to:
        https://www.w3.org/TR/speech-synthesis/
    """
    from google.cloud import texttospeech

    # Create the API client
    tts_client = texttospeech.TextToSpeechClient()

    # The text that will be spoken
    request_input = texttospeech.SynthesisInput(text="Hello, World!")

    # Voice selection: language code "en-US" with a neutral SSML gender
    voice_params = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
    )

    # Ask for the audio to be returned as MP3
    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
    )

    # Send the synthesis request with the input, voice and audio settings
    result = tts_client.synthesize_speech(
        input=request_input,
        voice=voice_params,
        audio_config=mp3_config,
    )

    # audio_content is binary, so the file is opened in "wb" mode.
    with open("output.mp3", "wb") as audio_file:
        # Save the returned audio bytes.
        audio_file.write(result.audio_content)
        print('Audio content written to file "output.mp3"')
    # [END tts_quickstart]
|
||
|
||
if __name__ == "__main__":
    # Executed as a script: run the quickstart sample.
    run_quickstart()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
pytest==7.2.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
future==0.18.2 | ||
google-cloud-texttospeech==2.12.3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
<speak>123 Street Ln, Small Town, IL 12345 USA | ||
<break time="2s"/>1 Jenny St & Number St, Tutone City, CA 86753 | ||
<break time="2s"/>1 Piazza del Fibonacci, 12358 Pisa, Italy | ||
<break time="2s"/></speak> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
123 Street Ln, Small Town, IL 12345 USA | ||
1 Jenny St & Number St, Tutone City, CA 86753 | ||
1 Piazza del Fibonacci, 12358 Pisa, Italy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
<speak>Hello there.</speak> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Hello there! |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
# Copyright 2019 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
|
||
# [START tts_ssml_address_imports] | ||
import html | ||
|
||
from google.cloud import texttospeech | ||
|
||
# [END tts_ssml_address_imports] | ||
|
||
|
||
# [START tts_ssml_address_audio] | ||
def ssml_to_audio(ssml_text, outfile):
    """Synthesize SSML text to an MP3 file.

    Sends the SSML string to the Text-to-Speech API and saves the returned
    audio bytes to the designated output file.

    Args:
        ssml_text: string of SSML text
        outfile: string name of file under which to save audio output

    Returns:
        nothing
    """
    # Create the API client
    tts_client = texttospeech.TextToSpeechClient()

    # Voice selection: language code "en-US" with the MALE SSML gender
    voice_params = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        ssml_gender=texttospeech.SsmlVoiceGender.MALE,
    )

    # Ask for the audio to be returned as MP3
    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
    )

    # Send the synthesis request; the input is passed as SSML, not plain text
    result = tts_client.synthesize_speech(
        input=texttospeech.SynthesisInput(ssml=ssml_text),
        voice=voice_params,
        audio_config=mp3_config,
    )

    # Save the returned audio bytes to the output file.
    with open(outfile, "wb") as audio_file:
        audio_file.write(result.audio_content)
        print("Audio content written to file " + outfile)
# [END tts_ssml_address_audio] | ||
|
||
|
||
# [START tts_ssml_address_ssml] | ||
def text_to_ssml(inputfile):
    """Generate SSML text from a plaintext file.

    The file's contents are escaped with html.escape so that special
    characters are read as text rather than as SSML commands (for example,
    '<' --> '&lt;' and '&' --> '&amp;'). A two-second <break> is then
    inserted after every newline, including a trailing one, and the result
    is wrapped in <speak> tags.

    Args:
        inputfile: string name of plaintext file

    Returns:
        A string of SSML text based on plaintext input
    """
    # Read with an explicit encoding so the result does not depend on the
    # platform's default locale.
    with open(inputfile, "r", encoding="utf-8") as f:
        raw_lines = f.read()

    # Replace special characters with HTML character references.
    escaped_lines = html.escape(raw_lines)

    # Convert plaintext to SSML, waiting two seconds between each line.
    ssml = "<speak>{}</speak>".format(
        escaped_lines.replace("\n", '\n<break time="2s"/>')
    )

    return ssml
|
||
|
||
# [END tts_ssml_address_ssml] | ||
|
||
|
||
# [START tts_ssml_address_test] | ||
def main():
    """Convert the example address file to SSML and synthesize it to MP3."""
    ssml_to_audio(text_to_ssml("resources/example.txt"), "resources/example.mp3")
# [END tts_ssml_address_test] | ||
|
||
|
||
if __name__ == "__main__":
    # Executed as a script: run the example conversion.
    main()
Oops, something went wrong.