Skip to content

Commit

Permalink
Merge pull request #343 from GoogleCloudPlatform/speech-streaming
Browse files Browse the repository at this point in the history
Fix speech streaming sample & test
  • Loading branch information
jerjou committed May 10, 2016
2 parents 2a813b9 + 5605ac5 commit d5cffb7
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 7 deletions.
10 changes: 10 additions & 0 deletions speech/api/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,16 @@ for more information.
$ pip install -r requirements-speech_streaming.txt
```
The sample uses the [PyAudio][pyaudio] library to stream audio from your
computer's microphone. PyAudio depends on [PortAudio][portaudio], which may
need to be compiled when you install PyAudio. If you run into compilation
issues that mention PortAudio, you may have to [install some
dependencies][pyaudio-install].

[pyaudio]: https://people.csail.mit.edu/hubert/pyaudio/
[portaudio]: http://www.portaudio.com/
[pyaudio-install]: https://people.csail.mit.edu/hubert/pyaudio/#downloads

## Run the example

* To run the `speech_rest.py` sample:
Expand Down
21 changes: 16 additions & 5 deletions speech/api/speech_streaming.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/python

import contextlib
import re
import threading

from gcloud.credentials import get_credentials
Expand Down Expand Up @@ -70,16 +71,27 @@ def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
# The initial request must contain metadata about the stream, so the
# server knows how to interpret it.
metadata = InitialRecognizeRequest(
encoding='LINEAR16', sample_rate=rate)
audio_request = AudioRequest(content=audio_stream.read(chunk))
encoding='LINEAR16', sample_rate=rate,
# Note that setting interim_results to True means that you'll
# likely get multiple results for the same bit of audio, as the
# system re-interprets audio in the context of subsequent audio.
# However, this will give us quick results without having to tell
# the server when to finalize a piece of audio.
interim_results=True, continuous=False,
)
data = audio_stream.read(chunk)
audio_request = AudioRequest(content=data)

yield RecognizeRequest(
initial_request=metadata,
audio_request=audio_request)

while not stop_audio.is_set():
data = audio_stream.read(chunk)
if not data:
raise StopIteration()
# Subsequent requests can all just have the content
audio_request = AudioRequest(content=audio_stream.read(chunk))
audio_request = AudioRequest(content=data)

yield RecognizeRequest(audio_request=audio_request)

Expand All @@ -95,8 +107,7 @@ def listen_print_loop(recognize_stream):

# Exit recognition if any of the transcribed phrases could be
# one of our keywords.
if any(alt.confidence > .5 and
(alt.transcript.strip() in ('exit', 'quit'))
if any(re.search(r'\b(exit|quit)\b', alt.transcript)
for result in resp.results
for alt in result.alternatives):
print('Exiting..')
Expand Down
6 changes: 4 additions & 2 deletions speech/api/speech_streaming_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
import io
import re
import sys
import time

from gcp.testing.flaky import flaky
import pytest

import speech_streaming
Expand All @@ -39,6 +39,9 @@ def __call__(self, *args):
return self

def read(self, num_frames):
# Approximate realtime by sleeping for the appropriate time for the
# requested number of frames
time.sleep(num_frames / float(speech_streaming.RATE))
# Audio samples are 16-bit, whereas a Python byte is 8 bits, so each frame
# takes two bytes.
num_bytes = 2 * num_frames
chunk = self.audio_file.read(num_bytes) or self.silence.read(num_bytes)
Expand All @@ -54,7 +57,6 @@ def mock_audio_stream(channels, rate, chunk):
return mock_audio_stream


@flaky
@pytest.mark.skipif(
sys.version_info >= (3, 0),
reason=("grpc doesn't yet support python3 "
Expand Down

0 comments on commit d5cffb7

Please sign in to comment.