Skip to content

Commit

Permalink
Merge pull request #10 from ehennenfent/phrase_timeout
Browse files Browse the repository at this point in the history
Specify Phrase Timeout
  • Loading branch information
ehennenfent authored Jan 27, 2024
2 parents 127e384 + 3a9ec57 commit b947674
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 9 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/linting.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ jobs:
sudo apt install portaudio19-dev
pip install .[dev]
- name: Lint imports
run: isort .
run: isort --check .
- name: Lint formatting
run: black .
run: black --check .
- name: Lint semantics
run: ruff .
- name: Lint types
Expand Down
9 changes: 8 additions & 1 deletion live_illustrate/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ def get_args() -> argparse.Namespace:
type=float,
help="How frequently to summarize the conversation and generate an image",
)
parser.add_argument(
"--phrase_timeout",
default=0.75,
type=float,
help="Period of time after which to force transcription, even without a pause. "
"Specified as a fraction of wait_minutes",
)
parser.add_argument(
"--max_context",
default=2000, # very roughly ten minutes or so?
Expand Down Expand Up @@ -111,7 +118,7 @@ def main() -> None:
logging.getLogger("werkzeug").setLevel(logging.INFO if args.verbose > 0 else logging.WARNING) # flask

# create each of our thread objects with the appropriate command line args
transcriber = AudioTranscriber(model=args.audio_model)
transcriber = AudioTranscriber(model=args.audio_model, phrase_timeout=args.wait_minutes * args.phrase_timeout)
buffer = TextBuffer(
wait_minutes=args.wait_minutes, max_context=args.max_context, persistence=args.persistence_of_memory
)
Expand Down
8 changes: 5 additions & 3 deletions live_illustrate/summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,11 @@ def work(self, transcription: Transcription) -> Summary | None:
self.logger.info("Summarized %d tokens in %s", token_count, datetime.now() - start)
if response.choices:
return [
Summary.from_transcription(transcription, content.strip())
if (content := choice.message.content)
else None
(
Summary.from_transcription(transcription, content.strip())
if (content := choice.message.content)
else None
)
for choice in response.choices
][-1]
return None
5 changes: 3 additions & 2 deletions live_illustrate/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,13 @@


class AudioTranscriber(AsyncThread):
def __init__(self, model: str) -> None:
def __init__(self, model: str, phrase_timeout: float) -> None:
super().__init__("AudioTranscriber")

self.recorder = sr.Recognizer()
self.source = sr.Microphone(sample_rate=SAMPLE_RATE)
self.model = model
self.phrase_timeout = int(phrase_timeout * 60)

self.recorder.dynamic_energy_threshold = DYNAMIC_ENERGY_THRESHOLD

Expand All @@ -29,6 +30,6 @@ def start(self, callback: t.Callable[[str], None]) -> None:
self.recorder.adjust_for_ambient_noise(self.source)
# This creates a separate thread for the audio recording,
# but it's non-blocking, so we just let it live here
self.recorder.listen_in_background(self.source, self.send)
self.recorder.listen_in_background(self.source, self.send, phrase_time_limit=self.phrase_timeout)

super().start(callback)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ dependencies = [

[project.optional-dependencies] # Optional
dev = [
"black",
"black>=24.0,<25.0",
"isort",
"mypy",
"ruff",
Expand Down

0 comments on commit b947674

Please sign in to comment.