Merge pull request #10 from ehennenfent/phrase_timeout

Specify Phrase Timeout
ehennenfent · Jan 27, 2024 · b947674
2 parents 127e384 + 3a9ec57
commit b947674
Show file tree

Hide file tree

Showing 5 changed files with 19 additions and 9 deletions.
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
@@ -27,9 +27,9 @@ jobs:
         sudo apt install portaudio19-dev
         pip install .[dev]
     - name: Lint imports
-      run: isort .
+      run: isort --check .
     - name: Lint formatting
-      run: black .
+      run: black --check .
     - name: Lint semantics
       run: ruff .
     - name: Lint types

diff --git a/live_illustrate/__main__.py b/live_illustrate/__main__.py
@@ -34,6 +34,13 @@ def get_args() -> argparse.Namespace:
         type=float,
         help="How frequently to summarize the conversation and generate an image",
     )
+    parser.add_argument(
+        "--phrase_timeout",
+        default=0.75,
+        type=float,
+        help="Period of time after which to force transcription, even without a pause. "
+        "Specified as a fraction of wait_minutes",
+    )
     parser.add_argument(
         "--max_context",
         default=2000,  # very roughly ten minutes or so?
@@ -111,7 +118,7 @@ def main() -> None:
     logging.getLogger("werkzeug").setLevel(logging.INFO if args.verbose > 0 else logging.WARNING)  # flask
 
     # create each of our thread objects with the appropriate command line args
-    transcriber = AudioTranscriber(model=args.audio_model)
+    transcriber = AudioTranscriber(model=args.audio_model, phrase_timeout=args.wait_minutes * args.phrase_timeout)
     buffer = TextBuffer(
         wait_minutes=args.wait_minutes, max_context=args.max_context, persistence=args.persistence_of_memory
     )

diff --git a/live_illustrate/summarize.py b/live_illustrate/summarize.py
@@ -35,9 +35,11 @@ def work(self, transcription: Transcription) -> Summary | None:
         self.logger.info("Summarized %d tokens in %s", token_count, datetime.now() - start)
         if response.choices:
             return [
-                Summary.from_transcription(transcription, content.strip())
-                if (content := choice.message.content)
-                else None
+                (
+                    Summary.from_transcription(transcription, content.strip())
+                    if (content := choice.message.content)
+                    else None
+                )
                 for choice in response.choices
             ][-1]
         return None
diff --git a/live_illustrate/transcribe.py b/live_illustrate/transcribe.py
@@ -11,12 +11,13 @@
 
 
 class AudioTranscriber(AsyncThread):
-    def __init__(self, model: str) -> None:
+    def __init__(self, model: str, phrase_timeout: float) -> None:
         super().__init__("AudioTranscriber")
 
         self.recorder = sr.Recognizer()
         self.source = sr.Microphone(sample_rate=SAMPLE_RATE)
         self.model = model
+        self.phrase_timeout = int(phrase_timeout * 60)
 
         self.recorder.dynamic_energy_threshold = DYNAMIC_ENERGY_THRESHOLD
 
@@ -29,6 +30,6 @@ def start(self, callback: t.Callable[[str], None]) -> None:
             self.recorder.adjust_for_ambient_noise(self.source)
         # This creates a separate thread for the audio recording,
         # but it's non-blocking, so we just let it live here
-        self.recorder.listen_in_background(self.source, self.send)
+        self.recorder.listen_in_background(self.source, self.send, phrase_time_limit=self.phrase_timeout)
 
         super().start(callback)
diff --git a/pyproject.toml b/pyproject.toml
@@ -36,7 +36,7 @@ dependencies = [
 
 [project.optional-dependencies] # Optional
 dev = [
-    "black", 
+    "black>=24.0,<25.0", 
     "isort", 
     "mypy",
     "ruff",