build: Upgrade openai-whisper and re-introduce audio extra (#6319)

* upgrade openai-whisper and re-introduce audio extra * add audio extra to
deepset-ai · Nov 16, 2023 · 34ecff1 · 34ecff1
1 parent 8b092a9
commit 34ecff1
Show file tree

Hide file tree

Showing 8 changed files with 19 additions and 27 deletions.
diff --git a/.github/workflows/e2e_preview.yml b/.github/workflows/e2e_preview.yml
@@ -36,9 +36,7 @@ jobs:
         sudo apt install ffmpeg  # for local Whisper tests
 
     - name: Install Haystack
-      run: |
-          pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf tika 'azure-ai-formrecognizer>=3.2.0b2'
-          pip install --no-deps llvmlite numba 'openai-whisper>=20230918'  # prevent outdated version of tiktoken pinned by openai-whisper
+      run: pip install .[dev,preview,audio] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf tika 'azure-ai-formrecognizer>=3.2.0b2'
 
     - name: Run tests
       run: pytest e2e/preview
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
@@ -41,7 +41,6 @@ jobs:
       - name: Install Haystack
         run: |
           pip install ".[all,dev]"
-          pip install --no-deps llvmlite numba "openai-whisper>=20230918"
 
       - name: Mypy
         if: steps.files.outputs.any_changed == 'true'
@@ -76,7 +75,6 @@ jobs:
       - name: Install Haystack
         run: |
           pip install ".[all,dev]"
-          pip install --no-deps llvmlite numba "openai-whisper>=20230918"
           pip install ./haystack-linter
 
       - name: Pylint

diff --git a/.github/workflows/linting_preview.yml b/.github/workflows/linting_preview.yml
@@ -38,9 +38,7 @@ jobs:
           python-version: ${{ env.PYTHON_VERSION }}
 
       - name: Install Haystack
-        run: |
-          pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf tika 'azure-ai-formrecognizer>=3.2.0b2'
-          pip install --no-deps llvmlite numba 'openai-whisper>=20230918'  # prevent outdated version of tiktoken pinned by openai-whisper
+        run: pip install .[dev,preview,audio] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf tika 'azure-ai-formrecognizer>=3.2.0b2'
 
       - name: Mypy
         if: steps.files.outputs.any_changed == 'true'
@@ -71,8 +69,7 @@ jobs:
 
       - name: Install Haystack
         run: |
-          pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf markdown-it-py mdit_plain tika 'azure-ai-formrecognizer>=3.2.0b2'
-          pip install --no-deps llvmlite numba 'openai-whisper>=20230918'  # prevent outdated version of tiktoken pinned by openai-whisper
+          pip install .[dev,preview,audio] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf markdown-it-py mdit_plain tika 'azure-ai-formrecognizer>=3.2.0b2'
           pip install ./haystack-linter
 
       - name: Pylint

diff --git a/.github/workflows/tests_preview.yml b/.github/workflows/tests_preview.yml
@@ -116,9 +116,7 @@ jobs:
           python-version: ${{ env.PYTHON_VERSION }}
 
       - name: Install Haystack
-        run: |
-          pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf markdown-it-py mdit_plain tika 'azure-ai-formrecognizer>=3.2.0b2'
-          pip install --no-deps llvmlite numba 'openai-whisper>=20230918'  # prevent outdated version of tiktoken pinned by openai-whisper
+        run: pip install .[dev,preview,audio] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf markdown-it-py mdit_plain tika 'azure-ai-formrecognizer>=3.2.0b2'
 
       - name: Run
         run: pytest -m "unit" test/preview
@@ -177,9 +175,7 @@ jobs:
           sudo apt install ffmpeg  # for local Whisper tests
 
       - name: Install Haystack
-        run: |
-          pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf markdown-it-py mdit_plain tika 'azure-ai-formrecognizer>=3.2.0b2'
-          pip install --no-deps llvmlite numba 'openai-whisper>=20230918'  # prevent outdated version of tiktoken pinned by openai-whisper
+        run: pip install .[dev,preview,audio] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf markdown-it-py mdit_plain tika 'azure-ai-formrecognizer>=3.2.0b2'
 
       - name: Run
         run: pytest --maxfail=5 -m "integration" test/preview
@@ -236,9 +232,7 @@ jobs:
           colima start
 
       - name: Install Haystack
-        run: |
-          pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf markdown-it-py mdit_plain tika 'azure-ai-formrecognizer>=3.2.0b2'
-          pip install --no-deps llvmlite numba 'openai-whisper>=20230918'  # prevent outdated version of tiktoken pinned by openai-whisper
+        run: pip install .[dev,preview,audio] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf markdown-it-py mdit_plain tika 'azure-ai-formrecognizer>=3.2.0b2'
 
       - name: Run Tika
         run: docker run -d -p 9998:9998 apache/tika:2.9.0.0
@@ -290,9 +284,7 @@ jobs:
           python-version: ${{ env.PYTHON_VERSION }}
 
       - name: Install Haystack
-        run: |
-          pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf markdown-it-py mdit_plain tika 'azure-ai-formrecognizer>=3.2.0b2'
-          pip install --no-deps llvmlite numba 'openai-whisper>=20230918'  # prevent outdated version of tiktoken pinned by openai-whisper
+        run: pip install .[dev,preview,audio] langdetect transformers[torch,sentencepiece]==4.34.1 'sentence-transformers>=2.2.0' pypdf markdown-it-py mdit_plain tika 'azure-ai-formrecognizer>=3.2.0b2'
 
       - name: Run
         run: pytest --maxfail=5 -m "integration" test/preview -k 'not tika'

diff --git a/haystack/nodes/audio/whisper_transcriber.py b/haystack/nodes/audio/whisper_transcriber.py
@@ -28,8 +28,6 @@ class WhisperTranscriber(BaseComponent):
 
     To use Whisper locally, install it following the instructions on
     the Whisper [GitHub repo](https://github.com/openai/whisper) and omit the `api_key` parameter.
-    You can work around a dependency conflict caused by openai-whisper pinning an older tiktoken version than required
-    by Haystack if you install via `pip install --no-deps numba llvmlite 'openai-whisper>=20230918'`.
 
     To use the API implementation, provide an api_key. You can get one by signing up
     for an [OpenAI account](https://beta.openai.com/).

diff --git a/haystack/preview/components/audio/whisper_local.py b/haystack/preview/components/audio/whisper_local.py
@@ -8,7 +8,7 @@
 
 with LazyImport(
     "Run 'pip install transformers[torch]' to install torch and "
-    "'pip install --no-deps numba llvmlite \"openai-whisper>=20230918\"' to install whisper."
+    "'pip install \"openai-whisper>=20231106\"' to install whisper."
 ) as whisper_import:
     import torch
     import whisper

diff --git a/pyproject.toml b/pyproject.toml
@@ -234,12 +234,16 @@ formatting = [
   "black[jupyter]~=23.0",
 ]
 
+audio = [
+  "openai-whisper>=20231106"
+]
+
 all = [
-  "farm-haystack[inference,docstores,crawler,preprocessing,file-conversion,pdf,ocr,metrics,aws,preview]",
+  "farm-haystack[inference,docstores,crawler,preprocessing,file-conversion,pdf,ocr,metrics,aws,preview,audio]",
 ]
 all-gpu = [
   # beir is incompatible with faiss-gpu: https://github.com/beir-cellar/beir/issues/71
-  "farm-haystack[inference,docstores-gpu,crawler,preprocessing,file-conversion,pdf,ocr,metrics,aws,preview]",
+  "farm-haystack[inference,docstores-gpu,crawler,preprocessing,file-conversion,pdf,ocr,metrics,aws,preview,audio]",
 ]
 
 [project.scripts]

diff --git a/releasenotes/notes/simplify-whisper-installation-1e347e2527cbf913.yaml b/releasenotes/notes/simplify-whisper-installation-1e347e2527cbf913.yaml
@@ -0,0 +1,5 @@
+---
+enhancements:
+  - |
+    Upgraded openai-whisper to version 20231106 and simplified its installation through the re-introduced audio extra.
+    The latest openai-whisper version unpins its tiktoken dependency, which resolves a version conflict with Haystack's dependencies.