From f88f4d5e4cf631aa49525c380e4e92dd5860402e Mon Sep 17 00:00:00 2001 From: Ankit raj <113342181+ankit-v2-3@users.noreply.github.com> Date: Mon, 5 Aug 2024 12:47:47 +0530 Subject: [PATCH 1/2] feat: add transcript params --- videodb/video.py | 52 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/videodb/video.py b/videodb/video.py index 3fb5da8..21a1da4 100644 --- a/videodb/video.py +++ b/videodb/video.py @@ -124,23 +124,61 @@ def get_thumbnails(self) -> List[Image]: ) return [Image(self._connection, **thumbnail) for thumbnail in thumbnails_data] - def _fetch_transcript(self, force: bool = False) -> None: - if self.transcript and not force: + def _fetch_transcript( + self, + start: int = None, + end: int = None, + segmenter: str = None, + count: int = None, + force: bool = None, + ) -> None: + if ( + self.transcript + and not start + and not end + and not segmenter + and not count + and not force + ): return transcript_data = self._connection.get( path=f"{ApiPath.video}/{self.id}/{ApiPath.transcription}", - params={"force": "true" if force else "false"}, + params={ + "start": start, + "end": end, + "segmenter": segmenter, + "count": count, + "force": "true" if force else "false", + }, show_progress=True, ) self.transcript = transcript_data.get("word_timestamps", []) self.transcript_text = transcript_data.get("text", "") - def get_transcript(self, force: bool = False) -> List[Dict]: - self._fetch_transcript(force) + def get_transcript( + self, + start: int = None, + end: int = None, + segmenter: str = None, + count: int = None, + force: bool = None, + ) -> List[Dict]: + self._fetch_transcript( + start=start, end=end, segmenter=segmenter, count=count, force=force + ) return self.transcript - def get_transcript_text(self, force: bool = False) -> str: - self._fetch_transcript(force) + def get_transcript_text( + self, + start: int = None, + end: int = None, + segmenter: str = None, + count: int = None, + force: bool = None, + ) -> str: + self._fetch_transcript( + start=start, end=end, segmenter=segmenter, count=count, force=force + ) return self.transcript_text def index_spoken_words( From c32c144c4b84814b64fc0c85cebee2f8d0bde5ee Mon Sep 17 00:00:00 2001 From: Ankit raj <113342181+ankit-v2-3@users.noreply.github.com> Date: Mon, 5 Aug 2024 15:40:23 +0530 Subject: [PATCH 2/2] fix: transcript params --- videodb/__about__.py | 2 +- videodb/__init__.py | 2 ++ videodb/_constants.py | 6 ++++++ videodb/video.py | 21 +++++++++++---------- 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/videodb/__about__.py b/videodb/__about__.py index bc4492a..8947bf1 100644 --- a/videodb/__about__.py +++ b/videodb/__about__.py @@ -1,7 +1,7 @@ """ About information for videodb sdk""" -__version__ = "0.2.3" +__version__ = "0.2.4" __title__ = "videodb" __author__ = "videodb" __email__ = "contact@videodb.io" diff --git a/videodb/__init__.py b/videodb/__init__.py index 7aa45a7..6f13816 100644 --- a/videodb/__init__.py +++ b/videodb/__init__.py @@ -11,6 +11,7 @@ SceneExtractionType, MediaType, SearchType, + Segmenter, SubtitleAlignment, SubtitleBorderStyle, SubtitleStyle, @@ -41,6 +42,7 @@ "SubtitleStyle", "TextStyle", "SceneExtractionType", + "Segmenter", ] diff --git a/videodb/_constants.py b/videodb/_constants.py index e2039b7..447f4a2 100644 --- a/videodb/_constants.py +++ b/videodb/_constants.py @@ -36,6 +36,12 @@ class SemanticSearchDefaultValues: score_threshold = 0.2 +class Segmenter: + time = "time" + word = "word" + sentence = "sentence" + + class ApiPath: collection = "collection" upload = "upload" diff --git a/videodb/video.py b/videodb/video.py index 21a1da4..e006a39 100644 --- a/videodb/video.py +++ b/videodb/video.py @@ -5,6 +5,7 @@ IndexType, SceneExtractionType, SearchType, + Segmenter, SubtitleStyle, Workflows, ) @@ -128,8 +129,8 @@ def _fetch_transcript( self, start: int = None, end: int = None, - segmenter: str = None, - count: int = None, + segmenter: str = Segmenter.word, + length: int = 1, force: bool = None, ) -> None: if ( @@ -137,7 +138,7 @@ def _fetch_transcript( and not start and not end and not segmenter - and not count + and not length and not force ): return @@ -147,7 +148,7 @@ def _fetch_transcript( "start": start, "end": end, "segmenter": segmenter, - "count": count, + "length": length, "force": "true" if force else "false", }, show_progress=True, @@ -159,12 +160,12 @@ def get_transcript( self, start: int = None, end: int = None, - segmenter: str = None, - count: int = None, + segmenter: str = Segmenter.word, + length: int = 1, force: bool = None, ) -> List[Dict]: self._fetch_transcript( - start=start, end=end, segmenter=segmenter, count=count, force=force + start=start, end=end, segmenter=segmenter, length=length, force=force ) return self.transcript @@ -172,12 +173,12 @@ def get_transcript_text( self, start: int = None, end: int = None, - segmenter: str = None, - count: int = None, + segmenter: str = Segmenter.word, + length: int = 1, force: bool = None, ) -> str: self._fetch_transcript( - start=start, end=end, segmenter=segmenter, count=count, force=force + start=start, end=end, segmenter=segmenter, length=length, force=force ) return self.transcript_text