Skip to content

Commit

Permalink
Merge pull request #25 from video-db/ankit/add-transcript-params
Browse files Browse the repository at this point in the history
Ankit/add transcript params
  • Loading branch information
ankit-v2-3 authored Aug 8, 2024
2 parents b947eff + c32c144 commit f9963ca
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 8 deletions.
2 changes: 1 addition & 1 deletion videodb/__about__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
""" About information for videodb sdk"""


__version__ = "0.2.3"
__version__ = "0.2.4"
__title__ = "videodb"
__author__ = "videodb"
__email__ = "[email protected]"
Expand Down
2 changes: 2 additions & 0 deletions videodb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
SceneExtractionType,
MediaType,
SearchType,
Segmenter,
SubtitleAlignment,
SubtitleBorderStyle,
SubtitleStyle,
Expand Down Expand Up @@ -41,6 +42,7 @@
"SubtitleStyle",
"TextStyle",
"SceneExtractionType",
"Segmenter",
]


Expand Down
6 changes: 6 additions & 0 deletions videodb/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ class SemanticSearchDefaultValues:
score_threshold = 0.2


class Segmenter:
    """String constants accepted as the ``segmenter`` transcript option.

    The chosen value is sent unchanged as the ``segmenter`` query parameter
    of the transcription request.
    """

    # NOTE(review): presumably selects the unit used to split the transcript
    # (fixed time window, single word, or full sentence) -- confirm against
    # the server API documentation.
    time = "time"
    word = "word"
    sentence = "sentence"


class ApiPath:
collection = "collection"
upload = "upload"
Expand Down
53 changes: 46 additions & 7 deletions videodb/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
IndexType,
SceneExtractionType,
SearchType,
Segmenter,
SubtitleStyle,
Workflows,
)
Expand Down Expand Up @@ -124,23 +125,61 @@ def get_thumbnails(self) -> List[Image]:
)
return [Image(self._connection, **thumbnail) for thumbnail in thumbnails_data]

def _fetch_transcript(
    self,
    start: int = None,
    end: int = None,
    segmenter: str = Segmenter.word,
    length: int = 1,
    force: bool = None,
) -> None:
    """Load the transcript from the API into ``self.transcript`` / ``self.transcript_text``.

    The cached transcript is reused only when it is non-empty, ``force`` is
    falsy, and the cache was produced by a request with exactly the same
    ``start``/``end``/``segmenter``/``length``. Any other call re-queries
    the server, so a transcript fetched with one segmentation is never
    returned for a request asking for another.

    :param int start: sent as the ``start`` query parameter
        (presumably the lower time bound of the requested range -- confirm)
    :param int end: sent as the ``end`` query parameter
        (presumably the upper time bound of the requested range -- confirm)
    :param str segmenter: one of the :class:`Segmenter` values, sent as ``segmenter``
    :param int length: sent as the ``length`` query parameter
        (presumably the size of each segment in segmenter units -- confirm)
    :param bool force: when truthy, bypass the cache and send ``force=true``
    :return: None; results are stored on the instance
    """
    # The tuple identifies which request populated the cache. ``force`` is
    # deliberately excluded: it controls freshness, not the result's shape.
    request_key = (start, end, segmenter, length)
    # ``getattr`` with a default keeps this safe on instances whose
    # transcript was populated before this attribute existed.
    cached_key = getattr(self, "_transcript_request", None)
    if self.transcript and not force and cached_key == request_key:
        return

    transcript_data = self._connection.get(
        path=f"{ApiPath.video}/{self.id}/{ApiPath.transcription}",
        params={
            "start": start,
            "end": end,
            "segmenter": segmenter,
            "length": length,
            "force": "true" if force else "false",
        },
        show_progress=True,
    )
    self.transcript = transcript_data.get("word_timestamps", [])
    self.transcript_text = transcript_data.get("text", "")
    self._transcript_request = request_key

def get_transcript(
    self,
    start: int = None,
    end: int = None,
    segmenter: str = Segmenter.word,
    length: int = 1,
    force: bool = None,
) -> List[Dict]:
    """Return the transcript entries stored on this video.

    All arguments are handed unchanged to ``_fetch_transcript``, which
    populates ``self.transcript``; that attribute is then returned.

    :param int start: forwarded as ``start``
    :param int end: forwarded as ``end``
    :param str segmenter: forwarded as ``segmenter`` (a :class:`Segmenter` value)
    :param int length: forwarded as ``length``
    :param bool force: forwarded as ``force``
    :return: the value of ``self.transcript`` after the fetch
    :rtype: List[Dict]
    """
    fetch_options = {
        "start": start,
        "end": end,
        "segmenter": segmenter,
        "length": length,
        "force": force,
    }
    self._fetch_transcript(**fetch_options)
    return self.transcript

def get_transcript_text(
    self,
    start: int = None,
    end: int = None,
    segmenter: str = Segmenter.word,
    length: int = 1,
    force: bool = None,
) -> str:
    """Return the plain-text transcript stored on this video.

    All arguments are handed unchanged to ``_fetch_transcript``, which
    populates ``self.transcript_text``; that attribute is then returned.

    :param int start: forwarded as ``start``
    :param int end: forwarded as ``end``
    :param str segmenter: forwarded as ``segmenter`` (a :class:`Segmenter` value)
    :param int length: forwarded as ``length``
    :param bool force: forwarded as ``force``
    :return: the value of ``self.transcript_text`` after the fetch
    :rtype: str
    """
    fetch_options = {
        "start": start,
        "end": end,
        "segmenter": segmenter,
        "length": length,
        "force": force,
    }
    self._fetch_transcript(**fetch_options)
    return self.transcript_text

def index_spoken_words(
Expand Down

0 comments on commit f9963ca

Please sign in to comment.