Skip to content

Commit

Permalink
Use stop sequences when building Context.
Browse files (browse the repository at this point in the history)
  • Loading branch information
zh-plus committed Jun 26, 2024
1 parent 5c5f86e commit 4f247fe
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 9 deletions.
4 changes: 3 additions & 1 deletion openlrc/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,9 @@ def build_context(self, texts, title='', glossary: Optional[dict] = None) -> str
{'role': 'system', 'content': self.prompter.system()},
{'role': 'user', 'content': self.prompter.user(text_content, title=title, given_glossary=glossary)},
]
resp = self.chatbot.message(messages_list, output_checker=self.prompter.check_format)[0]
resp = self.chatbot.message(
messages_list, stop_sequences=[self.prompter.stop_sequence], output_checker=self.prompter.check_format
)[0]
context = self.chatbot.get_content(resp)

context_pool = [context]
Expand Down
6 changes: 3 additions & 3 deletions openlrc/openlrc.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def consumer_worker(self, transcription_queue, target_lang, skip_trans, bilingua
subtitle_path = getattr(final_subtitle, f'to_{subtitle_format}')()
result_path = subtitle_path.parents[1] / subtitle_path.name.replace(f'_preprocessed.{subtitle_format}',
f'.{subtitle_format}')
shutil.copy(subtitle_path, result_path)
shutil.move(subtitle_path, result_path)

if not skip_trans and bilingual_sub:
bilingual_subtitle = BilingualSubtitle.from_preprocessed(
Expand All @@ -199,14 +199,14 @@ def consumer_worker(self, transcription_queue, target_lang, skip_trans, bilingua
# TODO: consider the edge case (audio file name contains _preprocessed)
getattr(bilingual_subtitle, f'to_{subtitle_format}')()
bilingual_lrc_path = bilingual_subtitle.filename.with_suffix(bilingual_subtitle.suffix)
shutil.copy(bilingual_lrc_path, result_path.parent / bilingual_lrc_path.name)
shutil.move(bilingual_lrc_path, result_path.parent / bilingual_lrc_path.name)

non_translated_subtitle = transcribed_opt_sub
optimizer = SubtitleOptimizer(non_translated_subtitle)
optimizer.extend_time() # Extend by 0.5s, the same as is done for the translated subtitle
getattr(non_translated_subtitle, f'to_{subtitle_format}')()
non_translated_lrc_path = non_translated_subtitle.filename.with_suffix(non_translated_subtitle.suffix)
shutil.copy(
shutil.move(
non_translated_lrc_path,
result_path.parent / subtitle_path.name.replace(
f'_preprocessed.{subtitle_format}',
Expand Down
8 changes: 6 additions & 2 deletions openlrc/prompter.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ def __init__(self, src_lang, target_lang):
self.src_lang_display = Language.get(src_lang).display_name('en')
self.target_lang_display = Language.get(target_lang).display_name('en')

self.stop_sequence = '<*--END-OF-CONTEXT--*>'

def system(self):
return f'''You are a context reviewer responsible for ensuring the consistency and accuracy of translations between two languages. Your task involves reviewing and providing necessary contextual information for translations.
Expand Down Expand Up @@ -221,7 +223,6 @@ def system(self):
Then, they prepare to start their investigation.
Example Output:
### Glossary:
- suspect: 嫌疑人
- uptown: 市中心
Expand All @@ -238,6 +239,8 @@ def system(self):
### Target Audience:
The target audience is adult viewers with an interest in crime dramas. They are likely to be familiar with police procedurals and enjoy suspenseful storytelling.
{self.stop_sequence}
</example>
Note:
Expand All @@ -247,7 +250,8 @@ def system(self):
DO NOT include any translation segment.
Sample Translation is NOT required for this task.
You should adhere to the same format as the previous response, add or delete section is not allowed.
Remember to include the glossary, characters, summary, tone and style, and target audience sections in your response.'''
Remember to include the glossary, characters, summary, tone and style, and target audience sections in your response.
Remember to add {self.stop_sequence} after the generated contexts.'''

def user(self, text, title='', given_glossary: Optional[dict] = None):
glossary_text = f'Given glossary: {given_glossary}' if given_glossary else ''
Expand Down
6 changes: 3 additions & 3 deletions tests/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,14 @@ class TestAtomicTranslateValidator(unittest.TestCase):

def test_validate_returns_true_when_generated_content_matches_target_language(self):
validator = AtomicTranslateValidator(target_lang='en')
user_input = "Hello"
generated_content = "Hello"
user_input = "你有什么问题?"
generated_content = "What's your problem?"

result = validator.validate(user_input, generated_content)
self.assertTrue(result)

def test_validate_returns_false_when_generated_content_not_matches_target_language(self):
validator = AtomicTranslateValidator(target_lang='en')
validator = AtomicTranslateValidator(target_lang='cn-zh')
user_input = "Hello"
generated_content = "你好"

Expand Down

0 comments on commit 4f247fe

Please sign in to comment.