Minor improvement on ContextReviewer prompt.

zh-plus · Jun 24, 2024 · 97cd6ac
1 parent e126c38
commit 97cd6ac
Show file tree

Hide file tree

Showing 5 changed files with 45 additions and 25 deletions.
diff --git a/openlrc/agents.py b/openlrc/agents.py
@@ -33,7 +33,7 @@ class ChunkedTranslatorAgent(Agent):
     TEMPERATURE = 1.0
 
     def __init__(self, src_lang, target_lang, info: TranslateInfo = TranslateInfo(),
-                 chatbot_model: str = 'gpt-3.5-turbo', fee_limit: float = 0.25, proxy: str = None,
+                 chatbot_model: str = 'gpt-3.5-turbo', fee_limit: float = 0.3, proxy: str = None,
                  base_url_config: Optional[dict] = None):
         super().__init__()
         self.chatbot_model = chatbot_model
@@ -111,7 +111,7 @@ class ContextReviewerAgent(Agent):
 
     def __init__(self, src_lang, target_lang, info: TranslateInfo = TranslateInfo(),
                  chatbot_model: str = 'gpt-3.5-turbo', retry_model=None,
-                 fee_limit: float = 0.25, proxy: str = None,
+                 fee_limit: float = 0.3, proxy: str = None,
                  base_url_config: Optional[dict] = None):
         super().__init__()
         self.src_lang = src_lang
@@ -146,13 +146,15 @@ def build_context(self, texts, title='', glossary: Optional[dict] = None) -> str
         resp = self.chatbot.message(messages_list, output_checker=self.prompter.check_format)[0]
         context = self.chatbot.get_content(resp)
 
+        context_pool = [context]
         # Validate
         if not self._validate_context(context):
             validated = False
             if self.retry_chatbot:
                 logger.info(f'Failed to validate the context using {self.chatbot}, retrying with {self.retry_chatbot}')
                 resp = self.retry_chatbot.message(messages_list, output_checker=self.validate_prompter.check_format)[0]
                 context = self.retry_chatbot.get_content(resp)
+                context_pool.append(context)
                 if self._validate_context(context):
                     validated = True
                 else:
@@ -163,12 +165,16 @@ def build_context(self, texts, title='', glossary: Optional[dict] = None) -> str
                     logger.warning(f'Retry to generate the context using {self.chatbot} at {i} reties.')
                     resp = self.chatbot.message(messages_list, output_checker=self.validate_prompter.check_format)[0]
                     context = self.chatbot.get_content(resp)
+                    context_pool.append(context)
                     if self._validate_context(context):
                         validated = True
                         break
 
             if not validated:
-                logger.warning(f'Finally failed to validate the context: {context}, check the context manually.')
+                logger.warning(
+                    f'Finally failed to validate the context: {context}, you may check the context manually.')
+                context = max(context_pool, key=len)
+                logger.info(f'Now using the longest context: {context}')
 
         return context
 
@@ -180,7 +186,7 @@ class ProofreaderAgent(Agent):
     TEMPERATURE = 0.8
 
     def __init__(self, src_lang, target_lang, info: TranslateInfo = TranslateInfo(),
-                 chatbot_model: str = 'gpt-3.5-turbo', fee_limit: float = 0.25, proxy: str = None,
+                 chatbot_model: str = 'gpt-3.5-turbo', fee_limit: float = 0.3, proxy: str = None,
                  base_url_config: Optional[dict] = None):
         super().__init__()
         self.src_lang = src_lang

diff --git a/openlrc/chatbot.py b/openlrc/chatbot.py
@@ -65,7 +65,7 @@ def route_chatbot(model):
 class ChatBot:
     pricing = None
 
-    def __init__(self, pricing, temperature=1, top_p=1, retry=8, max_async=16, fee_limit=0.25):
+    def __init__(self, pricing, temperature=1, top_p=1, retry=8, max_async=16, fee_limit=0.3):
         self.pricing = pricing
         self._model = None
 
@@ -254,7 +254,7 @@ class ClaudeBot(ChatBot):
         'claude-3-5-sonnet-20240620': (3, 15),
     }
 
-    def __init__(self, model='claude-3-sonnet-20240229', temperature=1, top_p=1, retry=8, max_async=16, fee_limit=0.25,
+    def __init__(self, model='claude-3-sonnet-20240229', temperature=1, top_p=1, retry=8, max_async=16, fee_limit=0.3,
                  proxy=None, base_url_config=None):
 
         # clamp temperature to 0-1

diff --git a/openlrc/openlrc.py b/openlrc/openlrc.py
@@ -52,7 +52,7 @@ class LRCer:
     """
 
     def __init__(self, whisper_model='large-v3', compute_type='float16', device='cuda',
-                 chatbot_model: str = 'gpt-3.5-turbo', fee_limit=0.25, consumer_thread=4, asr_options=None,
+                 chatbot_model: str = 'gpt-3.5-turbo', fee_limit=0.3, consumer_thread=4, asr_options=None,
                  vad_options=None, preprocess_options=None, proxy=None, base_url_config=None,
                  glossary: Union[dict, str, Path] = None, retry_model=None):
         self.chatbot_model = chatbot_model

diff --git a/openlrc/prompter.py b/openlrc/prompter.py
@@ -191,8 +191,7 @@ def __init__(self, src_lang, target_lang):
         self.target_lang_display = Language.get(target_lang).display_name('en')
 
     def system(self):
-        return f'''Context:
-You are a context reviewer responsible for ensuring the consistency and accuracy of translations between two languages. Your task involves reviewing and providing necessary contextual information for translations.
+        return f'''You are a context reviewer responsible for ensuring the consistency and accuracy of translations between two languages. Your task involves reviewing and providing necessary contextual information for translations.
 
 Objective:
 1. Build a comprehensive glossary of key terms and phrases used in the {self.src_lang_display} to {self.target_lang_display} translations. The glossary should include technical terms, slang, and culturally specific references that need consistent translation or localization, focusing on terms that may cause confusion or inconsistency.
@@ -202,7 +201,7 @@ def system(self):
 5. Identify the target audience for the subtitles, considering factors such as age, cultural background, and language proficiency, and provide insights on how to tailor the subtitles accordingly.
 
 Style:
-Formal and professional, with clear and precise language suitable for translation and localization contexts.
+Formal and professional, with clear and precise language suitable for translation and localization contexts. Be concise and informative in your instructions.
 
 Tone:
 Informative and authoritative to ensure clarity and reliability in the instructions.
@@ -211,8 +210,9 @@ def system(self):
 Translators, localization specialists, and proofreaders who need a detailed and consistent reference document for subtitling.
 
 Response Format:
-The output should include the following sections: Glossary, Characters, Summary, Tone and Style, Target Audience.
+The output should include the following sections: Glossary, Characters, Summary, Tone and Style, Target Audience. DO NOT include any other sections in the response.
 
+<example>
 Example Input:
 Please review the following text (title: The Detectors) and provide the necessary context for the translation from English to Chinese:
 John and Sarah discuss their plan to locate a suspect, deducing that he is likely in the uptown area.
@@ -238,7 +238,16 @@ def system(self):
 
 ### Target Audience:
 The target audience is adult viewers with an interest in crime dramas. They are likely to be familiar with police procedurals and enjoy suspenseful storytelling.
-'''
+</example>
+
+Note:
+There was an issue with the previous translation. 
+
+DO NOT add the translated sample text in the response.
+DO NOT include any translation segment.
+Sample Translation is NOT required for this task.
+You should adhere to the same format as the previous response, add or delete section is not allowed.
+Remember to include the glossary, characters, summary, tone and style, and target audience sections in your response.'''
 
     def user(self, text, title='', given_glossary: Optional[dict] = None):
         glossary_text = f'Given glossary: {given_glossary}' if given_glossary else ''
@@ -293,8 +302,7 @@ def __init__(self):
 
     def system(self):
         return f'''Ignore all previous instructions.
-You are a context validator, responsible for validating the context provided by the Context Reviewer. Your role is to validate if the context is good.
-A good context should include a comprehensive glossary of key terms and phrases, character name translations, a concise story summary, tone and style guidelines, and target audience insights.
+You are a context validator responsible for verifying the context provided by the context reviewers. Your duty is to initially confirm whether these contexts meet the most basic requirements.
 Only output True/False based on the provided context.
 
 # Example 1:
@@ -330,20 +338,26 @@ def system(self):
 
 # Example 3:
 Input:
-Key points for translation:
+### Glossary:
+- obedience: 服从
+- opinions: 意见
+- treasured: 珍贵的
+
+### Characters:
+- Mistress: 女主人，主导者
+- Listener: 听众
 
-1. The opening lines are a joke, likely setting a humorous tone for the video.
-2. The main topic is about cable management in PC building.
-3. There's a trend of moving cable connectors to the back of the motherboard to reduce clutter.
-4. The speaker seems to approve of this trend.
-5. The text mentions that not everyone likes this new trend.
+### Summary:
+In "Mistress and Listener," a powerful sorceress named Elara and a perceptive bard named Kael join forces to decipher a prophecy that threatens Elara's future, uncovering dark secrets and facing formidable adversaries along the way. Their journey transforms their lives, forging a deep bond and revealing the true extent of their powers.
 
-When translating, maintain the casual, slightly humorous tone of the original text. Technical terms like "PC hardware," "gaming rigs," and "motherboard" should be translated using their standard Chinese equivalents. The joke at the beginning should be translated in a way that preserves the humor if possible, but cultural adaptation may be necessary.
+### Tone and Style:
+The tone of "Mistress and Listener" is dark and mysterious, filled with suspense. The style is richly descriptive and immersive, blending fantasy with deep character exploration.
 
-Output:
-False
+### Target Audience:
+The target audience is young adults and adults who enjoy dark fantasy, those who enjoy themes of hypnosis, submission. The content is explicitly sexual and intended for mature listeners only.
 
-'''
+Output:
+True'''
 
     def user(self, context):
         return f'''Input:\n{context}\nOutput:'''
diff --git a/openlrc/translate.py b/openlrc/translate.py
@@ -29,7 +29,7 @@ def translate(self, texts: Union[str, List[str]], src_lang: str, target_lang: st
 class LLMTranslator(Translator):
     CHUNK_SIZE = 30
 
-    def __init__(self, chatbot_model: str = 'gpt-3.5-turbo', fee_limit: float = 0.25, chunk_size: int = CHUNK_SIZE,
+    def __init__(self, chatbot_model: str = 'gpt-3.5-turbo', fee_limit: float = 0.3, chunk_size: int = CHUNK_SIZE,
                  intercept_line: Optional[int] = None, proxy: Optional[str] = None,
                  base_url_config: Optional[dict] = None,
                  retry_model: Optional[str] = None):