Skip to content

Commit

Permalink
Minor improvement on ContextReviewer prompt.
Browse files (browse the repository at this point in the history)
  • Loading branch information
zh-plus committed Jun 24, 2024
1 parent e126c38 commit 97cd6ac
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 25 deletions.
14 changes: 10 additions & 4 deletions openlrc/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class ChunkedTranslatorAgent(Agent):
TEMPERATURE = 1.0

def __init__(self, src_lang, target_lang, info: TranslateInfo = TranslateInfo(),
chatbot_model: str = 'gpt-3.5-turbo', fee_limit: float = 0.25, proxy: str = None,
chatbot_model: str = 'gpt-3.5-turbo', fee_limit: float = 0.3, proxy: str = None,
base_url_config: Optional[dict] = None):
super().__init__()
self.chatbot_model = chatbot_model
Expand Down Expand Up @@ -111,7 +111,7 @@ class ContextReviewerAgent(Agent):

def __init__(self, src_lang, target_lang, info: TranslateInfo = TranslateInfo(),
chatbot_model: str = 'gpt-3.5-turbo', retry_model=None,
fee_limit: float = 0.25, proxy: str = None,
fee_limit: float = 0.3, proxy: str = None,
base_url_config: Optional[dict] = None):
super().__init__()
self.src_lang = src_lang
Expand Down Expand Up @@ -146,13 +146,15 @@ def build_context(self, texts, title='', glossary: Optional[dict] = None) -> str
resp = self.chatbot.message(messages_list, output_checker=self.prompter.check_format)[0]
context = self.chatbot.get_content(resp)

context_pool = [context]
# Validate
if not self._validate_context(context):
validated = False
if self.retry_chatbot:
logger.info(f'Failed to validate the context using {self.chatbot}, retrying with {self.retry_chatbot}')
resp = self.retry_chatbot.message(messages_list, output_checker=self.validate_prompter.check_format)[0]
context = self.retry_chatbot.get_content(resp)
context_pool.append(context)
if self._validate_context(context):
validated = True
else:
Expand All @@ -163,12 +165,16 @@ def build_context(self, texts, title='', glossary: Optional[dict] = None) -> str
logger.warning(f'Retry to generate the context using {self.chatbot} at {i} reties.')
resp = self.chatbot.message(messages_list, output_checker=self.validate_prompter.check_format)[0]
context = self.chatbot.get_content(resp)
context_pool.append(context)
if self._validate_context(context):
validated = True
break

if not validated:
logger.warning(f'Finally failed to validate the context: {context}, check the context manually.')
logger.warning(
f'Finally failed to validate the context: {context}, you may check the context manually.')
context = max(context_pool, key=len)
logger.info(f'Now using the longest context: {context}')

return context

Expand All @@ -180,7 +186,7 @@ class ProofreaderAgent(Agent):
TEMPERATURE = 0.8

def __init__(self, src_lang, target_lang, info: TranslateInfo = TranslateInfo(),
chatbot_model: str = 'gpt-3.5-turbo', fee_limit: float = 0.25, proxy: str = None,
chatbot_model: str = 'gpt-3.5-turbo', fee_limit: float = 0.3, proxy: str = None,
base_url_config: Optional[dict] = None):
super().__init__()
self.src_lang = src_lang
Expand Down
4 changes: 2 additions & 2 deletions openlrc/chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def route_chatbot(model):
class ChatBot:
pricing = None

def __init__(self, pricing, temperature=1, top_p=1, retry=8, max_async=16, fee_limit=0.25):
def __init__(self, pricing, temperature=1, top_p=1, retry=8, max_async=16, fee_limit=0.3):
self.pricing = pricing
self._model = None

Expand Down Expand Up @@ -254,7 +254,7 @@ class ClaudeBot(ChatBot):
'claude-3-5-sonnet-20240620': (3, 15),
}

def __init__(self, model='claude-3-sonnet-20240229', temperature=1, top_p=1, retry=8, max_async=16, fee_limit=0.25,
def __init__(self, model='claude-3-sonnet-20240229', temperature=1, top_p=1, retry=8, max_async=16, fee_limit=0.3,
proxy=None, base_url_config=None):

# clamp temperature to 0-1
Expand Down
2 changes: 1 addition & 1 deletion openlrc/openlrc.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class LRCer:
"""

def __init__(self, whisper_model='large-v3', compute_type='float16', device='cuda',
chatbot_model: str = 'gpt-3.5-turbo', fee_limit=0.25, consumer_thread=4, asr_options=None,
chatbot_model: str = 'gpt-3.5-turbo', fee_limit=0.3, consumer_thread=4, asr_options=None,
vad_options=None, preprocess_options=None, proxy=None, base_url_config=None,
glossary: Union[dict, str, Path] = None, retry_model=None):
self.chatbot_model = chatbot_model
Expand Down
48 changes: 31 additions & 17 deletions openlrc/prompter.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,7 @@ def __init__(self, src_lang, target_lang):
self.target_lang_display = Language.get(target_lang).display_name('en')

def system(self):
return f'''Context:
You are a context reviewer responsible for ensuring the consistency and accuracy of translations between two languages. Your task involves reviewing and providing necessary contextual information for translations.
return f'''You are a context reviewer responsible for ensuring the consistency and accuracy of translations between two languages. Your task involves reviewing and providing necessary contextual information for translations.
Objective:
1. Build a comprehensive glossary of key terms and phrases used in the {self.src_lang_display} to {self.target_lang_display} translations. The glossary should include technical terms, slang, and culturally specific references that need consistent translation or localization, focusing on terms that may cause confusion or inconsistency.
Expand All @@ -202,7 +201,7 @@ def system(self):
5. Identify the target audience for the subtitles, considering factors such as age, cultural background, and language proficiency, and provide insights on how to tailor the subtitles accordingly.
Style:
Formal and professional, with clear and precise language suitable for translation and localization contexts.
Formal and professional, with clear and precise language suitable for translation and localization contexts. Be concise and informative in your instructions.
Tone:
Informative and authoritative to ensure clarity and reliability in the instructions.
Expand All @@ -211,8 +210,9 @@ def system(self):
Translators, localization specialists, and proofreaders who need a detailed and consistent reference document for subtitling.
Response Format:
The output should include the following sections: Glossary, Characters, Summary, Tone and Style, Target Audience.
The output should include the following sections: Glossary, Characters, Summary, Tone and Style, Target Audience. DO NOT include any other sections in the response.
<example>
Example Input:
Please review the following text (title: The Detectors) and provide the necessary context for the translation from English to Chinese:
John and Sarah discuss their plan to locate a suspect, deducing that he is likely in the uptown area.
Expand All @@ -238,7 +238,16 @@ def system(self):
### Target Audience:
The target audience is adult viewers with an interest in crime dramas. They are likely to be familiar with police procedurals and enjoy suspenseful storytelling.
'''
</example>
Note:
There was an issue with the previous translation.
DO NOT add the translated sample text in the response.
DO NOT include any translation segment.
Sample Translation is NOT required for this task.
You should adhere to the same format as the previous response, add or delete section is not allowed.
Remember to include the glossary, characters, summary, tone and style, and target audience sections in your response.'''

def user(self, text, title='', given_glossary: Optional[dict] = None):
glossary_text = f'Given glossary: {given_glossary}' if given_glossary else ''
Expand Down Expand Up @@ -293,8 +302,7 @@ def __init__(self):

def system(self):
return f'''Ignore all previous instructions.
You are a context validator, responsible for validating the context provided by the Context Reviewer. Your role is to validate if the context is good.
A good context should include a comprehensive glossary of key terms and phrases, character name translations, a concise story summary, tone and style guidelines, and target audience insights.
You are a context validator responsible for verifying the context provided by the context reviewers. Your duty is to initially confirm whether these contexts meet the most basic requirements.
Only output True/False based on the provided context.
# Example 1:
Expand Down Expand Up @@ -330,20 +338,26 @@ def system(self):
# Example 3:
Input:
Key points for translation:
### Glossary:
- obedience: 服从
- opinions: 意见
- treasured: 珍贵的
### Characters:
- Mistress: 女主人,主导者
- Listener: 听众
1. The opening lines are a joke, likely setting a humorous tone for the video.
2. The main topic is about cable management in PC building.
3. There's a trend of moving cable connectors to the back of the motherboard to reduce clutter.
4. The speaker seems to approve of this trend.
5. The text mentions that not everyone likes this new trend.
### Summary:
In "Mistress and Listener," a powerful sorceress named Elara and a perceptive bard named Kael join forces to decipher a prophecy that threatens Elara's future, uncovering dark secrets and facing formidable adversaries along the way. Their journey transforms their lives, forging a deep bond and revealing the true extent of their powers.
When translating, maintain the casual, slightly humorous tone of the original text. Technical terms like "PC hardware," "gaming rigs," and "motherboard" should be translated using their standard Chinese equivalents. The joke at the beginning should be translated in a way that preserves the humor if possible, but cultural adaptation may be necessary.
### Tone and Style:
The tone of "Mistress and Listener" is dark and mysterious, filled with suspense. The style is richly descriptive and immersive, blending fantasy with deep character exploration.
Output:
False
### Target Audience:
The target audience is young adults and adults who enjoy dark fantasy, those who enjoy themes of hypnosis, submission. The content is explicitly sexual and intended for mature listeners only.
'''
Output:
True'''

# Build the user-turn message for the context validator: the given `context` is
# placed on its own line between an "Input:" label and a trailing "Output:" label.
# The returned string ends right after "Output:" with no trailing newline.
def user(self, context):
return f'''Input:\n{context}\nOutput:'''
2 changes: 1 addition & 1 deletion openlrc/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def translate(self, texts: Union[str, List[str]], src_lang: str, target_lang: st
class LLMTranslator(Translator):
CHUNK_SIZE = 30

def __init__(self, chatbot_model: str = 'gpt-3.5-turbo', fee_limit: float = 0.25, chunk_size: int = CHUNK_SIZE,
def __init__(self, chatbot_model: str = 'gpt-3.5-turbo', fee_limit: float = 0.3, chunk_size: int = CHUNK_SIZE,
intercept_line: Optional[int] = None, proxy: Optional[str] = None,
base_url_config: Optional[dict] = None,
retry_model: Optional[str] = None):
Expand Down

0 comments on commit 97cd6ac

Please sign in to comment.