-
-
Notifications
You must be signed in to change notification settings - Fork 1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for Azure OpenAI #12168
Comments
That file contains defaults, and the actual settings are stored in the database. Would you mind contributing this machinery to Weblate? You would not have to maintain it then ;-). |
It is basically similar to OpenAI's logic. I am happy to contribute if possible. |
Yes, contributions are welcome. In case it's very similar, maybe we can have a base class for AI-based translation, sharing at least prompt building? |
I may temporarily be unable to directly submit code to the Weblate repository, so I'll put the code I implemented here. from __future__ import annotations
from itertools import chain
from typing import TYPE_CHECKING, Literal
from django.core.cache import cache
from weblate.glossary.models import get_glossary_terms, render_glossary_units_tsv
from weblate.utils.errors import report_error
from .base import (
BatchMachineTranslation,
DownloadMultipleTranslations,
MachineTranslationError,
)
from .forms import AzureOpenAIMachineryForm
from weblate.logger import LOGGER
if TYPE_CHECKING:
from weblate.trans.models import Unit
PROMPT = """
You are a highly skilled translation assistant, adept at translating text
from language '{source_language}'
to language '{target_language}'
with precision and nuance.
{persona}
{style}
You always reply with translated string only.
You do not include transliteration.
{separator}
{placeables}
{glossary}
{example}
"""
SEPARATOR = "\n==WEBLATE_PART==\n"
SEPARATOR_PROMPT = f"""
You receive an input as strings separated by {SEPARATOR} and
your answer separates strings by {SEPARATOR}.
"""
GLOSSARY_PROMPT = """
Use the following glossary during the translation:
{}
"""
EXAMPLE_PROMPT = """
The following example is a fixed mapping and must be followed strictly during the translation:
{}
"""
PLACEABLES_PROMPT = """
You treat strings like {placeable_1} or {placeable_2} as placeables for user input and keep them intact.
"""
class AzureOpenAITranslation(BatchMachineTranslation):
    """Batch machine translation backend using Azure OpenAI chat completions.

    Sends a whole batch of strings in a single chat request, joined by
    ``SEPARATOR``, and splits the assistant reply back into one translation
    per input string.
    """

    name = "AzureOpenAITranslation"
    max_score = 90
    # Per-request timeout in seconds, passed to the Azure OpenAI client.
    request_timeout = 60
    mode = "fuzzy"
    settings_form = AzureOpenAIMachineryForm

    def __init__(self, settings=None) -> None:
        """Create the Azure OpenAI client from the machinery settings.

        Expects ``endpoint_url`` and ``key`` in the settings; ``deployment``,
        ``model``, ``temperature`` and ``batchSize`` are optional.
        """
        # Imported lazily so the openai package is only required when this
        # machinery is actually configured.
        from openai.lib.azure import AzureOpenAI

        super().__init__(settings)
        self.client = AzureOpenAI(
            azure_endpoint=self.settings["endpoint_url"],
            api_key=self.settings["key"],
            api_version="2024-02-01",
            timeout=self.request_timeout,
        )
        # Cached set of model IDs offered by the endpoint; filled lazily
        # in get_model().
        self._models = None
        # Optional batch size override from the settings form.
        if "batchSize" in self.settings:
            self.batch_size = self.settings["batchSize"]

    def is_supported(self, source, language) -> bool:
        """Report every language pair as supported; the model decides."""
        return True

    def get_deployment(self) -> str:
        """Return the configured Azure deployment name (may be empty)."""
        return self.settings.get("deployment", "")

    def get_model(self) -> str:
        """Resolve the configured model against the endpoint's model list.

        The model list is fetched once and cached for an hour. With the
        ``auto`` setting, the first known model choice available on the
        endpoint is used.

        Raises:
            MachineTranslationError: if the configured model is not available.
        """
        if self._models is None:
            cache_key = self.get_cache_key("models")
            self._models = cache.get(cache_key)
            if self._models is None:
                self._models = {model.id for model in self.client.models.list()}
                # Cache for one hour to avoid listing models on every request.
                cache.set(cache_key, self._models, 3600)

        if self.settings["model"] in self._models:
            return self.settings["model"]
        if self.settings["model"] == "auto":
            for model, _name in self.settings_form.MODEL_CHOICES:
                if model == "auto":
                    continue
                if model in self._models:
                    return model

        raise MachineTranslationError(f"Unsupported model: {self.settings['model']}")

    def format_prompt_part(self, name: Literal["style", "persona"]):
        """Return the given settings text, normalized to end with a period."""
        text = self.settings[name]
        text = text.strip()
        if text and not text.endswith("."):
            text = f"{text}."
        return text

    def get_prompt(
        self, source_language: str, target_language: str, texts: list[str], units: list
    ) -> str:
        """Build the system prompt for one batch of strings.

        Optional sections (glossary, separator instructions, placeables,
        example) are included only when relevant to the batch.
        """
        glossary = ""
        if any(units):
            # Skip missing units so get_glossary_terms() is never called
            # with None (sources may carry Unit | None).
            glossary = render_glossary_units_tsv(
                chain.from_iterable(
                    get_glossary_terms(unit) for unit in units if unit is not None
                )
            )
        if glossary:
            glossary = GLOSSARY_PROMPT.format(glossary)
        # Only explain the separator protocol when several strings are sent.
        separator = SEPARATOR_PROMPT if len(units) > 1 else ""
        placeables = ""
        if any(self.replacement_start in text for text in texts):
            placeables = PLACEABLES_PROMPT.format(
                placeable_1=self.format_replacement(0, -1, "", None),
                placeable_2=self.format_replacement(123, -1, "", None),
            )
        example = self.settings["translation_example"]
        if example:
            example = EXAMPLE_PROMPT.format(example)
        return PROMPT.format(
            source_language=source_language,
            target_language=target_language,
            persona=self.format_prompt_part("persona"),
            style=self.format_prompt_part("style"),
            glossary=glossary,
            separator=separator,
            placeables=placeables,
            example=example,
        )

    def download_multiple_translations(
        self,
        source,
        language,
        sources: list[tuple[str, Unit | None]],
        user=None,
        threshold: int = 75,
    ) -> DownloadMultipleTranslations:
        """Translate a batch of strings with a single chat completion call.

        Raises:
            MachineTranslationError: on a blank assistant reply or when the
                reply does not split into exactly one part per input string.
        """
        from openai.types.chat import (
            ChatCompletionSystemMessageParam,
            ChatCompletionUserMessageParam,
        )

        texts = [text for text, _unit in sources]
        units = [unit for _text, unit in sources]
        prompt = self.get_prompt(source, language, texts, units)
        messages = [
            ChatCompletionSystemMessageParam(role="system", content=prompt),
            ChatCompletionUserMessageParam(role="user", content=SEPARATOR.join(texts)),
        ]

        response = self.client.chat.completions.create(
            model=self.get_deployment(),
            messages=messages,
            temperature=float(self.settings.get("temperature", 0.3)),
            frequency_penalty=0,
            presence_penalty=0,
        )
        # Use the logger with lazy %-formatting instead of print() so debug
        # output respects the configured log level.
        LOGGER.debug("Azure OpenAI translate response: %s", response)

        translations_string = response.choices[0].message.content
        if translations_string is None:
            self.report_error(
                "Blank assistant reply",
                extra_log=translations_string,
                message=True,
            )
            raise MachineTranslationError("Blank assistant reply")

        translations = translations_string.split(SEPARATOR)
        LOGGER.debug("Translations after split: %s", translations)
        if len(translations) != len(texts):
            self.report_error(
                "Failed to parse assistant reply",
                extra_log=translations_string,
                message=True,
            )
            raise MachineTranslationError(
                f"Could not parse assistant reply, expected={len(texts)}, received={len(translations)}"
            )

        result: DownloadMultipleTranslations = {}
        # The length check above guarantees texts and translations align,
        # so the former per-index IndexError handling was dead code.
        for text, translation in zip(texts, translations):
            result[text] = [
                {
                    "text": translation,
                    "quality": self.max_score,
                    "service": self.name,
                    "source": text,
                }
            ]
        return result
This issue seems to be a good fit for newbie contributors. You are welcome to contribute to Weblate! Don't hesitate to ask any questions you would have while implementing this. You can learn about how to get started in our contributors documentation. |
Thank you for your report; the issue you have reported has just been fixed.
|
1 similar comment
Thank you for your report; the issue you have reported has just been fixed.
|
Describe the issue
After updating to version 5.6.2, my custom machinery is not showing up correctly in the available suggestion list.
Did I miss something?
I saw in the release logs that
the Docker container accepts WEBLATE_REMOVE_ADDONS and WEBLATE_ADD_MACHINERY to customize automatic suggestions.
I already tried
Steps to reproduce the behavior
Open the menu: Manage -> Automatic suggestions
Expected behavior
No response
Screenshots
Exception traceback
No response
How do you run Weblate?
Docker container
Weblate versions
5.6.2
Weblate deploy checks
No response
Additional context
No response
The text was updated successfully, but these errors were encountered: