Skip to content

Commit

Permalink
Remove redundant tags from translation.
Browse files Browse the repository at this point in the history
  • Loading branch information
zh-plus committed Dec 24, 2023
1 parent dacd5f1 commit 0d5e52e
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions openlrc/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,13 @@ def parse_responses(self, response):

translation = re.findall(r'Translation>\n*(.*?)(?:#\d+|<summary>|\n*$)', content, re.DOTALL)

# Remove "</summary>\nxxx</scene>" tags (or some weird tags like </p> ❓) from translation
if any([re.search(r'(<.*?>|</.*?>)', t) for t in translation]):
logger.warning(f'The extracted translation from response contains tags: {content}, removed')
translation = [
re.sub(r'(</summary>|<summary>|</translation>|<translation>|</p>|</div>).*', '', t, flags=re.DOTALL)
for t in translation]

return summary.strip(), scene.strip(), [t.strip() for t in translation]

except Exception as e:
Expand Down

0 comments on commit 0d5e52e

Please sign in to comment.