From 81daf7649219acc2c2e01fd26ea4b7c3426a8701 Mon Sep 17 00:00:00 2001 From: "P.J. Finlay" Date: Sat, 5 Aug 2023 09:05:08 -0500 Subject: [PATCH 1/2] Translate "OEBPS" filenames We're expecting inzipinfo.filename == "OPS/content.opf" while this value is "OEBPS/content.opf" for an EPUB file created with LibreOffice. https://github.com/LibreTranslate/argos-translate-files/issues/1 --- argostranslatefiles/formats/epub.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/argostranslatefiles/formats/epub.py b/argostranslatefiles/formats/epub.py index daa39b5..55749c1 100644 --- a/argostranslatefiles/formats/epub.py +++ b/argostranslatefiles/formats/epub.py @@ -24,7 +24,8 @@ def translate(self, underlying_translation: ITranslation, file_path: str): for inzipinfo in inzip.infolist(): with inzip.open(inzipinfo) as infile: - if inzipinfo.filename == "OPS/content.opf" or inzipinfo.filename == "OPS/toc.ncx": + translatable_xml_filenames = ["OPS/content.opf", "OPS/toc.ncx", "OEBPS/content.opf", "OEBPS/toc.ncx"] + if inzipinfo.filename in translatable_xml_filenames: soup = BeautifulSoup(infile.read(), 'xml') itag = self.itag_of_soup(soup) From 67359176bcbcaaeabcfca2214d8da65f3c512934 Mon Sep 17 00:00:00 2001 From: "P.J. Finlay" Date: Sat, 5 Aug 2023 09:13:43 -0500 Subject: [PATCH 2/2] More permissively translate HTML The current logic that decides whether HTML should be translated will often not translate text in an EPUB that the user would want translated. This commit will translate any html or xhtml files in the EPUB document. 
Example of a filename that isn't translated with the current code but is with this commit: OEBPS/sections/section0003.xhtml --- argostranslatefiles/formats/epub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argostranslatefiles/formats/epub.py b/argostranslatefiles/formats/epub.py index 55749c1..8c99f1a 100644 --- a/argostranslatefiles/formats/epub.py +++ b/argostranslatefiles/formats/epub.py @@ -33,7 +33,7 @@ def translate(self, underlying_translation: ITranslation, file_path: str): translated_soup = self.soup_of_itag(translated_tag) outzip.writestr(inzipinfo.filename, str(translated_soup)) - elif re.search(r'OPS\/[a-zA-Z0-9\_]*.xhtml', inzipinfo.filename): + elif inzipinfo.filename.endswith('.html') or inzipinfo.filename.endswith('.xhtml'): head = '\n' content = str(infile.read(), 'utf-8') head_present = content.startswith(head)