From 6724204977e7c466a463595736cc5299c8f01829 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= Date: Wed, 27 Feb 2019 14:43:12 +0100 Subject: [PATCH] Add workaround for tesseract 4.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It needs to have C locale for its execution due to code relying on locale-dependent functions for parsing trained data (e.g. sscanf). This is really a workaround, but given that tesseract 4.0 is shipped with the upcoming Debian stable, we will have to live with this for quite some time. Fixes #2581 Signed-off-by: Michal Čihař --- docs/changes.rst | 1 + weblate/screenshots/views.py | 7 +++++-- weblate/utils/locale.py | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 weblate/utils/locale.py diff --git a/docs/changes.rst b/docs/changes.rst index 01f39acc677c..8ace7f10f67d 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -20,6 +20,7 @@ Released on ? 2019. * Added check for Kashida letters. * Added option to squash commits based on authors. * Improved support for xlsx file format. +* Compatibility with tesseract 4.0. 
weblate 3.4 ----------- diff --git a/weblate/screenshots/views.py b/weblate/screenshots/views.py index 6fa674a9becc..a5614535e8af 100644 --- a/weblate/screenshots/views.py +++ b/weblate/screenshots/views.py @@ -30,8 +30,11 @@ from PIL import Image +from weblate.utils.locale import c_locale + try: - from tesserocr import PyTessBaseAPI, RIL + with c_locale(): + from tesserocr import PyTessBaseAPI, RIL HAS_OCR = True except ImportError: HAS_OCR = False @@ -273,7 +276,7 @@ def ocr_search(request, pk): results = set() # Extract and match strings - with PyTessBaseAPI() as api: + with c_locale(), PyTessBaseAPI() as api: for image in (original_image, scaled_image): for match in ocr_extract(api, image, strings): results.add(sources[match]) diff --git a/weblate/utils/locale.py b/weblate/utils/locale.py new file mode 100644 index 000000000000..6c1d85e019e1 --- /dev/null +++ b/weblate/utils/locale.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +# +# Copyright © 2012 - 2019 Michal Čihař +# +# This file is part of Weblate +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
#

from __future__ import absolute_import

from locale import setlocale, getlocale, LC_ALL
from contextlib import contextmanager


@contextmanager
def c_locale():
    """Context manager executing the wrapped code in the C locale.

    Switches every locale category (``LC_ALL``) to ``"C"`` on entry and
    restores the previously active setting on exit, even when the wrapped
    code raises an exception.

    The locale is a process-wide setting, so this affects all threads for
    the duration of the block.
    """
    # Query form of setlocale(): returns an opaque string describing the
    # current setting of all categories which can be handed back verbatim.
    # Unlike getlocale() it does not parse the locale name, so it neither
    # raises ValueError on unusual names nor loses per-category settings.
    previous = setlocale(LC_ALL)
    setlocale(LC_ALL, "C")
    try:
        yield
    finally:
        # Always restore, otherwise an error inside the block would leave
        # the whole process in the C locale.
        setlocale(LC_ALL, previous)