class _NormalizedString(object):
    """Collect the quoted fragments that together form one PO field.

    Each fragment is stored with surrounding whitespace stripped; the
    fragments are only passed through ``unescape`` and concatenated when
    `denormalize` is called.
    """

    def __init__(self, *args):
        # Fragments in the order they were appended.
        self._strs = []
        for arg in args:
            self.append(arg)

    def append(self, s):
        """Store fragment *s* with leading/trailing whitespace removed."""
        self._strs.append(s.strip())

    def denormalize(self):
        """Return every fragment run through ``unescape`` and joined."""
        return ''.join(map(unescape, self._strs))

    def __nonzero__(self):
        """Truth value (Python 2 hook): true once a fragment was added."""
        return bool(self._strs)

    # Python 3 looks up ``__bool__``; without this alias an instance with
    # no fragments would still evaluate as true.
    __bool__ = __nonzero__
def _process_keyword_line(self, lineno, line, obsolete=False):
    """Handle a line that is expected to start with a PO keyword.

    The keyword must be one of ``self._keywords`` and be followed by a
    space or ``[``; anything else is reported through
    ``self._invalid_pofile`` and ignored.

    :param lineno: index of the line; recorded as ``self.offset`` when the
                   keyword is ``msgid``
    :param line: the line text, beginning with the keyword
    :param obsolete: value stored in ``self.obsolete`` for this line
    """
    for keyword in self._keywords:
        # Slice (not index) the character after the keyword: a line that
        # is nothing but the keyword yields '' here instead of raising
        # IndexError, and then falls through to the invalid-file report.
        if line.startswith(keyword) and line[len(keyword):len(keyword) + 1] in (' ', '['):
            arg = line[len(keyword):]
            break
    else:
        self._invalid_pofile(line, lineno, "Start of line didn't match any expected keyword.")
        return

    # A new msgid or msgctxt begins a new entry, so flush the pending one.
    if keyword in ['msgid', 'msgctxt']:
        self._finish_current_message()

    self.obsolete = obsolete

    # The line that has the msgid is stored as the offset of the msg
    # should this be the msgctxt if it has one?
    if keyword == 'msgid':
        self.offset = lineno

    if keyword in ['msgid', 'msgid_plural']:
        self.in_msgctxt = False
        self.in_msgid = True
        self.messages.append(_NormalizedString(arg))

    elif keyword == 'msgstr':
        self.in_msgid = False
        self.in_msgstr = True
        if arg.startswith('['):
            # Plural form: ``[N] "text"`` -> (N, text)
            idx, msg = arg[1:].split(']', 1)
            self.translations.append([int(idx), _NormalizedString(msg)])
        else:
            self.translations.append([0, _NormalizedString(arg)])

    elif keyword == 'msgctxt':
        self.in_msgctxt = True
        self.context = _NormalizedString(arg)
def _invalid_pofile(self, line, lineno, msg):
    """Print a two-line warning about a problematic PO line.

    The first line carries *msg*; the second shows ``lineno + 1`` together
    with the offending *line* text.
    """
    shown_lineno = lineno + 1
    location = "WARNING: Problem on line {0}: {1}".format(shown_lineno, line)
    print("WARNING:", msg)
    print(location)
def test_obsolete_message_with_context(self):
    """An obsolete ``#~`` entry carrying a msgctxt is read into
    ``catalog.obsolete`` with its context and translation, while the two
    surrounding regular entries stay in the catalog."""
    po_text = '''
# This message is not obsolete
msgid "baz"
msgstr "Bazczch"

# This is an obsolete message
#~ msgctxt "other"
#~ msgid "foo"
#~ msgstr "Voh"

# This message is not obsolete
#: main.py:1
msgid "bar"
msgstr "Bahr"
'''
    catalog = pofile.read_po(StringIO(po_text))

    # Two live entries, one obsolete entry.
    self.assertEqual(2, len(catalog))
    self.assertEqual(1, len(catalog.obsolete))

    obsolete_entry = catalog.obsolete[u"foo"]
    self.assertEqual(obsolete_entry.context, "other")
    self.assertEqual(obsolete_entry.string, "Voh")
def test_missing_plural_in_the_middle(self):
    """A plural entry that skips an index gets an empty string for the
    missing form while the provided forms keep their positions."""
    # The literal is not raw, so the PO-level escape has to be spelled
    # ``\\n``. A bare ``\n`` becomes a real newline inside the Python
    # string and splits the quoted Plural-Forms header line in two.
    buf = StringIO('''\
msgid ""
msgstr ""
"Plural-Forms: nplurals=3; plural=(n < 2) ? n : 2\\n"

msgid "foo"
msgid_plural "foos"
msgstr[0] "Voh [text]"
msgstr[2] "Vohs [text]"
''')
    catalog = pofile.read_po(buf, locale='nb_NO')
    self.assertEqual(1, len(catalog))
    self.assertEqual(3, catalog.num_plurals)
    message = catalog['foo']
    self.assertEqual(3, len(message.string))
    self.assertEqual("Voh [text]", message.string[0])
    # Index 1 was absent from the file, so it is filled with ''.
    self.assertEqual("", message.string[1])
    self.assertEqual("Vohs [text]", message.string[2])