From cd1ba397bcc50d80dd6606ac036f4b1f3d6b4bd7 Mon Sep 17 00:00:00 2001 From: Igor Afanasyev Date: Fri, 28 Oct 2016 22:19:51 -0700 Subject: [PATCH] Add support for all escaped characters as defined in .po format (fix #44) --- lib/Serge/Util.pm | 18 ++++++++- .../00/reference-output/database/files | 2 +- .../00/reference-output/database/items | 37 +++++++++++-------- .../00/reference-output/database/properties | 24 ++++++------ .../00/reference-output/database/strings | 5 ++- .../00/reference-output/database/translations | 23 +++++++----- .../localized-resources/test/template.pot | 14 ++++++- .../reference-output/po/test/template.pot.po | 20 ++++++++++ .../parse_pot/00/resources/template.pot | 8 ++++ 9 files changed, 109 insertions(+), 42 deletions(-) diff --git a/lib/Serge/Util.pm b/lib/Serge/Util.pm index 4b025d92..857de2dd 100644 --- a/lib/Serge/Util.pm +++ b/lib/Serge/Util.pm @@ -386,19 +386,35 @@ sub normalize_strref { $$ref =~ s/[ ]$//g; # remove trailing whitespace } +# .PO-specific string escaping +# see gettext po parser for a list of control sequences +# https://github.com/autotools-mirror/gettext/blob/master/gettext-tools/src/po-lex.c#L749-L856 sub escape_strref { my $ref = shift; $$ref =~ s/\\/\x1/g; # convert backslashes to a temporary symbol $$ref =~ s/\"/\\"/g; - $$ref =~ s/\n/\\n/sg; + $$ref =~ s/\n/\\n/sg; # newline + $$ref =~ s/\t/\\t/g; # horizontal tab + $$ref =~ s/\x8/\\b/g; # 0x08 (bs) backspace + $$ref =~ s/\r/\\r/sg; # carriage return + $$ref =~ s/\xC/\\f/g; # 0x0C (np) formfeed + $$ref =~ s/\xB/\\v/g; # 0x0B (vt) vertical tab + $$ref =~ s/\x7/\\a/g; # 0x07 (bel) bel character $$ref =~ s/\x1/\\\\/g; # restore backslashes (and escape them) } +# .PO-specific string unescaping sub unescape_strref { my $ref = shift; $$ref =~ s/\\\\/\x1/g; $$ref =~ s/\\"/\"/g; $$ref =~ s/\\n/\n/sg; + $$ref =~ s/\\t/\t/g; + $$ref =~ s/\\b/\x8/g; + $$ref =~ s/\\r/\r/sg; + $$ref =~ s/\\f/\xC/g; + $$ref =~ s/\\v/\xB/g; + $$ref =~ s/\\a/\x7/g; $$ref =~ s/\x1/\\/g; } diff --git a/t/data/engine/parse_pot/00/reference-output/database/files b/t/data/engine/parse_pot/00/reference-output/database/files index f3f4d0fb..85c86442 100644 --- a/t/data/engine/parse_pot/00/reference-output/database/files +++ b/t/data/engine/parse_pot/00/reference-output/database/files @@ -1,5 +1,5 @@ files { 0 1 1 test_job test_namespace template.pot 0 - 1 2 12 test_job test_namespace translated.po 0 + 1 2 16 test_job test_namespace translated.po 0 } diff --git a/t/data/engine/parse_pot/00/reference-output/database/items b/t/data/engine/parse_pot/00/reference-output/database/items index 876124de..83de201d 100644 --- a/t/data/engine/parse_pot/00/reference-output/database/items +++ b/t/data/engine/parse_pot/00/reference-output/database/items @@ -1,27 +1,32 @@ items { - 0 1 3 1 1 /file/path/filename0.php:106 NO 0 - 1 2 5 1 2 /file/path/filename1.php:75 NO 0 - 2 3 7 1 3 /file/path/filename2.php:13;30;45 NO 0 - 3 4 9 1 4 - `/file/path/filename3.php:6 + 0 1 3 1 1 /file/path/filename0.php:106 NO 0 + 1 2 5 1 2 /file/path/filename1.php:75 NO 0 + 2 3 7 1 3 /file/path/filename2.php:13;30;45 NO 0 + 3 4 9 1 4 + `/file/path/filename3.php:6 /file/path/filename4.php:6 /file/path/filename5.php:6` - NO 0 - 4 5 11 1 5 `translator-comments + NO 0 + 4 5 11 1 5 `translator-comments automatic-comments reference` - NO 0 - 5 6 13 2 1 /file/path/filename0.php:106 NO 0 - 6 7 14 2 2 /file/path/filename1.php:75 NO 0 - 7 8 15 2 3 /file/path/filename2.php:13;30;45 NO 0 - 8 9 16 2 4 - `/file/path/filename3.php:6 + NO 0 + 5 6 13 1 6 + `see gettext po parser for a list of control sequences +https://github.com/autotools-mirror/gettext/blob/master/gettext-tools/src/po-lex.c#L749-L856` + NO 0 + 6 7 15 1 7 NO NO 0 + 7 8 17 2 1 /file/path/filename0.php:106 NO 0 + 8 9 18 2 2 /file/path/filename1.php:75 NO 0 + 9 10 19 2 3 /file/path/filename2.php:13;30;45 NO 0 + 10 11 20 2 4 + `/file/path/filename3.php:6 /file/path/filename4.php:6 /file/path/filename5.php:6` - NO 0 - 9 10 18 2 6 `translator-comments + NO 0 + 11 12 22 2 8 `translator-comments automatic-comments reference` - NO 0 + NO 0 } diff --git a/t/data/engine/parse_pot/00/reference-output/database/properties b/t/data/engine/parse_pot/00/reference-output/database/properties index 90d43676..03ae8964 100644 --- a/t/data/engine/parse_pot/00/reference-output/database/properties +++ b/t/data/engine/parse_pot/00/reference-output/database/properties @@ -1,24 +1,24 @@ properties { - 0 1 source:1 84f66359a5212b8d496d8b4bf9d4a103 - 1 2 hash:1 84f66359a5212b8d496d8b4bf9d4a103 - 2 3 size:1 935 - 3 4 items:1 1,2,3,4,5 + 0 1 source:1 a513a59948cafc3a7a5bf924ca0f3cde + 1 2 hash:1 a513a59948cafc3a7a5bf924ca0f3cde + 2 3 size:1 1187 + 3 4 items:1 1,2,3,4,5,6,7 4 5 source:2 91375f70ba74beac5f744935d1caeb12 5 6 hash:2 91375f70ba74beac5f744935d1caeb12 6 7 size:2 1034 - 7 8 items:2 6,7,8,9,10 - 8 9 ts:1:test:count 5 - 9 10 usn:1:test 23 - 10 11 ts:1:test 3a7bb487055c7a0d991908f2ce5a7359 + 7 8 items:2 8,9,10,11,12 + 8 9 ts:1:test:count 7 + 9 10 usn:1:test 29 + 10 11 ts:1:test c0a2dd6e1b0bd9bc652982cb6d3c0114 11 12 ts:2:test:count 5 - 12 13 usn:2:test 28 + 12 13 usn:2:test 34 13 14 ts:2:test 6c9f25d8ded929da16eb7cd7485e4c9c - 14 15 target:1:test_job:test c424cb99684ae14eba712f9bcf6bf118 + 14 15 target:1:test_job:test 3daa2699c50626d46f50b5251ed54c35 15 16 target:mtime:1:test_job:test 12345678 - 16 17 source:1:test_job:test 84f66359a5212b8d496d8b4bf9d4a103 + 16 17 source:1:test_job:test a513a59948cafc3a7a5bf924ca0f3cde 17 18 source:ts:1:test_job:test - 3a7bb487055c7a0d991908f2ce5a7359 + c0a2dd6e1b0bd9bc652982cb6d3c0114 18 19 target:2:test_job:test b1e12029f404d864524459529a47240a 19 20 target:mtime:2:test_job:test 12345678 20 21 source:2:test_job:test 91375f70ba74beac5f744935d1caeb12 diff --git a/t/data/engine/parse_pot/00/reference-output/database/strings b/t/data/engine/parse_pot/00/reference-output/database/strings index 4584462c..bdf46fa3 100644 --- a/t/data/engine/parse_pot/00/reference-output/database/strings +++ b/t/data/engine/parse_pot/00/reference-output/database/strings @@ -5,5 +5,8 @@ strings 2 3 6 `Value 3` NO 0 3 4 8 `Value 4` NO 0 4 5 10 `Value 1 - SingularValue 5 - Plural` NO 0 - 5 6 17 `Value 5 - SingularValue 5 - Plural` NO 0 + 5 6 12 `Value1 +  "\` NO 0 + 6 7 14 `Value2 \n\t\b\r\f\v\a\"\\` NO 0 + 7 8 21 `Value 5 - SingularValue 5 - Plural` NO 0 } diff --git a/t/data/engine/parse_pot/00/reference-output/database/translations b/t/data/engine/parse_pot/00/reference-output/database/translations index 72963a27..65077216 100644 --- a/t/data/engine/parse_pot/00/reference-output/database/translations +++ b/t/data/engine/parse_pot/00/reference-output/database/translations @@ -1,13 +1,16 @@ translations { - 0 1 19 1 test `Ṽáļũē 1` NO 0 0 - 1 2 20 2 test `Ṽáļũē 2` NO 0 0 - 2 3 21 3 test `Ṽáļũē 3` NO 0 0 - 3 4 22 4 test `Ṽáļũē 4` NO 0 0 - 4 5 23 5 test `Ṽáļũē 1 - ŠĩŋğũļáŕṼáļũē 5 - Ṕļũŕáļ` NO 0 0 - 5 6 24 6 test `Ṽáļũē 1` NO 0 0 - 6 7 25 7 test `Ṽáļũē 2` NO 0 0 - 7 8 26 8 test `Ṽáļũē 3` NO 0 0 - 8 9 27 9 test `Ṽáļũē 4` NO 0 0 - 9 10 28 10 test `Ṽáļũē 5 - ŠĩŋğũļáŕṼáļũē 5 - Ṕļũŕáļ` NO 0 0 + 0 1 23 1 test `Ṽáļũē 1` NO 0 0 + 1 2 24 2 test `Ṽáļũē 2` NO 0 0 + 2 3 25 3 test `Ṽáļũē 3` NO 0 0 + 3 4 26 4 test `Ṽáļũē 4` NO 0 0 + 4 5 27 5 test `Ṽáļũē 1 - ŠĩŋğũļáŕṼáļũē 5 - Ṕļũŕáļ` NO 0 0 + 5 6 28 6 test `Ṽáļũē1 +  "\` NO 0 0 + 6 7 29 7 test `Ṽáļũē2 \n\t\b\r\f\v\a\"\\` NO 0 0 + 7 8 30 8 test `Ṽáļũē 1` NO 0 0 + 8 9 31 9 test `Ṽáļũē 2` NO 0 0 + 9 10 32 10 test `Ṽáļũē 3` NO 0 0 + 10 11 33 11 test `Ṽáļũē 4` NO 0 0 + 11 12 34 12 test `Ṽáļũē 5 - ŠĩŋğũļáŕṼáļũē 5 - Ṕļũŕáļ` NO 0 0 } diff --git a/t/data/engine/parse_pot/00/reference-output/localized-resources/test/template.pot b/t/data/engine/parse_pot/00/reference-output/localized-resources/test/template.pot index 2131c55c..94e845ea 100644 --- a/t/data/engine/parse_pot/00/reference-output/localized-resources/test/template.pot +++ b/t/data/engine/parse_pot/00/reference-output/localized-resources/test/template.pot @@ -39,4 +39,16 @@ msgstr "Ṽáļũē 4" msgid "Value 1 - Singular" msgid_plural "Value 5 - Plural" msgstr[0] "Ṽáļũē 1 - Šĩŋğũļáŕ" -msgstr[1] "Ṽáļũē 5 - Ṕļũŕáļ" \ No newline at end of file +msgstr[1] "Ṽáļũē 5 - Ṕļũŕáļ" + +# see gettext po parser for a list of control sequences +# https://github.com/autotools-mirror/gettext/blob/master/gettext-tools/src/po-lex.c#L749-L856 +msgid "Value1 \n\t\b\r\f\v\a\"\\" +msgstr "" +"Ṽáļũē1 \n" +"\t\b\r\f\v\a\"\\" + +msgid "Value2 \\n\\t\\b\\r\\f\\v\\a\\\"\\\\" +msgstr "" +"Ṽáļũē2 \\n" +"\\t\\b\\r\\f\\v\\a\\\"\\\\" \ No newline at end of file diff --git a/t/data/engine/parse_pot/00/reference-output/po/test/template.pot.po b/t/data/engine/parse_pot/00/reference-output/po/test/template.pot.po index 7d248049..4c635f37 100644 --- a/t/data/engine/parse_pot/00/reference-output/po/test/template.pot.po +++ b/t/data/engine/parse_pot/00/reference-output/po/test/template.pot.po @@ -40,3 +40,23 @@ msgid "Value 1 - Singular" msgid_plural "Value 5 - Plural" msgstr[0] "Ṽáļũē 1 - Šĩŋğũļáŕ" msgstr[1] "Ṽáļũē 5 - Ṕļũŕáļ" + +#. see gettext po parser for a list of control sequences +#. https://github.com/autotools-mirror/gettext/blob/master/gettext-tools/src/po-lex.c#L749-L856 +#: File: template.pot +#: ID: 6407b6d519834995a5bcd6c7162488ec +msgid "" +"Value1 \n" +"\t\b\r\f\v\a\"\\" +msgstr "" +"Ṽáļũē1 \n" +"\t\b\r\f\v\a\"\\" + +#: File: template.pot +#: ID: ea6701ea1cf1a308ceabf7d825a7581a +msgid "" +"Value2 \\n" +"\\t\\b\\r\\f\\v\\a\\\"\\\\" +msgstr "" +"Ṽáļũē2 \\n" +"\\t\\b\\r\\f\\v\\a\\\"\\\\" diff --git a/t/data/engine/parse_pot/00/resources/template.pot b/t/data/engine/parse_pot/00/resources/template.pot index 52dd0fb9..00f15917 100644 --- a/t/data/engine/parse_pot/00/resources/template.pot +++ b/t/data/engine/parse_pot/00/resources/template.pot @@ -40,3 +40,11 @@ msgid "Value 1 - Singular" msgid_plural "Value 5 - Plural" msgstr[0] "" msgstr[1] "" + +# see gettext po parser for a list of control sequences +# https://github.com/autotools-mirror/gettext/blob/master/gettext-tools/src/po-lex.c#L749-L856 +msgid "Value1 \n\t\b\r\f\v\a\"\\" +msgstr "" + +msgid "Value2 \\n\\t\\b\\r\\f\\v\\a\\\"\\\\" +msgstr ""