diff --git a/internal/parser/grammar/RFC2047Lexer.g4 b/internal/parser/grammar/RFC2047Lexer.g4 index f5171db9..675669e9 100644 --- a/internal/parser/grammar/RFC2047Lexer.g4 +++ b/internal/parser/grammar/RFC2047Lexer.g4 @@ -1,133 +1,66 @@ lexer grammar RFC2047Lexer; // Printable (0x20-0x7E) -fragment Exclamation: '!'; // \u0021 -fragment DQuote: '"'; // \u0022 -fragment Hash: '#'; // \u0023 -fragment Dollar: '$'; // \u0024 -fragment Percent: '%'; // \u0025 -fragment Ampersand: '&'; // \u0026 -fragment SQuote: '\''; // \u0027 -fragment LParens: '('; // \u0028 -fragment RParens: ')'; // \u0029 -fragment Asterisk: '*'; // \u002A -fragment Plus: '+'; // \u002B -fragment Comma: ','; // \u002C -fragment Minus: '-'; // \u002D -fragment Period: '.'; // \u002E -fragment Slash: '/'; // \u002F -fragment Digit: [0-9]; // \u0030 -- \u0039 -fragment Colon: ':'; // \u003A -fragment Semicolon: ';'; // \u003B -fragment Less: '<'; // \u003C -fragment Equal: '='; // \u003D -fragment Greater: '>'; // \u003E +Exclamation: '!'; // \u0021 +DQuote: '"'; // \u0022 +Hash: '#'; // \u0023 +Dollar: '$'; // \u0024 +Percent: '%'; // \u0025 +Ampersand: '&'; // \u0026 +SQuote: '\''; // \u0027 +LParens: '('; // \u0028 +RParens: ')'; // \u0029 +Asterisk: '*'; // \u002A +Plus: '+'; // \u002B +Comma: ','; // \u002C +Minus: '-'; // \u002D +Period: '.'; // \u002E +Slash: '/'; // \u002F +Digit: [0-9]; // \u0030 -- \u0039 +Colon: ':'; // \u003A +Semicolon: ';'; // \u003B +Less: '<'; // \u003C +Equal: '='; // \u003D +Greater: '>'; // \u003E Question: '?'; // \u003F -fragment At: '@'; // \u0040 -fragment LBracket: '['; // \u005B -fragment Backslash: '\\'; // \u005C -fragment RBracket: ']'; // \u005D -fragment Caret: '^'; // \u005E -fragment Underscore: '_'; // \u005F -fragment Backtick: '`'; // \u0060 -fragment LCurly: '{'; // \u007B -fragment Pipe: '|'; // \u007C -fragment RCurly: '}'; // \u007D -fragment Tilde: '~'; // \u007E +At: '@'; // \u0040 +LBracket: '['; // \u005B +Backslash: '\\'; // \u005C 
+RBracket: ']'; // \u005D +Caret: '^'; // \u005E +Underscore: '_'; // \u005F +Backtick: '`'; // \u0060 +LCurly: '{'; // \u007B +Pipe: '|'; // \u007C +RCurly: '}'; // \u007D +Tilde: '~'; // \u007E -fragment A: 'A'|'a'; -fragment B: 'B'|'b'; -fragment C: 'C'|'c'; -fragment D: 'D'|'d'; -fragment E: 'E'|'e'; -fragment F: 'F'|'f'; -fragment G: 'G'|'g'; -fragment H: 'H'|'h'; -fragment I: 'I'|'i'; -fragment J: 'J'|'j'; -fragment K: 'K'|'k'; -fragment L: 'L'|'l'; -fragment M: 'M'|'m'; -fragment N: 'N'|'n'; -fragment O: 'O'|'o'; -fragment P: 'P'|'p'; -fragment Q: 'Q'|'q'; -fragment R: 'R'|'r'; -fragment S: 'S'|'s'; -fragment T: 'T'|'t'; -fragment U: 'U'|'u'; -fragment V: 'V'|'v'; -fragment W: 'W'|'w'; -fragment X: 'X'|'x'; -fragment Y: 'Y'|'y'; -fragment Z: 'Z'|'z'; +A: 'A'|'a'; +B: 'B'|'b'; +C: 'C'|'c'; +D: 'D'|'d'; +E: 'E'|'e'; +F: 'F'|'f'; +G: 'G'|'g'; +H: 'H'|'h'; +I: 'I'|'i'; +J: 'J'|'j'; +K: 'K'|'k'; +L: 'L'|'l'; +M: 'M'|'m'; +N: 'N'|'n'; +O: 'O'|'o'; +P: 'P'|'p'; +Q: 'Q'|'q'; +R: 'R'|'r'; +S: 'S'|'s'; +T: 'T'|'t'; +U: 'U'|'u'; +V: 'V'|'v'; +W: 'W'|'w'; +X: 'X'|'x'; +Y: 'Y'|'y'; +Z: 'Z'|'z'; -fragment Alpha: A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z ; -EncodeBegin: Equal Question; -EncodeEnd : Question Equal; -Encoding: Q|B; - -fragment TokenChar - : Alpha - | Exclamation - | Hash - | Dollar - | Percent - | Ampersand - | SQuote - | Asterisk - | Plus - | Minus - | Digit - | Backslash - | Caret - | Underscore - | Backtick - | LCurly - | Pipe - | RCurly - | Tilde - ; - - -Token: TokenChar+; - -fragment EncodedChar - : Alpha - | Exclamation - | DQuote - | Hash - | Dollar - | Percent - | Ampersand - | SQuote - | LParens - | RParens - | Asterisk - | Plus - | Comma - | Minus - | Period - | Slash - | Digit - | Colon - | Semicolon - | Less - | Equal - | Greater - | At - | LBracket - | Backslash - | RBracket - | Caret - | Underscore - | Backtick - | LCurly - | Pipe - | RCurly - | Tilde - ; - -EncodedText: EncodedChar+; 
diff --git a/internal/parser/grammar/RFC2047Parser.g4 b/internal/parser/grammar/RFC2047Parser.g4 index 0b84a1b6..cf5690c9 100644 --- a/internal/parser/grammar/RFC2047Parser.g4 +++ b/internal/parser/grammar/RFC2047Parser.g4 @@ -8,6 +8,70 @@ options { tokenVocab=RFC2047Lexer; } encodedWordList: encodedWord+; -encodedWord: EncodeBegin Token Question Encoding Question encodedText EncodeEnd; +encodedWord: Equal Question token Question encoding Question encodedText Question Equal; -encodedText: EncodedText | Token; +encoding: Q | B; + +token: tokenChar+; + +alpha: A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z ; + +tokenChar + : alpha + | Exclamation + | Hash + | Dollar + | Percent + | Ampersand + | SQuote + | Asterisk + | Plus + | Minus + | Digit + | Backslash + | Caret + | Underscore + | Backtick + | LCurly + | Pipe + | RCurly + | Tilde + ; + +encodedChar + : alpha + | Exclamation + | DQuote + | Hash + | Dollar + | Percent + | Ampersand + | SQuote + | LParens + | RParens + | Asterisk + | Plus + | Comma + | Minus + | Period + | Slash + | Digit + | Colon + | Semicolon + | Less + | Equal + | Greater + | At + | LBracket + | Backslash + | RBracket + | Caret + | Underscore + | Backtick + | LCurly + | Pipe + | RCurly + | Tilde + ; + +encodedText: encodedChar+; diff --git a/internal/parser/src/rfc2047/rfc2047_parser.cpp b/internal/parser/src/rfc2047/rfc2047_parser.cpp index 2aa04102..90f2706e 100644 --- a/internal/parser/src/rfc2047/rfc2047_parser.cpp +++ b/internal/parser/src/rfc2047/rfc2047_parser.cpp @@ -141,8 +141,8 @@ std::string parse(std::string_view input) { std::string result; for (const auto& word: encodedContext->encodedWord()) { - const auto encoding = word->Encoding()->getText(); - const auto charset = word->Token()->getText(); + const auto encoding = word->encoding()->getText(); + const auto charset = word->token()->getText(); const auto text = word->encodedText()->getText(); const auto decodedText = 
decodeText(encoding, text); diff --git a/internal/parser/tests/parser/address_list_test.cpp b/internal/parser/tests/parser/address_list_test.cpp index 39f84e8b..4926f6a9 100644 --- a/internal/parser/tests/parser/address_list_test.cpp +++ b/internal/parser/tests/parser/address_list_test.cpp @@ -665,4 +665,20 @@ TEST(AddressList, CAPI) { EXPECT_EQ(address.address, nullptr); } } +} + +TEST(AddressList, Emoji) { + const TestInput input = { + R"(=?utf-8?q?Goce_Test_=F0=9F=A4=A6=F0=9F=8F=BB=E2=99=82=F0=9F=99=88?= =?utf-8?q?=F0=9F=8C=B2=E2=98=98=F0=9F=8C=B4?= <foo@bar.com>, "Proton GMX Edit" <z@bar.com>, "beta@bar.com" <beta@bar.com>, "testios12" <random@bar.com>, "random@bar.com" <random@bar.com>, =?utf-8?q?=C3=9C=C3=A4=C3=B6_Jakdij?= <another@bar.com>, =?utf-8?q?Q=C3=A4_T=C3=B6=C3=BCst_12_Edit?= <random2@bar.com>, =?utf-8?q?=E2=98=98=EF=B8=8F=F0=9F=8C=B2=F0=9F=8C=B4=F0=9F=99=82=E2=98=BA?= =?utf-8?q?=EF=B8=8F=F0=9F=98=83?= <dust@bar.com>, "Somebody Outlook" <hotmal@bar.com>)", + {{"Goce Test 🤦🏻♂🙈🌲☘🌴", "foo@bar.com"}, + {"Proton GMX Edit", "z@bar.com"}, + {"beta@bar.com", "beta@bar.com"}, + {"testios12", "random@bar.com"}, + {"random@bar.com", "random@bar.com"}, + {"Üäö Jakdij", "another@bar.com"}, + {"Qä Töüst 12 Edit", "random2@bar.com"}, + {"☘️🌲🌴🙂☺️😃", "dust@bar.com"}, + {"Somebody Outlook", "hotmal@bar.com"}}}; + + validateTest(input); } \ No newline at end of file