Skip to content

Commit

Permalink
fix(GODT-2032): AddressList parsing with encoded text that starts with =
Browse files Browse the repository at this point in the history
Rewrite the grammar so that it correctly handles the case where the encoded
text starts with '='.
  • Loading branch information
LBeernaertProton committed Nov 9, 2022
1 parent 0bddd1d commit e501260
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 129 deletions.
183 changes: 58 additions & 125 deletions internal/parser/grammar/RFC2047Lexer.g4
Original file line number Diff line number Diff line change
@@ -1,133 +1,66 @@
lexer grammar RFC2047Lexer;

// Printable (0x20-0x7E)
fragment Exclamation: '!'; // \u0021
fragment DQuote: '"'; // \u0022
fragment Hash: '#'; // \u0023
fragment Dollar: '$'; // \u0024
fragment Percent: '%'; // \u0025
fragment Ampersand: '&'; // \u0026
fragment SQuote: '\''; // \u0027
fragment LParens: '('; // \u0028
fragment RParens: ')'; // \u0029
fragment Asterisk: '*'; // \u002A
fragment Plus: '+'; // \u002B
fragment Comma: ','; // \u002C
fragment Minus: '-'; // \u002D
fragment Period: '.'; // \u002E
fragment Slash: '/'; // \u002F
fragment Digit: [0-9]; // \u0030 -- \u0039
fragment Colon: ':'; // \u003A
fragment Semicolon: ';'; // \u003B
fragment Less: '<'; // \u003C
fragment Equal: '='; // \u003D
fragment Greater: '>'; // \u003E
Exclamation: '!'; // \u0021
DQuote: '"'; // \u0022
Hash: '#'; // \u0023
Dollar: '$'; // \u0024
Percent: '%'; // \u0025
Ampersand: '&'; // \u0026
SQuote: '\''; // \u0027
LParens: '('; // \u0028
RParens: ')'; // \u0029
Asterisk: '*'; // \u002A
Plus: '+'; // \u002B
Comma: ','; // \u002C
Minus: '-'; // \u002D
Period: '.'; // \u002E
Slash: '/'; // \u002F
Digit: [0-9]; // \u0030 -- \u0039
Colon: ':'; // \u003A
Semicolon: ';'; // \u003B
Less: '<'; // \u003C
Equal: '='; // \u003D
Greater: '>'; // \u003E
Question: '?'; // \u003F
fragment At: '@'; // \u0040
fragment LBracket: '['; // \u005B
fragment Backslash: '\\'; // \u005C
fragment RBracket: ']'; // \u005D
fragment Caret: '^'; // \u005E
fragment Underscore: '_'; // \u005F
fragment Backtick: '`'; // \u0060
fragment LCurly: '{'; // \u007B
fragment Pipe: '|'; // \u007C
fragment RCurly: '}'; // \u007D
fragment Tilde: '~'; // \u007E
At: '@'; // \u0040
LBracket: '['; // \u005B
Backslash: '\\'; // \u005C
RBracket: ']'; // \u005D
Caret: '^'; // \u005E
Underscore: '_'; // \u005F
Backtick: '`'; // \u0060
LCurly: '{'; // \u007B
Pipe: '|'; // \u007C
RCurly: '}'; // \u007D
Tilde: '~'; // \u007E

fragment A: 'A'|'a';
fragment B: 'B'|'b';
fragment C: 'C'|'c';
fragment D: 'D'|'d';
fragment E: 'E'|'e';
fragment F: 'F'|'f';
fragment G: 'G'|'g';
fragment H: 'H'|'h';
fragment I: 'I'|'i';
fragment J: 'J'|'j';
fragment K: 'K'|'k';
fragment L: 'L'|'l';
fragment M: 'M'|'m';
fragment N: 'N'|'n';
fragment O: 'O'|'o';
fragment P: 'P'|'p';
fragment Q: 'Q'|'q';
fragment R: 'R'|'r';
fragment S: 'S'|'s';
fragment T: 'T'|'t';
fragment U: 'U'|'u';
fragment V: 'V'|'v';
fragment W: 'W'|'w';
fragment X: 'X'|'x';
fragment Y: 'Y'|'y';
fragment Z: 'Z'|'z';
A: 'A'|'a';
B: 'B'|'b';
C: 'C'|'c';
D: 'D'|'d';
E: 'E'|'e';
F: 'F'|'f';
G: 'G'|'g';
H: 'H'|'h';
I: 'I'|'i';
J: 'J'|'j';
K: 'K'|'k';
L: 'L'|'l';
M: 'M'|'m';
N: 'N'|'n';
O: 'O'|'o';
P: 'P'|'p';
Q: 'Q'|'q';
R: 'R'|'r';
S: 'S'|'s';
T: 'T'|'t';
U: 'U'|'u';
V: 'V'|'v';
W: 'W'|'w';
X: 'X'|'x';
Y: 'Y'|'y';
Z: 'Z'|'z';

fragment Alpha: A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z ;

EncodeBegin: Equal Question;
EncodeEnd : Question Equal;

Encoding: Q|B;

fragment TokenChar
: Alpha
| Exclamation
| Hash
| Dollar
| Percent
| Ampersand
| SQuote
| Asterisk
| Plus
| Minus
| Digit
| Backslash
| Caret
| Underscore
| Backtick
| LCurly
| Pipe
| RCurly
| Tilde
;


Token: TokenChar+;

fragment EncodedChar
: Alpha
| Exclamation
| DQuote
| Hash
| Dollar
| Percent
| Ampersand
| SQuote
| LParens
| RParens
| Asterisk
| Plus
| Comma
| Minus
| Period
| Slash
| Digit
| Colon
| Semicolon
| Less
| Equal
| Greater
| At
| LBracket
| Backslash
| RBracket
| Caret
| Underscore
| Backtick
| LCurly
| Pipe
| RCurly
| Tilde
;

EncodedText: EncodedChar+;
68 changes: 66 additions & 2 deletions internal/parser/grammar/RFC2047Parser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,70 @@ options { tokenVocab=RFC2047Lexer; }

encodedWordList: encodedWord+;

encodedWord: EncodeBegin Token Question Encoding Question encodedText EncodeEnd;
encodedWord: Equal Question token Question encoding Question encodedText Question Equal;

encodedText: EncodedText | Token;
// Encoding selector of an encoded word: the Q or B letter token.
encoding: Q | B;

// One or more tokenChar; used as the charset field of encodedWord.
token: tokenChar+;

// Any single letter token A..Z as defined by the lexer vocabulary.
alpha: A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z ;

// A single character accepted inside `token`: a letter, a digit, or one of
// the punctuation tokens listed below. Equal and Question are not among the
// alternatives.
tokenChar
: alpha
| Exclamation
| Hash
| Dollar
| Percent
| Ampersand
| SQuote
| Asterisk
| Plus
| Minus
| Digit
| Backslash
| Caret
| Underscore
| Backtick
| LCurly
| Pipe
| RCurly
| Tilde
;

// A single character accepted inside `encodedText`. The set is wider than
// tokenChar: it also lists quotes, parentheses, brackets, separators and
// Equal. Question is not an alternative, so a '?' cannot be consumed as part
// of the encoded text.
encodedChar
: alpha
| Exclamation
| DQuote
| Hash
| Dollar
| Percent
| Ampersand
| SQuote
| LParens
| RParens
| Asterisk
| Plus
| Comma
| Minus
| Period
| Slash
| Digit
| Colon
| Semicolon
| Less
| Equal
| Greater
| At
| LBracket
| Backslash
| RBracket
| Caret
| Underscore
| Backtick
| LCurly
| Pipe
| RCurly
| Tilde
;

encodedText: encodedChar+;
4 changes: 2 additions & 2 deletions internal/parser/src/rfc2047/rfc2047_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,8 @@ std::string parse(std::string_view input) {
std::string result;

for (const auto& word: encodedContext->encodedWord()) {
const auto encoding = word->Encoding()->getText();
const auto charset = word->Token()->getText();
const auto encoding = word->encoding()->getText();
const auto charset = word->token()->getText();
const auto text = word->encodedText()->getText();

const auto decodedText = decodeText(encoding, text);
Expand Down
16 changes: 16 additions & 0 deletions internal/parser/tests/parser/address_list_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -665,4 +665,20 @@ TEST(AddressList, CAPI) {
EXPECT_EQ(address.address, nullptr);
}
}
}

// Address list mixing quoted display names with Q-encoded words
// (=?utf-8?q?...?=). Several encoded words have encoded text that begins
// with '=' (e.g. "=?utf-8?q?=F0=9F..."), and two entries use a pair of
// encoded words separated by a space that is expected to yield a single
// display name.
// NOTE(review): every address literal reads "[email protected]", which looks
// like e-mail obfuscation from the page this was captured from -- confirm
// the real fixture values against the repository.
TEST(AddressList, Emoji) {
const TestInput input = {
R"(=?utf-8?q?Goce_Test_=F0=9F=A4=A6=F0=9F=8F=BB=E2=99=82=F0=9F=99=88?= =?utf-8?q?=F0=9F=8C=B2=E2=98=98=F0=9F=8C=B4?= <[email protected]>, "Proton GMX Edit" <[email protected]>, "[email protected]" <[email protected]>, "testios12" <[email protected]>, "[email protected]" <[email protected]>, =?utf-8?q?=C3=9C=C3=A4=C3=B6_Jakdij?= <[email protected]>, =?utf-8?q?Q=C3=A4_T=C3=B6=C3=BCst_12_Edit?= <[email protected]>, =?utf-8?q?=E2=98=98=EF=B8=8F=F0=9F=8C=B2=F0=9F=8C=B4=F0=9F=99=82=E2=98=BA?= =?utf-8?q?=EF=B8=8F=F0=9F=98=83?= <[email protected]>, "Somebody Outlook" <[email protected]>)",
{{"Goce Test 🤦🏻♂🙈🌲☘🌴", "[email protected]"},
{"Proton GMX Edit", "[email protected]"},
{"[email protected]", "[email protected]"},
{"testios12", "[email protected]"},
{"[email protected]", "[email protected]"},
{"Üäö Jakdij", "[email protected]"},
{"Qä Töüst 12 Edit", "[email protected]"},
{"☘️🌲🌴🙂☺️😃", "[email protected]"},
{"Somebody Outlook", "[email protected]"}}};

validateTest(input);
}

0 comments on commit e501260

Please sign in to comment.