From 72aeeb96dde1c26078d7f3979d6b9528705754c9 Mon Sep 17 00:00:00 2001 From: Leander Beernaert Date: Thu, 2 Mar 2023 11:30:17 +0100 Subject: [PATCH] fix(GODT-2427): Fix header parsing logic Ensure the header parser treats the sequence `\r\n ` as a fold in accordance with RFC 5322. The original tests for the header parser did not adhere to this convention, which led to incorrect parser logic. --- rfc822/header_parser.go | 45 ++++++++++++++++++++++++-------- rfc822/header_test.go | 58 ++++++++++++++++++++++++++++++++++------- 2 files changed, 82 insertions(+), 21 deletions(-) diff --git a/rfc822/header_parser.go b/rfc822/header_parser.go index 5132a9a2..57581a4d 100644 --- a/rfc822/header_parser.go +++ b/rfc822/header_parser.go @@ -1,6 +1,9 @@ package rfc822 -import "io" +import ( + "fmt" + "io" +) type headerParser struct { header []byte @@ -67,22 +70,38 @@ func (hp *headerParser) next() (parsedHeaderEntry, error) { result.valueStart = searchOffset for searchOffset < headerLen { - // consume all content in between two quotes. - if hp.header[searchOffset] == '"' { + b := hp.header[searchOffset] + + if b == '\r' { searchOffset++ - for searchOffset < headerLen && hp.header[searchOffset] != '"' { - searchOffset++ + if searchOffset >= headerLen { + return parsedHeaderEntry{}, io.ErrUnexpectedEOF + } + + if hp.header[searchOffset] != '\n' { + return parsedHeaderEntry{}, fmt.Errorf(`expected \n after \n`) } searchOffset++ - continue - } else if hp.header[searchOffset] == '\n' { + // If the next character after new line is a space, it's a fold + if searchOffset < headerLen && isWSP(hp.header[searchOffset]) { + continue + } + + result.valueEnd = searchOffset + + break + } else if b == '\n' { searchOffset++ - // if folding the next line has to start with space or tab. 
- if searchOffset < headerLen && (hp.header[searchOffset] != ' ' && hp.header[searchOffset] != '\t') { - result.valueEnd = searchOffset - break + + // If the next character after new line is a space, it's a fold + if searchOffset < headerLen && isWSP(hp.header[searchOffset]) { + continue } + + result.valueEnd = searchOffset + + break } else { searchOffset++ } @@ -98,6 +117,10 @@ func (hp *headerParser) next() (parsedHeaderEntry, error) { return result, nil } +func isWSP(b byte) bool { + return b == ' ' || b == '\t' +} + type parsedHeaderEntry struct { keyStart int keyEnd int diff --git a/rfc822/header_test.go b/rfc822/header_test.go index 5eed8f3e..48ad007b 100644 --- a/rfc822/header_test.go +++ b/rfc822/header_test.go @@ -1,6 +1,7 @@ package rfc822 import ( + "strings" "testing" "github.com/bradenaw/juniper/xslices" @@ -188,35 +189,35 @@ func TestParseHeaderFoldedLine(t *testing.T) { } func TestParseHeaderMultilineFilename(t *testing.T) { - const literal = "Content-Type: application/msword; name=\"this is a very long\nfilename.doc\"" + const literal = "Content-Type: application/msword; name=\"this is a very long\n filename.doc\"" header, err := NewHeader([]byte(literal)) require.NoError(t, err) assert.Equal(t, [][]byte{ - []byte("Content-Type: application/msword; name=\"this is a very long\nfilename.doc\""), + []byte("Content-Type: application/msword; name=\"this is a very long\n filename.doc\""), }, header.getLines()) } func TestParseHeaderMultilineFilenameWithColon(t *testing.T) { - const literal = "Content-Type: application/msword; name=\"this is a very long\nfilename: too long.doc\"" + const literal = "Content-Type: application/msword; name=\"this is a very long\n filename: too long.doc\"" header, err := NewHeader([]byte(literal)) require.NoError(t, err) assert.Equal(t, [][]byte{ - []byte("Content-Type: application/msword; name=\"this is a very long\nfilename: too long.doc\""), + []byte("Content-Type: application/msword; name=\"this is a very long\n filename: 
too long.doc\""), }, header.getLines()) } func TestParseHeaderMultilineFilenameWithColonAndNewline(t *testing.T) { - const literal = "Content-Type: application/msword; name=\"this is a very long\nfilename: too long.doc\"\n" + const literal = "Content-Type: application/msword; name=\"this is a very long\n filename: too long.doc\"\n" header, err := NewHeader([]byte(literal)) require.NoError(t, err) assert.Equal(t, [][]byte{ - []byte("Content-Type: application/msword; name=\"this is a very long\nfilename: too long.doc\"\n"), + []byte("Content-Type: application/msword; name=\"this is a very long\n filename: too long.doc\"\n"), }, header.getLines()) } @@ -234,10 +235,10 @@ func TestParseHeaderMultilineIndent(t *testing.T) { func TestParseHeaderMultipleMultilineFilenames(t *testing.T) { const literal = `Content-Type: application/msword; name="=E5=B8=B6=E6=9C=89=E5=A4=96=E5=9C=8B=E5=AD=97=E7=AC=A6=E7=9A=84=E9=99=84=E4= -=BB=B6.DOC" + =BB=B6.DOC" Content-Transfer-Encoding: base64 Content-Disposition: attachment; filename="=E5=B8=B6=E6=9C=89=E5=A4=96=E5=9C=8B=E5=AD=97=E7=AC=A6=E7=9A=84=E9=99=84=E4= -=BB=B6.DOC" + =BB=B6.DOC" Content-ID: <> ` @@ -245,9 +246,9 @@ Content-ID: <> require.NoError(t, err) assert.Equal(t, [][]byte{ - []byte("Content-Type: application/msword; name=\"=E5=B8=B6=E6=9C=89=E5=A4=96=E5=9C=8B=E5=AD=97=E7=AC=A6=E7=9A=84=E9=99=84=E4=\n=BB=B6.DOC\"\n"), + []byte("Content-Type: application/msword; name=\"=E5=B8=B6=E6=9C=89=E5=A4=96=E5=9C=8B=E5=AD=97=E7=AC=A6=E7=9A=84=E9=99=84=E4=\n =BB=B6.DOC\"\n"), []byte("Content-Transfer-Encoding: base64\n"), - []byte("Content-Disposition: attachment; filename=\"=E5=B8=B6=E6=9C=89=E5=A4=96=E5=9C=8B=E5=AD=97=E7=AC=A6=E7=9A=84=E9=99=84=E4=\n=BB=B6.DOC\"\n"), + []byte("Content-Disposition: attachment; filename=\"=E5=B8=B6=E6=9C=89=E5=A4=96=E5=9C=8B=E5=AD=97=E7=AC=A6=E7=9A=84=E9=99=84=E4=\n =BB=B6.DOC\"\n"), []byte("Content-ID: <>\n"), }, header.getLines()) } @@ -352,3 +353,40 @@ func TestHeader_Erase(t *testing.T) { 
assert.Equal(t, literal, newLiteral) } } + +func TestHeader_SubjectWithRandomQuote(t *testing.T) { + raw := lines(`Subject: All " your " random " brackets " ' ' : belong to us () {}`, + `Date: Sun, 30 Jan 2000 11:49:30 +0700`, + `Content-Type: multipart/alternative; boundary="----=_BOUNDARY_"`) + + header, err := NewHeader(raw) + require.NoError(t, err) + + require.Equal( + t, + `All " your " random " brackets " ' ' : belong to us () {}`, + header.Get("Subject"), + ) +} + +func lines(s ...string) []byte { + return append([]byte(strings.Join(s, "\r\n")), '\r', '\n') +} + +func TestHeader_WithTrailingSpaces(t *testing.T) { + const literal = `From: Nathaniel Borenstein +To: Ned Freed +Subject: Sample message +MIME-Version: 1.0 +Content-type: multipart/mixed; boundary="simple boundary" +` + + header, err := NewHeader([]byte(literal)) + require.NoError(t, err) + + require.Equal(t, "Nathaniel Borenstein ", header.Get("From")) + require.Equal(t, "Ned Freed ", header.Get("To")) + require.Equal(t, "Sample message", header.Get("Subject")) + require.Equal(t, "1.0", header.Get("MIME-Version")) + require.Equal(t, `multipart/mixed; boundary="simple boundary"`, header.Get("Content-type")) +}