From 72aeeb96dde1c26078d7f3979d6b9528705754c9 Mon Sep 17 00:00:00 2001
From: Leander Beernaert <leander.beernaert@proton.ch>
Date: Thu, 2 Mar 2023 11:30:17 +0100
Subject: [PATCH] fix(GODT-2427): Fix header parsing logic

Ensure the header parser treats the sequence `\r\n ` as a fold, in
accordance with RFC 5322. The original tests for the header parser did
not adhere to this convention, which led to incorrect parser logic.
---
 rfc822/header_parser.go | 45 ++++++++++++++++++++++++--------
 rfc822/header_test.go   | 58 ++++++++++++++++++++++++++++++++++-------
 2 files changed, 82 insertions(+), 21 deletions(-)

diff --git a/rfc822/header_parser.go b/rfc822/header_parser.go
index 5132a9a2..57581a4d 100644
--- a/rfc822/header_parser.go
+++ b/rfc822/header_parser.go
@@ -1,6 +1,9 @@
 package rfc822
 
-import "io"
+import (
+	"fmt"
+	"io"
+)
 
 type headerParser struct {
 	header []byte
@@ -67,22 +70,38 @@ func (hp *headerParser) next() (parsedHeaderEntry, error) {
 	result.valueStart = searchOffset
 
 	for searchOffset < headerLen {
-		// consume all content in between two quotes.
-		if hp.header[searchOffset] == '"' {
+		b := hp.header[searchOffset]
+
+		if b == '\r' {
 			searchOffset++
-			for searchOffset < headerLen && hp.header[searchOffset] != '"' {
-				searchOffset++
+			if searchOffset >= headerLen {
+				return parsedHeaderEntry{}, io.ErrUnexpectedEOF
+			}
+
+			if hp.header[searchOffset] != '\n' { // a '\r' must be followed by '\n'
+				return parsedHeaderEntry{}, fmt.Errorf(`expected \n after \r`)
 			}
 			searchOffset++
 
-			continue
-		} else if hp.header[searchOffset] == '\n' {
+			// If the character after the line break is a space or tab, it's a fold
+			if searchOffset < headerLen && isWSP(hp.header[searchOffset]) {
+				continue
+			}
+
+			result.valueEnd = searchOffset
+
+			break
+		} else if b == '\n' {
 			searchOffset++
-			// if folding the next line has to start with space or tab.
-			if searchOffset < headerLen && (hp.header[searchOffset] != ' ' && hp.header[searchOffset] != '\t') {
-				result.valueEnd = searchOffset
-				break
+
+			// If the character after the line break is a space or tab, it's a fold
+			if searchOffset < headerLen && isWSP(hp.header[searchOffset]) {
+				continue
 			}
+
+			result.valueEnd = searchOffset
+
+			break
 		} else {
 			searchOffset++
 		}
@@ -98,6 +117,10 @@ func (hp *headerParser) next() (parsedHeaderEntry, error) {
 	return result, nil
 }
 
+func isWSP(b byte) bool {
+	return b == ' ' || b == '\t'
+}
+
 type parsedHeaderEntry struct {
 	keyStart   int
 	keyEnd     int
diff --git a/rfc822/header_test.go b/rfc822/header_test.go
index 5eed8f3e..48ad007b 100644
--- a/rfc822/header_test.go
+++ b/rfc822/header_test.go
@@ -1,6 +1,7 @@
 package rfc822
 
 import (
+	"strings"
 	"testing"
 
 	"github.com/bradenaw/juniper/xslices"
@@ -188,35 +189,35 @@ func TestParseHeaderFoldedLine(t *testing.T) {
 }
 
 func TestParseHeaderMultilineFilename(t *testing.T) {
-	const literal = "Content-Type: application/msword; name=\"this is a very long\nfilename.doc\""
+	const literal = "Content-Type: application/msword; name=\"this is a very long\n filename.doc\""
 
 	header, err := NewHeader([]byte(literal))
 	require.NoError(t, err)
 
 	assert.Equal(t, [][]byte{
-		[]byte("Content-Type: application/msword; name=\"this is a very long\nfilename.doc\""),
+		[]byte("Content-Type: application/msword; name=\"this is a very long\n filename.doc\""),
 	}, header.getLines())
 }
 
 func TestParseHeaderMultilineFilenameWithColon(t *testing.T) {
-	const literal = "Content-Type: application/msword; name=\"this is a very long\nfilename: too long.doc\""
+	const literal = "Content-Type: application/msword; name=\"this is a very long\n filename: too long.doc\""
 
 	header, err := NewHeader([]byte(literal))
 	require.NoError(t, err)
 
 	assert.Equal(t, [][]byte{
-		[]byte("Content-Type: application/msword; name=\"this is a very long\nfilename: too long.doc\""),
+		[]byte("Content-Type: application/msword; name=\"this is a very long\n filename: too long.doc\""),
 	}, header.getLines())
 }
 
 func TestParseHeaderMultilineFilenameWithColonAndNewline(t *testing.T) {
-	const literal = "Content-Type: application/msword; name=\"this is a very long\nfilename: too long.doc\"\n"
+	const literal = "Content-Type: application/msword; name=\"this is a very long\n filename: too long.doc\"\n"
 
 	header, err := NewHeader([]byte(literal))
 	require.NoError(t, err)
 
 	assert.Equal(t, [][]byte{
-		[]byte("Content-Type: application/msword; name=\"this is a very long\nfilename: too long.doc\"\n"),
+		[]byte("Content-Type: application/msword; name=\"this is a very long\n filename: too long.doc\"\n"),
 	}, header.getLines())
 }
 
@@ -234,10 +235,10 @@ func TestParseHeaderMultilineIndent(t *testing.T) {
 
 func TestParseHeaderMultipleMultilineFilenames(t *testing.T) {
 	const literal = `Content-Type: application/msword; name="=E5=B8=B6=E6=9C=89=E5=A4=96=E5=9C=8B=E5=AD=97=E7=AC=A6=E7=9A=84=E9=99=84=E4=
-=BB=B6.DOC"
+ =BB=B6.DOC"
 Content-Transfer-Encoding: base64
 Content-Disposition: attachment; filename="=E5=B8=B6=E6=9C=89=E5=A4=96=E5=9C=8B=E5=AD=97=E7=AC=A6=E7=9A=84=E9=99=84=E4=
-=BB=B6.DOC"
+ =BB=B6.DOC"
 Content-ID: <>
 `
 
@@ -245,9 +246,9 @@ Content-ID: <>
 	require.NoError(t, err)
 
 	assert.Equal(t, [][]byte{
-		[]byte("Content-Type: application/msword; name=\"=E5=B8=B6=E6=9C=89=E5=A4=96=E5=9C=8B=E5=AD=97=E7=AC=A6=E7=9A=84=E9=99=84=E4=\n=BB=B6.DOC\"\n"),
+		[]byte("Content-Type: application/msword; name=\"=E5=B8=B6=E6=9C=89=E5=A4=96=E5=9C=8B=E5=AD=97=E7=AC=A6=E7=9A=84=E9=99=84=E4=\n =BB=B6.DOC\"\n"),
 		[]byte("Content-Transfer-Encoding: base64\n"),
-		[]byte("Content-Disposition: attachment; filename=\"=E5=B8=B6=E6=9C=89=E5=A4=96=E5=9C=8B=E5=AD=97=E7=AC=A6=E7=9A=84=E9=99=84=E4=\n=BB=B6.DOC\"\n"),
+		[]byte("Content-Disposition: attachment; filename=\"=E5=B8=B6=E6=9C=89=E5=A4=96=E5=9C=8B=E5=AD=97=E7=AC=A6=E7=9A=84=E9=99=84=E4=\n =BB=B6.DOC\"\n"),
 		[]byte("Content-ID: <>\n"),
 	}, header.getLines())
 }
@@ -352,3 +353,40 @@ func TestHeader_Erase(t *testing.T) {
 		assert.Equal(t, literal, newLiteral)
 	}
 }
+
+func TestHeader_SubjectWithRandomQuote(t *testing.T) {
+	raw := lines(`Subject: All " your " random " brackets " ' ' : belong to us () {}`,
+		`Date: Sun, 30 Jan 2000 11:49:30 +0700`,
+		`Content-Type: multipart/alternative; boundary="----=_BOUNDARY_"`)
+
+	header, err := NewHeader(raw)
+	require.NoError(t, err)
+
+	require.Equal(
+		t,
+		`All " your " random " brackets " ' ' : belong to us () {}`,
+		header.Get("Subject"),
+	)
+}
+
+func lines(s ...string) []byte {
+	return append([]byte(strings.Join(s, "\r\n")), '\r', '\n')
+}
+
+func TestHeader_WithTrailingSpaces(t *testing.T) {
+	const literal = `From: Nathaniel Borenstein <nsb@bellcore.com> 
+To:  Ned Freed <ned@innosoft.com> 
+Subject: Sample message 
+MIME-Version: 1.0 
+Content-type: multipart/mixed; boundary="simple boundary" 
+`
+
+	header, err := NewHeader([]byte(literal))
+	require.NoError(t, err)
+
+	require.Equal(t, "Nathaniel Borenstein <nsb@bellcore.com>", header.Get("From"))
+	require.Equal(t, "Ned Freed <ned@innosoft.com>", header.Get("To"))
+	require.Equal(t, "Sample message", header.Get("Subject"))
+	require.Equal(t, "1.0", header.Get("MIME-Version"))
+	require.Equal(t, `multipart/mixed; boundary="simple boundary"`, header.Get("Content-type"))
+}