Skip to content

Commit

Permalink
Fix UTF-8 text diagnostic encoding.
Browse files Browse the repository at this point in the history
  • Loading branch information
zensh committed Jan 28, 2023
1 parent a455df5 commit f8033f8
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 18 deletions.
66 changes: 48 additions & 18 deletions diagnose.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ func Diag(data []byte, opts *DiagOptions) ([]byte, error) {
return di.diag()
}

// loosest decode options for diagnostic purpose.
var diagnoseDecMode, _ = DecOptions{
MaxNestedLevels: 256,
UTF8: UTF8DecodeInvalid,
Expand Down Expand Up @@ -446,43 +447,72 @@ func (di *diagnose) encodeByteString(val []byte) error {
// utf16SurrSelf is the threshold (U+10000) used by encodeTextString: runes
// below it are passed to writeU16 as a single unit, while runes at or above
// it are first split into a surrogate pair with utf16.EncodeRune.
var utf16SurrSelf = rune(0x10000)

// quote should be either `'` or `"`
func (di *diagnose) encodeTextString(val string, quote rune) error {
if err := di.writeByte(byte(quote)); err != nil {
func (di *diagnose) encodeTextString(val string, quote byte) error {
if err := di.writeByte(quote); err != nil {
return err
}

for _, r := range val {
switch {
case r == '\t', r == '\n', r == '\r', r == '\\', r == quote:
if err := di.writeByte('\\'); err != nil {
return err
}
if err := di.writeByte(byte(r)); err != nil {
return err
for i := 0; i < len(val); {
if b := val[i]; b < utf8.RuneSelf {
switch {
case b == '\t', b == '\n', b == '\r', b == '\\', b == quote:
if err := di.writeByte('\\'); err != nil {
return err
}

switch b {
case '\t':
b = 't'
case '\n':
b = 'n'
case '\r':
b = 'r'
}
if err := di.writeByte(b); err != nil {
return err
}

case b >= ' ' && b <= '~':
if err := di.writeByte(b); err != nil {
return err
}

default:
if err := di.writeU16(rune(b)); err != nil {
return err
}
}

case r >= ' ' && r <= '~':
if err := di.writeByte(byte(r)); err != nil {
i++
continue
}

c, size := utf8.DecodeRuneInString(val[i:])
switch {
case c == utf8.RuneError:
if err := di.writeU16(rune(val[i])); err != nil {
return err
}

case r < utf16SurrSelf:
if err := di.writeU16(r); err != nil {
case c < utf16SurrSelf:
if err := di.writeU16(c); err != nil {
return err
}

default:
r1, r2 := utf16.EncodeRune(r)
if err := di.writeU16(r1); err != nil {
c1, c2 := utf16.EncodeRune(c)
if err := di.writeU16(c1); err != nil {
return err
}
if err := di.writeU16(r2); err != nil {
if err := di.writeU16(c2); err != nil {
return err
}
}

i += size
}

return di.writeByte(byte(quote))
return di.writeByte(quote)
}

func (di *diagnose) encodeFloat(ai byte, val uint64) error {
Expand Down
86 changes: 86 additions & 0 deletions diagnose_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,92 @@ func TestDiagnoseByteString(t *testing.T) {
})
}

// TestDiagnoseTextString feeds CBOR byte strings and text strings, with valid
// and invalid UTF-8 content, through Diag and compares the result with the
// expected diagnostic text.
func TestDiagnoseTextString(t *testing.T) {
	cases := []struct {
		name  string
		input []byte
		want  string
		opts  *DiagOptions
	}{
		{
			name:  "valid UTF-8 text in byte string",
			input: hexDecode("4d68656c6c6f2c20e4bda0e5a5bd"),
			want:  `'hello, \u4f60\u597d'`,
			opts:  &DiagOptions{ByteStringText: true},
		},
		{
			name:  "valid UTF-8 text in text string",
			input: hexDecode("6d68656c6c6f2c20e4bda0e5a5bd"),
			want:  `"hello, \u4f60\u597d"`, // "hello, 你好"
			opts:  &DiagOptions{ByteStringText: true},
		},
		{
			name:  "invalid UTF-8 text in byte string",
			input: hexDecode("4d68656c6c6fffeee4bda0e5a5bd"),
			want:  `h'68656c6c6fffeee4bda0e5a5bd'`,
			opts:  &DiagOptions{ByteStringText: true},
		},
		{
			name:  "invalid UTF-8 text in text string",
			input: hexDecode("6d68656c6c6fffeee4bda0e5a5bd"),
			want:  `"hello\u00ff\u00ee\u4f60\u597d"`,
			opts:  &DiagOptions{ByteStringText: true},
		},
		{
			name:  "valid grapheme cluster text in byte string",
			input: hexDecode("583448656c6c6f2c2027e29da4efb88fe2808df09f94a5270ae4bda0e5a5bdefbc8c22f09fa791e2808df09fa49de2808df09fa79122"),
			want:  `'Hello, \'\u2764\ufe0f\u200d\ud83d\udd25\'\n\u4f60\u597d\uff0c"\ud83e\uddd1\u200d\ud83e\udd1d\u200d\ud83e\uddd1"'`,
			opts:  &DiagOptions{ByteStringText: true},
		},
		{
			name:  "valid grapheme cluster text in text string",
			input: hexDecode("783448656c6c6f2c2027e29da4efb88fe2808df09f94a5270ae4bda0e5a5bdefbc8c22f09fa791e2808df09fa49de2808df09fa79122"),
			want:  `"Hello, '\u2764\ufe0f\u200d\ud83d\udd25'\n\u4f60\u597d\uff0c\"\ud83e\uddd1\u200d\ud83e\udd1d\u200d\ud83e\uddd1\""`, // "Hello, '❤️‍🔥'\n你好,\"🧑‍🤝‍🧑\""
			opts:  &DiagOptions{ByteStringText: true},
		},
		{
			name:  "invalid grapheme cluster text in byte string",
			input: hexDecode("583448656c6c6feeff27e29da4efb88fe2808df09f94a5270de4bda0e5a5bdefbc8c22f09fa791e2808df09fa49de2808df09fa79122"),
			want:  `h'48656c6c6feeff27e29da4efb88fe2808df09f94a5270de4bda0e5a5bdefbc8c22f09fa791e2808df09fa49de2808df09fa79122'`,
			opts:  &DiagOptions{ByteStringText: true},
		},
		{
			name:  "invalid grapheme cluster text in text string",
			input: hexDecode("783448656c6c6feeff27e29da4efb88fe2808df09f94a5270de4bda0e5a5bdefbc8c22f09fa791e2808df09fa49de2808df09fa79122"),
			want:  `"Hello\u00ee\u00ff'\u2764\ufe0f\u200d\ud83d\udd25'\r\u4f60\u597d\uff0c\"\ud83e\uddd1\u200d\ud83e\udd1d\u200d\ud83e\uddd1\""`,
			opts:  &DiagOptions{ByteStringText: true},
		},
	}

	for _, tt := range cases {
		t.Run(tt.name, func(t *testing.T) {
			got, err := Diag(tt.input, tt.opts)
			if err != nil {
				t.Errorf("Diag(0x%x) returned error %q", tt.input, err)
				return
			}

			// The output is only compared when Diag succeeded.
			if string(got) != tt.want {
				t.Errorf("Diag(0x%x) returned `%s`, want %s", tt.input, string(got), tt.want)
			}
		})
	}
}

func TestDiagnoseFloatingPointNumber(t *testing.T) {
testCases := []struct {
title string
Expand Down

0 comments on commit f8033f8

Please sign in to comment.