diff --git a/diagnose.go b/diagnose.go
new file mode 100644
index 00000000..9417aa63
--- /dev/null
+++ b/diagnose.go
@@ -0,0 +1,740 @@
+// Copyright (c) Faye Amacker. All rights reserved.
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+package cbor
+
+import (
+	"bytes"
+	"encoding/base32"
+	"encoding/base64"
+	"encoding/hex"
+	"errors"
+	"io"
+	"math"
+	"math/big"
+	"strconv"
+	"unicode/utf16"
+	"unicode/utf8"
+
+	"github.com/x448/float16"
+)
+
+// DiagMode is the main interface for CBOR diagnostic notation.
+type DiagMode interface {
+	// Diagnose returns extended diagnostic notation (EDN) of CBOR data items using this DiagMode.
+	Diagnose([]byte) (string, error)
+
+	// DiagnoseFirst returns extended diagnostic notation (EDN) of the first CBOR data item using the DiagMode. Any remaining bytes are returned in rest.
+	DiagnoseFirst([]byte) (string, []byte, error)
+
+	// DiagOptions returns user specified options used to create this DiagMode.
+	DiagOptions() DiagOptions
+}
+
+// ByteStringEncoding specifies the base encoding used to notate byte strings.
+type ByteStringEncoding uint8
+
+const (
+	// ByteStringBase16Encoding encodes byte strings in base16, without padding.
+	// It is the zero value and therefore the default.
+	ByteStringBase16Encoding ByteStringEncoding = iota
+
+	// ByteStringBase32Encoding encodes byte strings in base32, without padding.
+	ByteStringBase32Encoding
+
+	// ByteStringBase32HexEncoding encodes byte strings in base32hex, without padding.
+	ByteStringBase32HexEncoding
+
+	// ByteStringBase64Encoding encodes byte strings in base64url, without padding.
+	ByteStringBase64Encoding
+
+	// maxByteStringEncoding is a sentinel one past the last valid encoding;
+	// it must remain the final constant in this block.
+	maxByteStringEncoding
+)
+
+// valid returns an error if bse is not one of the ByteStringEncoding
+// constants declared above, and nil otherwise.
+func (bse ByteStringEncoding) valid() error {
+	if bse >= maxByteStringEncoding {
+		return errors.New("cbor: invalid ByteStringEncoding " + strconv.Itoa(int(bse)))
+	}
+	return nil
+}
+
+// DiagOptions specifies the options used to create a DiagMode.
+// The zero value is usable and selects the defaults described on each field.
+type DiagOptions struct {
+	// ByteStringEncoding specifies the base encoding used to notate byte strings.
+	// Default is ByteStringBase16Encoding.
+	ByteStringEncoding ByteStringEncoding
+
+	// ByteStringHexWhitespace specifies inserting a space between the hex
+	// digits of consecutive bytes. It only has an effect when
+	// ByteStringEncoding is ByteStringBase16Encoding.
+	ByteStringHexWhitespace bool
+
+	// ByteStringText specifies notating a non-empty byte string as
+	// single-quoted text when its content is valid UTF-8.
+	// It is checked before ByteStringEmbeddedCBOR.
+	ByteStringText bool
+
+	// ByteStringEmbeddedCBOR specifies notating a non-empty byte string as
+	// embedded CBOR (<<...>>) when its content is a well-formed CBOR sequence.
+	ByteStringEmbeddedCBOR bool
+
+	// CBORSequence specifies notating the input as a CBOR sequence, with the
+	// items separated by ", ". When it is false, Diagnose returns an error
+	// if there are more bytes after the first CBOR data item.
+	CBORSequence bool
+
+	// FloatPrecisionIndicator specifies appending a suffix (_1, _2 or _3) to
+	// floating-point numbers to indicate their encoded precision.
+	// Refer to https://www.rfc-editor.org/rfc/rfc8949.html#name-encoding-indicators.
+	FloatPrecisionIndicator bool
+
+	// MaxNestedLevels specifies the max nested levels allowed for any combination of CBOR array, maps, and tags.
+	// Default is 32 levels and it can be set to [4, 65535]. Note that higher maximum levels of nesting can
+	// require larger amounts of stack to deserialize. Don't increase this higher than you require.
+	MaxNestedLevels int
+
+	// MaxArrayElements specifies the max number of elements for CBOR arrays.
+	// Default is 128*1024=131072 and it can be set to [16, 2147483647]
+	MaxArrayElements int
+
+	// MaxMapPairs specifies the max number of key-value pairs for CBOR maps.
+	// Default is 128*1024=131072 and it can be set to [16, 2147483647]
+	MaxMapPairs int
+}
+
+// DiagMode returns a DiagMode with immutable options.
+// It returns an error, and a nil DiagMode, if opts contains an invalid
+// ByteStringEncoding or limits that the underlying decoding options reject.
+func (opts DiagOptions) DiagMode() (DiagMode, error) {
+	dm, err := opts.diagMode()
+	if err != nil {
+		// Return a literal nil rather than dm: a nil *diagMode stored in the
+		// DiagMode interface would compare unequal to nil in the caller.
+		return nil, err
+	}
+	return dm, nil
+}
+
+// diagMode validates opts and builds the concrete *diagMode, including the
+// decoding mode that is used to check and parse the CBOR input.
+func (opts DiagOptions) diagMode() (*diagMode, error) {
+	if err := opts.ByteStringEncoding.valid(); err != nil {
+		return nil, err
+	}
+
+	decMode, err := DecOptions{
+		MaxNestedLevels:  opts.MaxNestedLevels,
+		MaxArrayElements: opts.MaxArrayElements,
+		MaxMapPairs:      opts.MaxMapPairs,
+		// Use the loosest decoding options: diagnostics should be able to
+		// inspect as much input as possible, so invalid UTF-8 is not
+		// rejected while decoding.
+		UTF8: UTF8DecodeInvalid,
+	}.decMode()
+	if err != nil {
+		return nil, err
+	}
+
+	return &diagMode{
+		byteStringEncoding:      opts.ByteStringEncoding,
+		byteStringHexWhitespace: opts.ByteStringHexWhitespace,
+		byteStringText:          opts.ByteStringText,
+		byteStringEmbeddedCBOR:  opts.ByteStringEmbeddedCBOR,
+		cborSequence:            opts.CBORSequence,
+		floatPrecisionIndicator: opts.FloatPrecisionIndicator,
+		decMode:                 decMode,
+	}, nil
+}
+
+// diagMode is the DiagMode implementation. Its fields mirror the
+// corresponding DiagOptions fields; the nesting and size limits are held by
+// decMode.
+type diagMode struct {
+	byteStringEncoding      ByteStringEncoding
+	byteStringHexWhitespace bool
+	byteStringText          bool
+	byteStringEmbeddedCBOR  bool
+	cborSequence            bool
+	floatPrecisionIndicator bool
+	decMode                 *decMode
+}
+
+// DiagOptions returns user specified options used to create this DiagMode.
+// The three Max* limits are read back from the decoding mode.
+func (dm *diagMode) DiagOptions() DiagOptions {
+	return DiagOptions{
+		ByteStringEncoding:      dm.byteStringEncoding,
+		ByteStringHexWhitespace: dm.byteStringHexWhitespace,
+		ByteStringText:          dm.byteStringText,
+		ByteStringEmbeddedCBOR:  dm.byteStringEmbeddedCBOR,
+		CBORSequence:            dm.cborSequence,
+		FloatPrecisionIndicator: dm.floatPrecisionIndicator,
+		MaxNestedLevels:         dm.decMode.maxNestedLevels,
+		MaxArrayElements:        dm.decMode.maxArrayElements,
+		MaxMapPairs:             dm.decMode.maxMapPairs,
+	}
+}
+
+// Diagnose returns extended diagnostic notation (EDN) of CBOR data items using the DiagMode.
+func (dm *diagMode) Diagnose(data []byte) (string, error) {
+	return newDiagnose(data, dm.decMode, dm).diag(dm.cborSequence)
+}
+
+// DiagnoseFirst returns extended diagnostic notation (EDN) of the first CBOR data item using the DiagMode.
+// Any remaining bytes are returned in rest.
+func (dm *diagMode) DiagnoseFirst(data []byte) (string, []byte, error) {
+	return newDiagnose(data, dm.decMode, dm).diagFirst()
+}
+
+// defaultDiagMode is the mode used by the package-level Diagnose and
+// DiagnoseFirst. The error is deliberately discarded: the zero DiagOptions
+// selects the defaults and is expected to be valid.
+var defaultDiagMode, _ = DiagOptions{}.diagMode()
+
+// Diagnose returns extended diagnostic notation (EDN) of CBOR data items
+// using the default diagnostic mode.
+//
+// Refer to https://www.rfc-editor.org/rfc/rfc8949.html#name-diagnostic-notation.
+func Diagnose(data []byte) (string, error) {
+	return defaultDiagMode.Diagnose(data)
+}
+
+// DiagnoseFirst returns extended diagnostic notation (EDN) of the first CBOR
+// data item using the default diagnostic mode. Any remaining bytes are
+// returned in rest.
+func DiagnoseFirst(data []byte) (string, []byte, error) {
+	return defaultDiagMode.DiagnoseFirst(data)
+}
+
+// diagnose holds the state of a single diagnostic run.
+type diagnose struct {
+	dm *diagMode     // notation options
+	d  *decoder      // input data and current read offset
+	w  *bytes.Buffer // accumulated notation
+}
+
+// newDiagnose returns a diagnose that reads data from offset 0 using decm
+// and writes notation, formatted according to diagm, to a fresh buffer.
+func newDiagnose(data []byte, decm *decMode, diagm *diagMode) *diagnose {
+	return &diagnose{
+		dm: diagm,
+		d:  &decoder{data: data, dm: decm},
+		w:  &bytes.Buffer{},
+	}
+}
+
+// diag notates every data item in the input, separating items with ", ".
+// cborSequence is passed to the well-formedness check as "allow extra data";
+// when it is false, trailing bytes after the first item produce an error.
+// On error, the notation produced so far is returned together with the error.
+func (di *diagnose) diag(cborSequence bool) (string, error) {
+	// CBOR Sequence
+	firstItem := true
+	for {
+		// Each item is checked for well-formedness before it is notated, so
+		// item() can read the input without bounds checks of its own.
+		switch err := di.wellformed(cborSequence); err {
+		case nil:
+			if !firstItem {
+				if err = di.writeString(", "); err != nil {
+					return di.w.String(), err
+				}
+			}
+			firstItem = false
+			if err = di.item(); err != nil {
+				return di.w.String(), err
+			}
+
+		case io.EOF:
+			// No more input: the sequence is complete.
+			return di.w.String(), nil
+
+		default:
+			return di.w.String(), err
+		}
+	}
+}
+
+// diagFirst notates only the first data item. On success it returns the
+// notation and the unread remainder of the input (possibly empty, never
+// nil). On error it returns whatever was written and a nil remainder.
+func (di *diagnose) diagFirst() (string, []byte, error) {
+	err := di.wellformed(true)
+	if err == nil {
+		err = di.item()
+	}
+
+	if err == nil {
+		// Return EDN and the rest of the data slice (which might be len 0)
+		return di.w.String(), di.d.data[di.d.off:], nil
+	}
+
+	return di.w.String(), nil, err
+}
+
+// wellformed checks the data item at the current offset and then restores
+// the offset, so the same item can be notated afterwards.
+func (di *diagnose) wellformed(allowExtraData bool) error {
+	off := di.d.off
+	err := di.d.wellformed(allowExtraData)
+	di.d.off = off
+	return err
+}
+
+// item writes the notation of the data item at the current offset and
+// advances past it, recursing into nested items. The caller must have
+// validated the item with wellformed first; item indexes the input without
+// further bounds checks.
+func (di *diagnose) item() error { //nolint:gocyclo
+	initialByte := di.d.data[di.d.off]
+	switch initialByte {
+	case 0x5f, 0x7f: // indefinite-length byte/text string
+		di.d.off++
+		if di.d.data[di.d.off] == 0xff {
+			di.d.off++
+			switch initialByte {
+			case 0x5f:
+				// indefinite-length bytes with no chunks.
+				return di.writeString(`''_`)
+			case 0x7f:
+				// indefinite-length text with no chunks.
+				return di.writeString(`""_`)
+			}
+		}
+
+		if err := di.writeString("(_ "); err != nil {
+			return err
+		}
+
+		i := 0
+		for !di.d.foundBreak() {
+			if i > 0 {
+				if err := di.writeString(", "); err != nil {
+					return err
+				}
+			}
+
+			i++
+			// wellformedIndefiniteString() already checked that the next item is a byte/text string.
+			if err := di.item(); err != nil {
+				return err
+			}
+		}
+
+		return di.writeByte(')')
+
+	case 0x9f: // indefinite-length array
+		di.d.off++
+		if err := di.writeString("[_ "); err != nil {
+			return err
+		}
+
+		i := 0
+		for !di.d.foundBreak() {
+			if i > 0 {
+				if err := di.writeString(", "); err != nil {
+					return err
+				}
+			}
+
+			i++
+			if err := di.item(); err != nil {
+				return err
+			}
+		}
+
+		return di.writeByte(']')
+
+	case 0xbf: // indefinite-length map
+		di.d.off++
+		if err := di.writeString("{_ "); err != nil {
+			return err
+		}
+
+		i := 0
+		for !di.d.foundBreak() {
+			if i > 0 {
+				if err := di.writeString(", "); err != nil {
+					return err
+				}
+			}
+
+			i++
+			// key
+			if err := di.item(); err != nil {
+				return err
+			}
+
+			if err := di.writeString(": "); err != nil {
+				return err
+			}
+
+			// value
+			if err := di.item(); err != nil {
+				return err
+			}
+		}
+
+		return di.writeByte('}')
+	}
+
+	t := di.d.nextCBORType()
+	switch t {
+	case cborTypePositiveInt:
+		_, _, val := di.d.getHead()
+		return di.writeString(strconv.FormatUint(val, 10))
+
+	case cborTypeNegativeInt:
+		_, _, val := di.d.getHead()
+		if val > math.MaxInt64 {
+			// CBOR negative integer overflows int64, use big.Int to store value.
+			bi := new(big.Int)
+			bi.SetUint64(val)
+			bi.Add(bi, big.NewInt(1))
+			bi.Neg(bi)
+			return di.writeString(bi.String())
+		}
+
+		// The encoded value n stands for -1 - n, which is the bitwise
+		// complement of n.
+		nValue := int64(-1) ^ int64(val)
+		return di.writeString(strconv.FormatInt(nValue, 10))
+
+	case cborTypeByteString:
+		b := di.d.parseByteString()
+		return di.encodeByteString(b)
+
+	case cborTypeTextString:
+		b, err := di.d.parseTextString()
+		if err != nil {
+			return err
+		}
+		return di.encodeTextString(string(b), '"')
+
+	case cborTypeArray:
+		_, _, val := di.d.getHead()
+		count := int(val)
+		if err := di.writeByte('['); err != nil {
+			return err
+		}
+
+		for i := 0; i < count; i++ {
+			if i > 0 {
+				if err := di.writeString(", "); err != nil {
+					return err
+				}
+			}
+			if err := di.item(); err != nil {
+				return err
+			}
+		}
+		return di.writeByte(']')
+
+	case cborTypeMap:
+		_, _, val := di.d.getHead()
+		count := int(val)
+		if err := di.writeByte('{'); err != nil {
+			return err
+		}
+
+		for i := 0; i < count; i++ {
+			if i > 0 {
+				if err := di.writeString(", "); err != nil {
+					return err
+				}
+			}
+			// key
+			if err := di.item(); err != nil {
+				return err
+			}
+			if err := di.writeString(": "); err != nil {
+				return err
+			}
+			// value
+			if err := di.item(); err != nil {
+				return err
+			}
+		}
+		return di.writeByte('}')
+
+	case cborTypeTag:
+		_, _, tagNum := di.d.getHead()
+		switch tagNum {
+		case 2:
+			// Unsigned bignum: notated as a plain decimal integer.
+			if nt := di.d.nextCBORType(); nt != cborTypeByteString {
+				return errors.New("cbor: tag number 2 must be followed by byte string, got " + nt.String())
+			}
+
+			b := di.d.parseByteString()
+			bi := new(big.Int).SetBytes(b)
+			return di.writeString(bi.String())
+
+		case 3:
+			// Negative bignum: the value is -1 - n, where n is the content.
+			if nt := di.d.nextCBORType(); nt != cborTypeByteString {
+				return errors.New("cbor: tag number 3 must be followed by byte string, got " + nt.String())
+			}
+
+			b := di.d.parseByteString()
+			bi := new(big.Int).SetBytes(b)
+			bi.Add(bi, big.NewInt(1))
+			bi.Neg(bi)
+			return di.writeString(bi.String())
+
+		default:
+			// Any other tag is notated as tagNumber(content).
+			if err := di.writeString(strconv.FormatUint(tagNum, 10)); err != nil {
+				return err
+			}
+			if err := di.writeByte('('); err != nil {
+				return err
+			}
+			if err := di.item(); err != nil {
+				return err
+			}
+			return di.writeByte(')')
+		}
+
+	case cborTypePrimitives:
+		_, ai, val := di.d.getHead()
+		switch ai {
+		case 20:
+			return di.writeString("false")
+
+		case 21:
+			return di.writeString("true")
+
+		case 22:
+			return di.writeString("null")
+
+		case 23:
+			return di.writeString("undefined")
+
+		case 25, 26, 27:
+			return di.encodeFloat(ai, val)
+
+		default:
+			if err := di.writeString("simple("); err != nil {
+				return err
+			}
+			if err := di.writeString(strconv.FormatUint(val, 10)); err != nil {
+				return err
+			}
+			return di.writeByte(')')
+		}
+	}
+
+	return nil
+}
+
+// writeByte appends val to the output buffer.
+func (di *diagnose) writeByte(val byte) error {
+	return di.w.WriteByte(val)
+}
+
+// writeString appends val to the output buffer.
+func (di *diagnose) writeString(val string) error {
+	_, err := di.w.WriteString(val)
+	return err
+}
+
+// writeU16 formats a rune as "\uxxxx" using four lowercase hex digits.
+// Only the low 16 bits of val are written, so callers must split runes at or
+// above 0x10000 into a surrogate pair first.
+func (di *diagnose) writeU16(val rune) error {
+	if err := di.writeString("\\u"); err != nil {
+		return err
+	}
+	b := make([]byte, 2)
+	b[0] = byte(val >> 8)
+	b[1] = byte(val)
+	return di.writeString(hex.EncodeToString(b))
+}
+
+// Unpadded variants of the standard base32 and base32hex encodings.
+var rawBase32Encoding = base32.StdEncoding.WithPadding(base32.NoPadding)
+var rawBase32HexEncoding = base32.HexEncoding.WithPadding(base32.NoPadding)
+
+// encodeByteString writes the notation of the byte string val.
+//
+// For a non-empty val, the options are tried in this order: single-quoted
+// text (ByteStringText, if val is valid UTF-8), then embedded CBOR
+// (ByteStringEmbeddedCBOR, if val diagnoses without error as a CBOR
+// sequence). Otherwise val is written in the configured base encoding with
+// the matching prefix: h'', b32'', h32'' or b64''.
+func (di *diagnose) encodeByteString(val []byte) error {
+	if len(val) > 0 {
+		if di.dm.byteStringText && utf8.Valid(val) {
+			return di.encodeTextString(string(val), '\'')
+		}
+
+		if di.dm.byteStringEmbeddedCBOR {
+			di2 := newDiagnose(val, di.dm.decMode, di.dm)
+			// Embedded content is always notated as a CBOR sequence,
+			// regardless of the CBORSequence option. If it cannot be
+			// diagnosed, fall through to the base encoding below.
+			if str, err := di2.diag(true); err == nil {
+				if err := di.writeString("<<"); err != nil {
+					return err
+				}
+				if err := di.writeString(str); err != nil {
+					return err
+				}
+				return di.writeString(">>")
+			}
+		}
+	}
+
+	switch di.dm.byteStringEncoding {
+	case ByteStringBase16Encoding:
+		if err := di.writeString("h'"); err != nil {
+			return err
+		}
+
+		encoder := hex.NewEncoder(di.w)
+		if di.dm.byteStringHexWhitespace {
+			for i := range val {
+				if i > 0 {
+					if err := di.writeByte(' '); err != nil {
+						return err
+					}
+				}
+				// Encode one byte at a time by sub-slicing val, which
+				// avoids allocating a one-byte slice per iteration.
+				if _, err := encoder.Write(val[i : i+1]); err != nil {
+					return err
+				}
+			}
+		} else {
+			if _, err := encoder.Write(val); err != nil {
+				return err
+			}
+		}
+		return di.writeByte('\'')
+
+	case ByteStringBase32Encoding:
+		if err := di.writeString("b32'"); err != nil {
+			return err
+		}
+		encoder := base32.NewEncoder(rawBase32Encoding, di.w)
+		if _, err := encoder.Write(val); err != nil {
+			return err
+		}
+		// Close flushes the final partial block; its error must be checked.
+		if err := encoder.Close(); err != nil {
+			return err
+		}
+		return di.writeByte('\'')
+
+	case ByteStringBase32HexEncoding:
+		if err := di.writeString("h32'"); err != nil {
+			return err
+		}
+		encoder := base32.NewEncoder(rawBase32HexEncoding, di.w)
+		if _, err := encoder.Write(val); err != nil {
+			return err
+		}
+		if err := encoder.Close(); err != nil {
+			return err
+		}
+		return di.writeByte('\'')
+
+	case ByteStringBase64Encoding:
+		if err := di.writeString("b64'"); err != nil {
+			return err
+		}
+		encoder := base64.NewEncoder(base64.RawURLEncoding, di.w)
+		if _, err := encoder.Write(val); err != nil {
+			return err
+		}
+		if err := encoder.Close(); err != nil {
+			return err
+		}
+		return di.writeByte('\'')
+
+	default:
+		return di.dm.byteStringEncoding.valid()
+	}
+}
+
+// utf16SurrSelf is the first code point that needs a UTF-16 surrogate pair.
+var utf16SurrSelf = rune(0x10000)
+
+// encodeTextString writes val as a quoted string.
+//
+// quote should be either `'` or `"`. Tab, newline, carriage return,
+// backslash and the quote character are backslash-escaped; other printable
+// ASCII is written as is; every other character is written as \uxxxx, using
+// a surrogate pair for code points at or above 0x10000. A SemanticError is
+// returned if val contains invalid UTF-8.
+func (di *diagnose) encodeTextString(val string, quote byte) error {
+	if err := di.writeByte(quote); err != nil {
+		return err
+	}
+
+	for i := 0; i < len(val); {
+		if b := val[i]; b < utf8.RuneSelf {
+			switch {
+			case b == '\t', b == '\n', b == '\r', b == '\\', b == quote:
+				if err := di.writeByte('\\'); err != nil {
+					return err
+				}
+
+				switch b {
+				case '\t':
+					b = 't'
+				case '\n':
+					b = 'n'
+				case '\r':
+					b = 'r'
+				}
+				if err := di.writeByte(b); err != nil {
+					return err
+				}
+
+			case b >= ' ' && b <= '~':
+				if err := di.writeByte(b); err != nil {
+					return err
+				}
+
+			default:
+				if err := di.writeU16(rune(b)); err != nil {
+					return err
+				}
+			}
+
+			i++
+			continue
+		}
+
+		c, size := utf8.DecodeRuneInString(val[i:])
+		switch {
+		case c == utf8.RuneError && size == 1:
+			// (RuneError, 1) is how DecodeRuneInString reports an invalid
+			// encoding. A correctly encoded U+FFFD also decodes to
+			// RuneError, but with size 3; it is valid text and is notated
+			// like any other character below.
+			return &SemanticError{"cbor: invalid UTF-8 string"}
+
+		case c < utf16SurrSelf:
+			if err := di.writeU16(c); err != nil {
+				return err
+			}
+
+		default:
+			c1, c2 := utf16.EncodeRune(c)
+			if err := di.writeU16(c1); err != nil {
+				return err
+			}
+			if err := di.writeU16(c2); err != nil {
+				return err
+			}
+		}
+
+		i += size
+	}
+
+	return di.writeByte(quote)
+}
+
+// encodeFloat writes the notation of a floating-point number.
+//
+// ai is the additional information of the CBOR head and selects the width of
+// the bit pattern in val: 25 is float16, 26 is float32 and 27 is float64.
+// NaN and the infinities are written as NaN, Infinity and -Infinity, without
+// a precision suffix. Finite values always contain a decimal point and, if
+// FloatPrecisionIndicator is set, end in _1, _2 or _3 respectively.
+func (di *diagnose) encodeFloat(ai byte, val uint64) error {
+	f64 := float64(0)
+	switch ai {
+	case 25:
+		f16 := float16.Frombits(uint16(val))
+		switch {
+		case f16.IsNaN():
+			return di.writeString("NaN")
+		case f16.IsInf(1):
+			return di.writeString("Infinity")
+		case f16.IsInf(-1):
+			return di.writeString("-Infinity")
+		default:
+			f64 = float64(f16.Float32())
+		}
+
+	case 26:
+		f32 := math.Float32frombits(uint32(val))
+		switch {
+		case f32 != f32: // only NaN is unequal to itself
+			return di.writeString("NaN")
+		case f32 > math.MaxFloat32:
+			return di.writeString("Infinity")
+		case f32 < -math.MaxFloat32:
+			return di.writeString("-Infinity")
+		default:
+			f64 = float64(f32)
+		}
+
+	case 27:
+		f64 = math.Float64frombits(val)
+		switch {
+		case f64 != f64: // only NaN is unequal to itself
+			return di.writeString("NaN")
+		case f64 > math.MaxFloat64:
+			return di.writeString("Infinity")
+		case f64 < -math.MaxFloat64:
+			return di.writeString("-Infinity")
+		}
+	}
+	// Use ES6 number to string conversion which should match most JSON generators.
+	// Inspired by https://github.com/golang/go/blob/4df10fba1687a6d4f51d7238a403f8f2298f6a16/src/encoding/json/encode.go#L585
+	b := make([]byte, 0, 32)
+	if abs := math.Abs(f64); abs != 0 && (abs < 1e-6 || abs >= 1e21) {
+		b = strconv.AppendFloat(b, f64, 'e', -1, 64)
+		// clean up e-09 to e-9
+		n := len(b)
+		if n >= 4 && string(b[n-4:n-1]) == "e-0" {
+			b = append(b[:n-2], b[n-1])
+		}
+	} else {
+		b = strconv.AppendFloat(b, f64, 'f', -1, 64)
+	}
+
+	// add decimal point and trailing zero if needed
+	if bytes.IndexByte(b, '.') < 0 {
+		if i := bytes.IndexByte(b, 'e'); i < 0 {
+			b = append(b, '.', '0')
+		} else {
+			// Shift the exponent part two bytes to the right, then
+			// overwrite the gap with ".0" (for example 1e+300 becomes
+			// 1.0e+300).
+			b = append(b[:i+2], b[i:]...)
+			b[i] = '.'
+			b[i+1] = '0'
+		}
+	}
+
+	if err := di.writeString(string(b)); err != nil {
+		return err
+	}
+
+	if di.dm.floatPrecisionIndicator {
+		switch ai {
+		case 25:
+			return di.writeString("_1")
+		case 26:
+			return di.writeString("_2")
+		case 27:
+			return di.writeString("_3")
+		}
+	}
+
+	return nil
+}
diff --git a/diagnose_test.go b/diagnose_test.go
new file mode 100644
index 00000000..2f6e09d0
--- /dev/null
+++ b/diagnose_test.go
@@ -0,0 +1,1028 @@
+// Copyright (c) Faye Amacker. All rights reserved.
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+ +package cbor + +import ( + "bytes" + "fmt" + "strings" + "testing" +) + +func TestDiagnosticNotationExamples(t *testing.T) { + // https://www.rfc-editor.org/rfc/rfc8949.html#name-examples-of-encoded-cbor-da + testCases := []struct { + cbor []byte + diag string + }{ + { + hexDecode("00"), + `0`, + }, + { + hexDecode("01"), + `1`, + }, + { + hexDecode("0a"), + `10`, + }, + { + hexDecode("17"), + `23`, + }, + { + hexDecode("1818"), + `24`, + }, + { + hexDecode("1819"), + `25`, + }, + { + hexDecode("1864"), + `100`, + }, + { + hexDecode("1903e8"), + `1000`, + }, + { + hexDecode("1a000f4240"), + `1000000`, + }, + { + hexDecode("1b000000e8d4a51000"), + `1000000000000`, + }, + { + hexDecode("1bffffffffffffffff"), + `18446744073709551615`, + }, + { + hexDecode("c249010000000000000000"), + `18446744073709551616`, + }, + { + hexDecode("3bffffffffffffffff"), + `-18446744073709551616`, + }, + { + hexDecode("c349010000000000000000"), + `-18446744073709551617`, + }, + { + hexDecode("20"), + `-1`, + }, + { + hexDecode("29"), + `-10`, + }, + { + hexDecode("3863"), + `-100`, + }, + { + hexDecode("3903e7"), + `-1000`, + }, + { + hexDecode("f90000"), + `0.0`, + }, + { + hexDecode("f98000"), + `-0.0`, + }, + { + hexDecode("f93c00"), + `1.0`, + }, + { + hexDecode("fb3ff199999999999a"), + `1.1`, + }, + { + hexDecode("f93e00"), + `1.5`, + }, + { + hexDecode("f97bff"), + `65504.0`, + }, + { + hexDecode("fa47c35000"), + `100000.0`, + }, + { + hexDecode("fa7f7fffff"), + `3.4028234663852886e+38`, + }, + { + hexDecode("fb7e37e43c8800759c"), + `1.0e+300`, + }, + { + hexDecode("f90001"), + `5.960464477539063e-8`, + }, + { + hexDecode("f90400"), + `0.00006103515625`, + }, + { + hexDecode("f9c400"), + `-4.0`, + }, + { + hexDecode("fbc010666666666666"), + `-4.1`, + }, + { + hexDecode("f97c00"), + `Infinity`, + }, + { + hexDecode("f97e00"), + `NaN`, + }, + { + hexDecode("f9fc00"), + `-Infinity`, + }, + { + hexDecode("fa7f800000"), + `Infinity`, + }, + { + hexDecode("fa7fc00000"), + `NaN`, + }, + 
{ + hexDecode("faff800000"), + `-Infinity`, + }, + { + hexDecode("fb7ff0000000000000"), + `Infinity`, + }, + { + hexDecode("fb7ff8000000000000"), + `NaN`, + }, + { + hexDecode("fbfff0000000000000"), + `-Infinity`, + }, + { + hexDecode("f4"), + `false`, + }, + { + hexDecode("f5"), + `true`, + }, + { + hexDecode("f6"), + `null`, + }, + { + hexDecode("f7"), + `undefined`, + }, + { + hexDecode("f0"), + `simple(16)`, + }, + { + hexDecode("f8ff"), + `simple(255)`, + }, + { + hexDecode("c074323031332d30332d32315432303a30343a30305a"), + `0("2013-03-21T20:04:00Z")`, + }, + { + hexDecode("c11a514b67b0"), + `1(1363896240)`, + }, + { + hexDecode("c1fb41d452d9ec200000"), + `1(1363896240.5)`, + }, + { + hexDecode("d74401020304"), + `23(h'01020304')`, + }, + { + hexDecode("d818456449455446"), + `24(h'6449455446')`, + }, + { + hexDecode("d82076687474703a2f2f7777772e6578616d706c652e636f6d"), + `32("http://www.example.com")`, + }, + { + hexDecode("40"), + `h''`, + }, + { + hexDecode("4401020304"), + `h'01020304'`, + }, + { + hexDecode("60"), + `""`, + }, + { + hexDecode("6161"), + `"a"`, + }, + { + hexDecode("6449455446"), + `"IETF"`, + }, + { + hexDecode("62225c"), + `"\"\\"`, + }, + { + hexDecode("62c3bc"), + `"\u00fc"`, + }, + { + hexDecode("63e6b0b4"), + `"\u6c34"`, + }, + { + hexDecode("64f0908591"), + `"\ud800\udd51"`, + }, + { + hexDecode("80"), + `[]`, + }, + { + hexDecode("83010203"), + `[1, 2, 3]`, + }, + { + hexDecode("8301820203820405"), + `[1, [2, 3], [4, 5]]`, + }, + { + hexDecode("98190102030405060708090a0b0c0d0e0f101112131415161718181819"), + `[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]`, + }, + { + hexDecode("a0"), + `{}`, + }, + { + hexDecode("a201020304"), + `{1: 2, 3: 4}`, + }, + { + hexDecode("a26161016162820203"), + `{"a": 1, "b": [2, 3]}`, + }, + { + hexDecode("826161a161626163"), + `["a", {"b": "c"}]`, + }, + { + hexDecode("a56161614161626142616361436164614461656145"), + `{"a": "A", "b": "B", "c": "C", "d": "D", 
"e": "E"}`, + }, + { + hexDecode("5f42010243030405ff"), + `(_ h'0102', h'030405')`, + }, + { + hexDecode("7f657374726561646d696e67ff"), + `(_ "strea", "ming")`, + }, + { + hexDecode("9fff"), + `[_ ]`, + }, + { + hexDecode("9f018202039f0405ffff"), + `[_ 1, [2, 3], [_ 4, 5]]`, + }, + { + hexDecode("9f01820203820405ff"), + `[_ 1, [2, 3], [4, 5]]`, + }, + { + hexDecode("83018202039f0405ff"), + `[1, [2, 3], [_ 4, 5]]`, + }, + { + hexDecode("83019f0203ff820405"), + `[1, [_ 2, 3], [4, 5]]`, + }, + { + hexDecode("9f0102030405060708090a0b0c0d0e0f101112131415161718181819ff"), + `[_ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]`, + }, + { + hexDecode("bf61610161629f0203ffff"), + `{_ "a": 1, "b": [_ 2, 3]}`, + }, + { + hexDecode("826161bf61626163ff"), + `["a", {_ "b": "c"}]`, + }, + { + hexDecode("bf6346756ef563416d7421ff"), + `{_ "Fun": true, "Amt": -2}`, + }, + } + + for i, tc := range testCases { + t.Run(fmt.Sprintf("Diagnostic %d", i), func(t *testing.T) { + str, err := Diagnose(tc.cbor) + if err != nil { + t.Errorf("Diagnostic(0x%x) returned error %q", tc.cbor, err) + } else if str != tc.diag { + t.Errorf("Diagnostic(0x%x) returned `%s`, want `%s`", tc.cbor, str, tc.diag) + } + + str, rest, err := DiagnoseFirst(tc.cbor) + if err != nil { + t.Errorf("Diagnostic(0x%x) returned error %q", tc.cbor, err) + } else if str != tc.diag { + t.Errorf("Diagnostic(0x%x) returned `%s`, want `%s`", tc.cbor, str, tc.diag) + } + + if rest == nil { + t.Errorf("Diagnostic(0x%x) returned nil rest", tc.cbor) + } else if len(rest) != 0 { + t.Errorf("Diagnostic(0x%x) returned non-empty rest '%x'", tc.cbor, rest) + } + }) + } +} + +func TestDiagnoseByteString(t *testing.T) { + testCases := []struct { + title string + cbor []byte + diag string + opts *DiagOptions + }{ + { + "base16", + hexDecode("4412345678"), + `h'12345678'`, + &DiagOptions{ + ByteStringEncoding: ByteStringBase16Encoding, + }, + }, + { + "base32", + hexDecode("4412345678"), + 
`b32'CI2FM6A'`, + &DiagOptions{ + ByteStringEncoding: ByteStringBase32Encoding, + }, + }, + { + "base32hex", + hexDecode("4412345678"), + `h32'28Q5CU0'`, + &DiagOptions{ + ByteStringEncoding: ByteStringBase32HexEncoding, + }, + }, + { + "base64", + hexDecode("4412345678"), + `b64'EjRWeA'`, + &DiagOptions{ + ByteStringEncoding: ByteStringBase64Encoding, + }, + }, + { + "without ByteStringHexWhitespace option", + hexDecode("4b48656c6c6f20776f726c64"), + `h'48656c6c6f20776f726c64'`, + &DiagOptions{ + ByteStringHexWhitespace: false, + }, + }, + { + "with ByteStringHexWhitespace option", + hexDecode("4b48656c6c6f20776f726c64"), + `h'48 65 6c 6c 6f 20 77 6f 72 6c 64'`, + &DiagOptions{ + ByteStringHexWhitespace: true, + }, + }, + { + "without ByteStringText option", + hexDecode("4b68656c6c6f20776f726c64"), + `h'68656c6c6f20776f726c64'`, + &DiagOptions{ + ByteStringText: false, + }, + }, + { + "with ByteStringText option", + hexDecode("4b68656c6c6f20776f726c64"), + `'hello world'`, + &DiagOptions{ + ByteStringText: true, + }, + }, + { + "without ByteStringText option and with ByteStringHexWhitespace option", + hexDecode("4b68656c6c6f20776f726c64"), + `h'68 65 6c 6c 6f 20 77 6f 72 6c 64'`, + &DiagOptions{ + ByteStringText: false, + ByteStringHexWhitespace: true, + }, + }, + { + "without ByteStringEmbeddedCBOR and CBORSequence option", + hexDecode("4101"), + `h'01'`, + &DiagOptions{ + ByteStringEmbeddedCBOR: false, + CBORSequence: false, + }, + }, + { + "with ByteStringEmbeddedCBOR and CBORSequence option", + hexDecode("4101"), + `<<1>>`, + &DiagOptions{ + ByteStringEmbeddedCBOR: true, + CBORSequence: true, + }, + }, + { + "without ByteStringEmbeddedCBOR and CBORSequence option", + hexDecode("420102"), + `h'0102'`, + &DiagOptions{ + ByteStringEmbeddedCBOR: false, + CBORSequence: false, + }, + }, + { + "with ByteStringEmbeddedCBOR and CBORSequence option", + hexDecode("420102"), + `<<1, 2>>`, + &DiagOptions{ + ByteStringEmbeddedCBOR: true, + CBORSequence: true, + }, + }, + { 
+ "with CBORSequence option", + hexDecode("0102"), + `1, 2`, + &DiagOptions{ + CBORSequence: true, + }, + }, + { + "with ByteStringEmbeddedCBOR and CBORSequence option", + hexDecode("4563666F6FF6"), + `h'63666f6ff6'`, + &DiagOptions{ + ByteStringEmbeddedCBOR: false, + CBORSequence: false, + }, + }, + { + "with ByteStringEmbeddedCBOR and CBORSequence option", + hexDecode("4563666F6FF6"), + `<<"foo", null>>`, + &DiagOptions{ + ByteStringEmbeddedCBOR: true, + CBORSequence: true, + }, + }, + { + "with ByteStringEmbeddedCBOR and without CBORSequence option", + hexDecode("4563666F6FF6"), + `<<"foo", null>>`, + &DiagOptions{ + ByteStringEmbeddedCBOR: true, + CBORSequence: false, + }, + }, + { + "with CBORSequence option", + hexDecode("63666F6FF6"), + `"foo", null`, + &DiagOptions{ + CBORSequence: true, + }, + }, + { + "indefinite length byte string with no chunks", + hexDecode("5fff"), + `''_`, + &DiagOptions{}, + }, + { + "indefinite length byte string with a empty byte string", + hexDecode("5f40ff"), + `(_ h'')`, // RFC 8949, Section 8.1 says `(_ '')` but it looks wrong and conflicts with Appendix A. 
+ &DiagOptions{}, + }, + { + "indefinite length byte string with two empty byte string", + hexDecode("5f4040ff"), + `(_ h'', h'')`, + &DiagOptions{}, + }, + } + + for _, tc := range testCases { + t.Run(tc.title, func(t *testing.T) { + dm, err := tc.opts.DiagMode() + if err != nil { + t.Errorf("DiagMode() for 0x%x returned error %q", tc.cbor, err) + } + + str, err := dm.Diagnose(tc.cbor) + if err != nil { + t.Errorf("Diagnose(0x%x) returned error %q", tc.cbor, err) + } else if str != tc.diag { + t.Errorf("Diagnose(0x%x) returned `%s`, want %s", tc.cbor, str, tc.diag) + } + }) + } + + t.Run("invalid encoding", func(t *testing.T) { + opts := &DiagOptions{ + ByteStringEncoding: ByteStringBase64Encoding + 1, + } + _, err := opts.DiagMode() + if err == nil { + t.Errorf("DiagMode() with invalid ByteStringEncoding option didn't return error") + } + }) + + t.Run("without CBORSequence option", func(t *testing.T) { + cborData := hexDecode("63666F6FF6") + _, err := Diagnose(cborData) + if err == nil { + t.Errorf("Diagnose(0x%x) didn't return error", cborData) + } else if !strings.Contains(err.Error(), `extraneous data`) { + t.Errorf("Diagnose(0x%x) returned error %q", cborData, err) + } + }) + + t.Run("invalid indefinite length byte string", func(t *testing.T) { + cborData := hexDecode("5f4060ff") + _, err := Diagnose(cborData) + if err == nil { + t.Errorf("Diagnose(0x%x) didn't return error", cborData) + } else if !strings.Contains(err.Error(), `wrong element type`) { + t.Errorf("Diagnose(0x%x) returned error %q", cborData, err) + } + }) +} + +func TestDiagnoseTextString(t *testing.T) { + testCases := []struct { + title string + cbor []byte + diag string + opts *DiagOptions + }{ + { + "valid UTF-8 text in byte string", + hexDecode("4d68656c6c6f2c20e4bda0e5a5bd"), + `'hello, \u4f60\u597d'`, + &DiagOptions{ + ByteStringText: true, + }, + }, + { + "valid UTF-8 text in text string", + hexDecode("6d68656c6c6f2c20e4bda0e5a5bd"), + `"hello, \u4f60\u597d"`, // "hello, 你好" + 
&DiagOptions{ + ByteStringText: true, + }, + }, + { + "invalid UTF-8 text in byte string", + hexDecode("4d68656c6c6fffeee4bda0e5a5bd"), + `h'68656c6c6fffeee4bda0e5a5bd'`, + &DiagOptions{ + ByteStringText: true, + }, + }, + // { + // "invalid UTF-8 text in text string", + // hexDecode("6d68656c6c6fffeee4bda0e5a5bd"), + // `"hello\u00ff\u00ee\u4f60\u597d"`, + // &DiagOptions{ + // ByteStringText: true, + // }, + // }, + { + "valid grapheme cluster text in byte string", + hexDecode("583448656c6c6f2c2027e29da4efb88fe2808df09f94a5270ae4bda0e5a5bdefbc8c22f09fa791e2808df09fa49de2808df09fa79122"), + `'Hello, \'\u2764\ufe0f\u200d\ud83d\udd25\'\n\u4f60\u597d\uff0c"\ud83e\uddd1\u200d\ud83e\udd1d\u200d\ud83e\uddd1"'`, + &DiagOptions{ + ByteStringText: true, + }, + }, + { + "valid grapheme cluster text in text string", + hexDecode("783448656c6c6f2c2027e29da4efb88fe2808df09f94a5270ae4bda0e5a5bdefbc8c22f09fa791e2808df09fa49de2808df09fa79122"), + `"Hello, '\u2764\ufe0f\u200d\ud83d\udd25'\n\u4f60\u597d\uff0c\"\ud83e\uddd1\u200d\ud83e\udd1d\u200d\ud83e\uddd1\""`, // "Hello, '❤️‍🔥'\n你好,\"🧑‍🤝‍🧑\"" + &DiagOptions{ + ByteStringText: true, + }, + }, + { + "invalid grapheme cluster text in byte string", + hexDecode("583448656c6c6feeff27e29da4efb88fe2808df09f94a5270de4bda0e5a5bdefbc8c22f09fa791e2808df09fa49de2808df09fa79122"), + `h'48656c6c6feeff27e29da4efb88fe2808df09f94a5270de4bda0e5a5bdefbc8c22f09fa791e2808df09fa49de2808df09fa79122'`, + &DiagOptions{ + ByteStringText: true, + }, + }, + // { + // "invalid grapheme cluster text in text string", + // hexDecode("783448656c6c6feeff27e29da4efb88fe2808df09f94a5270de4bda0e5a5bdefbc8c22f09fa791e2808df09fa49de2808df09fa79122"), + // `"Hello\u00ee\u00ff'\u2764\ufe0f\u200d\ud83d\udd25'\r\u4f60\u597d\uff0c\"\ud83e\uddd1\u200d\ud83e\udd1d\u200d\ud83e\uddd1\""`, + // &DiagOptions{ + // ByteStringText: true, + // }, + // }, + { + "indefinite length text string with no chunks", + hexDecode("7fff"), + `""_`, + &DiagOptions{}, + }, + { + "indefinite 
length text string with a empty text string", + hexDecode("7f60ff"), + `(_ "")`, + &DiagOptions{}, + }, + { + "indefinite length text string with two empty text string", + hexDecode("7f6060ff"), + `(_ "", "")`, + &DiagOptions{}, + }, + } + + for _, tc := range testCases { + t.Run(tc.title, func(t *testing.T) { + dm, err := tc.opts.DiagMode() + if err != nil { + t.Errorf("DiagMode() for 0x%x returned error %q", tc.cbor, err) + } + + str, err := dm.Diagnose(tc.cbor) + if err != nil { + t.Errorf("Diagnose(0x%x) returned error %q", tc.cbor, err) + } else if str != tc.diag { + t.Errorf("Diagnose(0x%x) returned `%s`, want %s", tc.cbor, str, tc.diag) + } + }) + } +}

// TestDiagnoseInvalidTextString checks that Diagnose returns an error for each
// listed text string input and that the error text contains wantErrorMsg.
func TestDiagnoseInvalidTextString(t *testing.T) {
	testCases := []struct {
		title        string
		cbor         []byte
		wantErrorMsg string
		opts         *DiagOptions
	}{
		{
			"invalid UTF-8 text in text string",
			hexDecode("6d68656c6c6fffeee4bda0e5a5bd"),
			"invalid UTF-8 string",
			&DiagOptions{
				ByteStringText: true,
			},
		},
		{
			"invalid grapheme cluster text in text string",
			hexDecode("783448656c6c6feeff27e29da4efb88fe2808df09f94a5270de4bda0e5a5bdefbc8c22f09fa791e2808df09fa49de2808df09fa79122"),
			"invalid UTF-8 string",
			&DiagOptions{
				ByteStringText: true,
			},
		},
		{
			"invalid indefinite length text string",
			hexDecode("7f6040ff"),
			`wrong element type`,
			&DiagOptions{
				ByteStringText: true,
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.title, func(t *testing.T) {
			dm, err := tc.opts.DiagMode()
			if err != nil {
				// Without a usable mode the call below would dereference a nil
				// interface, so stop this subtest immediately.
				t.Fatalf("DiagMode() for 0x%x returned error %q", tc.cbor, err)
			}

			_, err = dm.Diagnose(tc.cbor)
			switch {
			case err == nil:
				t.Errorf("Diagnose(0x%x) didn't return error", tc.cbor)
			case !strings.Contains(err.Error(), tc.wantErrorMsg):
				t.Errorf("Diagnose(0x%x) returned error %q, want error containing %q", tc.cbor, err, tc.wantErrorMsg)
			}
		})
	}
}

// TestDiagnoseFloatingPointNumber checks the notation produced for float16,
// float32 and float64 items with and without the FloatPrecisionIndicator
// option; with the option set the expected output carries a _1, _2 or _3 suffix.
func TestDiagnoseFloatingPointNumber(t *testing.T) {
	testCases := []struct {
		title string
		cbor  []byte
		diag  string
		opts  *DiagOptions
	}{
		{
			"float16 without FloatPrecisionIndicator option",
			hexDecode("f93e00"),
			`1.5`,
			&DiagOptions{
				FloatPrecisionIndicator: false,
			},
		},
		{
			"float16 with FloatPrecisionIndicator option",
			hexDecode("f93e00"),
			`1.5_1`,
			&DiagOptions{
				FloatPrecisionIndicator: true,
			},
		},
		{
			"float32 without FloatPrecisionIndicator option",
			hexDecode("fa47c35000"),
			`100000.0`,
			&DiagOptions{
				FloatPrecisionIndicator: false,
			},
		},
		{
			"float32 with FloatPrecisionIndicator option",
			hexDecode("fa47c35000"),
			`100000.0_2`,
			&DiagOptions{
				FloatPrecisionIndicator: true,
			},
		},
		{
			"float64 without FloatPrecisionIndicator option",
			hexDecode("fbc010666666666666"),
			`-4.1`,
			&DiagOptions{
				FloatPrecisionIndicator: false,
			},
		},
		{
			"float64 with FloatPrecisionIndicator option",
			hexDecode("fbc010666666666666"),
			`-4.1_3`,
			&DiagOptions{
				FloatPrecisionIndicator: true,
			},
		},
		{
			"with FloatPrecisionIndicator option",
			hexDecode("c1fb41d452d9ec200000"),
			`1(1363896240.5_3)`,
			&DiagOptions{
				FloatPrecisionIndicator: true,
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.title, func(t *testing.T) {
			dm, err := tc.opts.DiagMode()
			if err != nil {
				// A failed DiagMode() leaves dm unusable; continuing would panic.
				t.Fatalf("DiagMode() for 0x%x returned error %q", tc.cbor, err)
			}

			str, err := dm.Diagnose(tc.cbor)
			switch {
			case err != nil:
				t.Errorf("Diagnose(0x%x) returned error %q", tc.cbor, err)
			case str != tc.diag:
				t.Errorf("Diagnose(0x%x) returned `%s`, want %s", tc.cbor, str, tc.diag)
			}
		})
	}
}

// TestDiagnoseFirst checks the three results of the package-level
// DiagnoseFirst: the notation of the first data item, the bytes left over
// after it, and the error (matched by substring when wantErrorMsg is set).
func TestDiagnoseFirst(t *testing.T) {
	testCases := []struct {
		title        string
		cbor         []byte
		diag         string
		wantRest     []byte
		wantErrorMsg string
	}{
		{
			"with no trailing data",
			hexDecode("f93e00"),
			`1.5`,
			[]byte{},
			"",
		},
		{
			"with CBOR Sequences",
			hexDecode("f93e0064494554464401020304"),
			`1.5`,
			hexDecode("64494554464401020304"),
			"",
		},
		{
			"with invalid CBOR trailing data",
			hexDecode("f93e00ff494554464401020304"),
			`1.5`,
			hexDecode("ff494554464401020304"),
			"",
		},
		{
			"with invalid CBOR data",
			hexDecode("f93e"),
			``,
			nil,
			"unexpected EOF",
		},
	}

	for _, tc := range testCases {
		t.Run(tc.title, func(t *testing.T) {
			str, rest, err := DiagnoseFirst(tc.cbor)
			if str != tc.diag {
				t.Errorf("DiagnoseFirst(0x%x) returned `%s`, want %s", tc.cbor, str, tc.diag)
			}

			// Checked independently of the notation: a wrong remainder must be
			// reported even when the notation itself is correct. bytes.Equal
			// treats nil and empty slices as equal.
			if !bytes.Equal(rest, tc.wantRest) {
				t.Errorf("DiagnoseFirst(0x%x) returned rest `%x`, want rest %x", tc.cbor, rest, tc.wantRest)
			}

			switch {
			case tc.wantErrorMsg == "" && err != nil:
				t.Errorf("DiagnoseFirst(0x%x) returned error %q", tc.cbor, err)
			case tc.wantErrorMsg != "" && err == nil:
				t.Errorf("DiagnoseFirst(0x%x) returned nil error, want error %q", tc.cbor, tc.wantErrorMsg)
			case tc.wantErrorMsg != "" && !strings.Contains(err.Error(), tc.wantErrorMsg):
				t.Errorf("DiagnoseFirst(0x%x) returned error %q, want error %q", tc.cbor, err, tc.wantErrorMsg)
			}
		})
	}
}

// TestDiagnoseCBORSequences checks Diagnose on input holding several data
// items, with the CBORSequence option off and on, including a truncated
// sequence where both partial notation and an error are expected.
func TestDiagnoseCBORSequences(t *testing.T) {
	testCases := []struct {
		title       string
		cbor        []byte
		diag        string
		opts        *DiagOptions
		returnError bool
	}{
		{
			"CBOR Sequences without CBORSequence option",
			hexDecode("f93e0064494554464401020304"),
			``,
			&DiagOptions{
				CBORSequence: false,
			},
			true,
		},
		{
			"CBOR Sequences with CBORSequence option",
			hexDecode("f93e0064494554464401020304"),
			`1.5, "IETF", h'01020304'`,
			&DiagOptions{
				CBORSequence: true,
			},
			false,
		},
		{
			"partial/incomplete CBOR Sequences",
			hexDecode("f93e00644945544644010203"),
			`1.5, "IETF"`,
			&DiagOptions{
				CBORSequence: true,
			},
			true,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.title, func(t *testing.T) {
			dm, err := tc.opts.DiagMode()
			if err != nil {
				// A failed DiagMode() leaves dm unusable; continuing would panic.
				t.Fatalf("DiagMode() for 0x%x returned error %q", tc.cbor, err)
			}

			str, err := dm.Diagnose(tc.cbor)
			switch {
			case tc.returnError && err == nil:
				t.Errorf("Diagnose(0x%x) didn't return error", tc.cbor)
			case !tc.returnError && err != nil:
				t.Errorf("Diagnose(0x%x) returned error %q", tc.cbor, err)
			}

			// The notation is compared even when an error is expected, because
			// the table also pins the output that accompanies the error.
			if str != tc.diag {
				t.Errorf("Diagnose(0x%x) returned `%s`, want %s", tc.cbor, str, tc.diag)
			}
		})
	}
}

// TestDiagnoseTag checks Diagnose on tag numbers 2 and 3: the table expects an
// error when the tagged item is an integer and a decimal integer notation
// when it is a byte string.
func TestDiagnoseTag(t *testing.T) {
	testCases := []struct {
		title       string
		cbor        []byte
		diag        string
		opts        *DiagOptions
		returnError bool
	}{
		{
			"CBOR tag number 2 with not well-formed encoded CBOR data item",
			hexDecode("c201"),
			``,
			&DiagOptions{},
			true,
		},
		{
			"CBOR tag number 3 with not well-formed encoded CBOR data item",
			hexDecode("c301"),
			``,
			&DiagOptions{},
			true,
		},
		{
			"CBOR tag number 2 with well-formed encoded CBOR data item",
			hexDecode("c240"),
			`0`,
			&DiagOptions{},
			false,
		},
		{
			"CBOR tag number 3 with well-formed encoded CBOR data item",
			hexDecode("c340"),
			`-1`, // -1 - n
			&DiagOptions{},
			false,
		},
		{
			"CBOR tag number 2 with well-formed encoded CBOR data item",
			hexDecode("c249010000000000000000"),
			`18446744073709551616`,
			&DiagOptions{},
			false,
		},
		{
			"CBOR tag number 3 with well-formed encoded CBOR data item",
			hexDecode("c349010000000000000000"),
			`-18446744073709551617`, // -1 - n
			&DiagOptions{},
			false,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.title, func(t *testing.T) {
			dm, err := tc.opts.DiagMode()
			if err != nil {
				// A failed DiagMode() leaves dm unusable; continuing would panic.
				t.Fatalf("DiagMode() for 0x%x returned error %q", tc.cbor, err)
			}

			str, err := dm.Diagnose(tc.cbor)
			switch {
			case tc.returnError && err == nil:
				t.Errorf("Diagnose(0x%x) didn't return error", tc.cbor)
			case !tc.returnError && err != nil:
				t.Errorf("Diagnose(0x%x) returned error %q", tc.cbor, err)
			}

			if str != tc.diag {
				t.Errorf("Diagnose(0x%x) returned `%s`, want %s", tc.cbor, str, tc.diag)
			}
		})
	}
}