From cde504625a08d33a8d280829d2aa8b81de5caf0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mart=C3=AD?= Date: Mon, 13 May 2024 09:59:08 +0100 Subject: [PATCH] encoding/toml: validate the resulting value against toml.Unmarshal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Given some input TOML, we can use our new encoding/toml.Decoder to obtain an ast.Node and then compile that to a cue.Value. Similarly, we can use go-toml's Unmarshal API on the same input TOML to obtain a Go value directly. Those two values should be equivalent, which we check via qt.JSONEquals. Two test cases fail this new check: the one involving duplicate keys, which is a known bug in our decoder, and the pair of tests involving empty TOML input, which we decoded as a CUE null. It seems like all TOML decoders decode empty input as an empty struct, and the TOML spec also hints that way as it allows empty tables which "simply have no key/value pairs within them". Moreover, since we create an ast.File, it seems best to leave it empty, which equals an empty struct, rather than add a "null" embedding. While here, add a TODO to remind myself to revisit literal decoding. Updates #68. Signed-off-by: Daniel Martí Change-Id: I83e34b939f1c2dd3b7928e14076f69c39e5054e0 --- encoding/toml/decode.go | 9 ++++---- encoding/toml/decode_test.go | 44 ++++++++++++++++++++++++++++++------ 2 files changed, 42 insertions(+), 11 deletions(-) diff --git a/encoding/toml/decode.go b/encoding/toml/decode.go index 8c00aee2e0f..dc11357d20b 100644 --- a/encoding/toml/decode.go +++ b/encoding/toml/decode.go @@ -68,6 +68,9 @@ func (d *Decoder) Decode() (ast.Node, error) { return nil, err } d.parser.Reset(data) + // Note that empty inputs, just like empty tables, + // result in a struct or map with zero key-values. + // The TOML spec and other decoders also work this way. file := &ast.File{} for d.parser.NextExpression() { if err := d.nextRootNode(d.parser.Expression()); err != nil { @@ -78,10 +81,6 @@ func (d *Decoder) Decode() (ast.Node, error) { file.Decls = append(file.Decls, field) } d.currentFields = d.currentFields[:0] - if len(file.Decls) == 0 { - // Empty inputs are decoded as null, much like JSON or YAML. - file.Decls = append(file.Decls, ast.NewNull()) - } return file, nil } @@ -147,6 +146,8 @@ func (d *Decoder) nextRootNode(tnode *toml.Node) error { // nextRootNode is called for every top-level expression from the TOML parser. func (d *Decoder) decodeExpr(tnode *toml.Node) (ast.Expr, error) { + // TODO(mvdan): we currently assume that TOML basic literals (string, int, float) + // are also valid CUE literals; we should double check this, perhaps via fuzzing. data := string(tnode.Data) switch tnode.Kind { case toml.String: diff --git a/encoding/toml/decode_test.go b/encoding/toml/decode_test.go index 009a84afd3e..e3b5e35c73d 100644 --- a/encoding/toml/decode_test.go +++ b/encoding/toml/decode_test.go @@ -15,13 +15,18 @@ package toml_test import ( + "encoding/json" "io" "strings" "testing" + "github.com/go-quicktest/qt" + gotoml "github.com/pelletier/go-toml/v2" + + "cuelang.org/go/cue/ast" + "cuelang.org/go/cue/cuecontext" "cuelang.org/go/cue/format" "cuelang.org/go/encoding/toml" - "github.com/go-quicktest/qt" ) func TestDecoder(t *testing.T) { @@ -36,13 +41,13 @@ func TestDecoder(t *testing.T) { }{{ name: "Empty", input: "", - want: "null", + want: "", }, { name: "LoneComment", input: ` # Just a comment `, - want: "null", + want: "", }, { name: "RootKeysOne", input: ` @@ -288,10 +293,35 @@ line two.\ qt.Assert(t, qt.IsNil(err)) qt.Assert(t, qt.Equals(string(formatted), string(wantFormatted))) - // TODO(mvdan): validate that the decoded CUE values are equivalent - // to the Go values that a direct TOML unmarshal would produce. - // For example, compare JSON equality between the CUE encoded as JSON - // and the TOML decoded into `any` and encoded as JSON. + // Ensure that the CUE node can be compiled into a cue.Value and validated. + ctx := cuecontext.New() + // TODO(mvdan): cue.Context can only build ast.Expr or ast.File, not ast.Node; + // it's then likely not the right choice for the interface to return ast.Node. + val := ctx.BuildFile(node.(*ast.File)) + qt.Assert(t, qt.IsNil(val.Err())) + qt.Assert(t, qt.IsNil(val.Validate())) + + // See the TODO above; go-toml rejects duplicate keys per the spec, + // but our decoder does not yet. + if test.name == "RootKeysDuplicate" { + return + } + + // Validate that the decoded CUE value is equivalent + // to the Go value that a direct TOML unmarshal produces. + // We use JSON equality as some details such as which integer types are used + // are not actually relevant to an "equal data" check. + var unmarshalTOML any + err = gotoml.Unmarshal([]byte(test.input), &unmarshalTOML) + qt.Assert(t, qt.IsNil(err)) + jsonTOML, err := json.Marshal(unmarshalTOML) + qt.Assert(t, qt.IsNil(err)) + t.Logf("json.Marshal via go-toml:\t%s\n", jsonTOML) + + jsonCUE, err := json.Marshal(val) + qt.Assert(t, qt.IsNil(err)) + t.Logf("json.Marshal via CUE:\t%s\n", jsonCUE) + qt.Assert(t, qt.JSONEquals(jsonCUE, unmarshalTOML)) }) } }