From 765695accc0215b90ce7b7560ec9a3966530b422 Mon Sep 17 00:00:00 2001 From: Eemeli Aro Date: Sat, 5 Oct 2024 17:37:29 +0300 Subject: [PATCH 1/2] feat: Add stringKeys parse option --- docs/03_options.md | 1 + src/compose/compose-node.ts | 12 ++++++++++++ src/compose/compose-scalar.ts | 15 +++++++++------ src/doc/Document.ts | 1 + src/errors.ts | 1 + src/options.ts | 7 +++++++ tests/doc/parse.ts | 36 +++++++++++++++++++++++++++++++++++ 7 files changed, 67 insertions(+), 6 deletions(-) diff --git a/docs/03_options.md b/docs/03_options.md index 92c7d627..7a885161 100644 --- a/docs/03_options.md +++ b/docs/03_options.md @@ -29,6 +29,7 @@ Used by: `parse()`, `parseDocument()`, `parseAllDocuments()`, `new Composer()`, | lineCounter | `LineCounter` | | If set, newlines will be tracked, to allow for `lineCounter.linePos(offset)` to provide the `{ line, col }` positions within the input. | | prettyErrors | `boolean` | `true` | Include line/col position in errors, along with an extract of the source string. | | strict | `boolean` | `true` | When parsing, do not ignore errors [required](#silencing-errors-and-warnings) by the YAML 1.2 spec, but caused by unambiguous content. | +| stringKeys | `boolean` | `false` | Parse all mapping keys as strings. Treat all non-scalar keys, and keys with an explicit non-string tag, as errors. | | uniqueKeys | `boolean ⎮ (a, b) => boolean` | `true` | Whether key uniqueness is checked, or customised. If set to be a function, it will be passed two parsed nodes and should return a boolean value indicating their equality. 
| [bigint]: https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Global_Objects/BigInt diff --git a/src/compose/compose-node.ts b/src/compose/compose-node.ts index d07eb6d8..41349c35 100644 --- a/src/compose/compose-node.ts +++ b/src/compose/compose-node.ts @@ -1,5 +1,6 @@ import type { Directives } from '../doc/directives.js' import { Alias } from '../nodes/Alias.js' +import { isScalar } from '../nodes/identity.js' import type { ParsedNode } from '../nodes/Node.js' import type { ParseOptions } from '../options.js' import type { FlowScalar, SourceToken, Token } from '../parse/cst.js' @@ -36,6 +37,7 @@ export function composeNode( props: Props, onError: ComposeErrorHandler ) { + const atKey = ctx.atKey const { spaceBefore, comment, anchor, tag } = props let node: ParsedNode let isSrcToken = true @@ -81,6 +83,16 @@ export function composeNode( } if (anchor && node.anchor === '') onError(anchor, 'BAD_ALIAS', 'Anchor cannot be an empty string') + if ( + atKey && + ctx.options.stringKeys && + (!isScalar(node) || + typeof node.value !== 'string' || + (node.tag && node.tag !== 'tag:yaml.org,2002:str')) + ) { + const msg = 'With stringKeys, all keys must be strings' + onError(tag ?? token, 'NON_STRING_KEY', msg) + } if (spaceBefore) node.spaceBefore = true if (comment) { if (token.type === 'scalar' && token.source === '') node.comment = comment diff --git a/src/compose/compose-scalar.ts b/src/compose/compose-scalar.ts index 71fbc896..f1878880 100644 --- a/src/compose/compose-scalar.ts +++ b/src/compose/compose-scalar.ts @@ -24,12 +24,15 @@ export function composeScalar( onError(tagToken, 'TAG_RESOLVE_FAILED', msg) ) : null - const tag = - tagToken && tagName - ? findScalarTagByName(ctx.schema, value, tagName, tagToken, onError) - : token.type === 'scalar' - ? 
findScalarTagByTest(ctx, value, token, onError) - : ctx.schema[SCALAR] + + let tag: ScalarTag + if (ctx.options.stringKeys && ctx.atKey) { + tag = ctx.schema[SCALAR] + } else if (tagName) + tag = findScalarTagByName(ctx.schema, value, tagName, tagToken!, onError) + else if (token.type === 'scalar') + tag = findScalarTagByTest(ctx, value, token, onError) + else tag = ctx.schema[SCALAR] let scalar: Scalar try { diff --git a/src/doc/Document.ts b/src/doc/Document.ts index d54d25f5..0eb38cfe 100644 --- a/src/doc/Document.ts +++ b/src/doc/Document.ts @@ -122,6 +122,7 @@ export class Document< logLevel: 'warn', prettyErrors: true, strict: true, + stringKeys: false, uniqueKeys: true, version: '1.2' }, diff --git a/src/errors.ts b/src/errors.ts index 88653643..a5b62b0c 100644 --- a/src/errors.ts +++ b/src/errors.ts @@ -18,6 +18,7 @@ export type ErrorCode = | 'MULTIPLE_ANCHORS' | 'MULTIPLE_DOCS' | 'MULTIPLE_TAGS' + | 'NON_STRING_KEY' | 'TAB_AS_INDENT' | 'TAG_RESOLVE_FAILED' | 'UNEXPECTED_TOKEN' diff --git a/src/options.ts b/src/options.ts index 33cda9bd..b586e206 100644 --- a/src/options.ts +++ b/src/options.ts @@ -48,6 +48,13 @@ export type ParseOptions = { */ strict?: boolean + /** + * Parse all mapping keys as strings. Treat all non-scalar keys as errors. + * + * Default: `false` + */ + stringKeys?: boolean + /** * YAML requires map keys to be unique. 
By default, this is checked by * comparing scalar values with `===`; deep equality is not checked for diff --git a/tests/doc/parse.ts b/tests/doc/parse.ts index a71a1f80..a9fc9bed 100644 --- a/tests/doc/parse.ts +++ b/tests/doc/parse.ts @@ -880,3 +880,39 @@ describe('CRLF line endings', () => { expect(res).toBe('foo bar') }) }) + +describe('stringKeys', () => { + test('success', () => { + const doc = YAML.parseDocument( + source` + x: x + !!str y: y + 42: 42 + true: true + null: null + ~: ~ + : + `, + { stringKeys: true } + ) + expect(doc.contents.items).toMatchObject([ + { key: { value: 'x' }, value: { value: 'x' } }, + { key: { value: 'y' }, value: { value: 'y' } }, + { key: { value: '42' }, value: { value: 42 } }, + { key: { value: 'true' }, value: { value: true } }, + { key: { value: 'null' }, value: { value: null } }, + { key: { value: '~' }, value: { value: null } }, + { key: { value: '' }, value: { value: null } } + ]) + }) + + test('explicit non-string tag', () => { + const doc = YAML.parseDocument('!!int 42: 42', { stringKeys: true }) + expect(doc.errors).toMatchObject([{ code: 'NON_STRING_KEY' }]) + }) + + test('collection key', () => { + const doc = YAML.parseDocument('{ x, y }: 42', { stringKeys: true }) + expect(doc.errors).toMatchObject([{ code: 'NON_STRING_KEY' }]) + }) +}) From fc24eb83747792f9113801630fb5ef9e5711b164 Mon Sep 17 00:00:00 2001 From: Eemeli Aro Date: Sat, 5 Oct 2024 17:49:17 +0300 Subject: [PATCH 2/2] Update errors list in docs --- docs/08_errors.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/08_errors.md b/docs/08_errors.md index 4b1b4450..4a33cf10 100644 --- a/docs/08_errors.md +++ b/docs/08_errors.md @@ -26,6 +26,7 @@ To identify errors for special handling, you should primarily use `code` to diff | ------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | 
`ALIAS_PROPS` | Unlike scalars and collections, alias nodes cannot have an anchor or tag associated with it. | | `BAD_ALIAS` | An alias identifier must be a non-empty sequence of valid characters. | +| `BAD_COLLECTION_TYPE` | Explicit collection tag used on a collection type it does not support. | | `BAD_DIRECTIVE` | Only the `%YAML` and `%TAG` directives are supported, and they need to follow the specified structure. | | `BAD_DQ_ESCAPE` | Double-quotes strings may include `\` escaped content, but that needs to be valid. | | `BAD_INDENT` | Indentation is important in YAML, and collection items need to all start at the same level. Block scalars are also picky about their leading content. | @@ -36,12 +37,12 @@ To identify errors for special handling, you should primarily use `code` to diff | `DUPLICATE_KEY` | Map keys must be unique. Use the `uniqueKeys` option to disable or customise this check when parsing. | | `IMPOSSIBLE` | This really should not happen. If you encounter this error code, please file a bug. | | `KEY_OVER_1024_CHARS` | Due to legacy reasons, implicit keys must have their following `:` indicator after at most 1k characters. | -| `MISSING_ANCHOR` | Aliases can only dereference anchors that are before them in the document. | | `MISSING_CHAR` | Some character or characters are missing here. See the error message for what you need to add. | | `MULTILINE_IMPLICIT_KEY` | Implicit keys need to be on a single line. Does the input include a plain scalar with a `:` followed by whitespace, which is getting parsed as a map key? | | `MULTIPLE_ANCHORS` | A node is only allowed to have one anchor. | | `MULTIPLE_DOCS` | A YAML stream may include multiple documents. If yours does, you'll need to use `parseAllDocuments()` to work with it. | | `MULTIPLE_TAGS` | A node is only allowed to have one tag. | +| `NON_STRING_KEY` | With the `stringKeys` option, all mapping keys must be strings. | | `TAB_AS_INDENT` | Only spaces are allowed as indentation. 
| | `TAG_RESOLVE_FAILED` | Something went wrong when resolving a node's tag with the current schema. | | `UNEXPECTED_TOKEN` | A token was encountered in a place where it wasn't expected. |