feat: add encodedLength() function (#48)

rvagg · Mar 30, 2022 · e8be1c0 · e8be1c0
1 parent 8d346dd
commit e8be1c0
Show file tree

Hide file tree

Showing 13 changed files with 211 additions and 30 deletions.
diff --git a/README.md b/README.md
@@ -27,6 +27,7 @@
     * [Options](#options)
   * [`decode(data[, options])`](#decodedata-options)
     * [Options](#options-1)
+  * [`encodedLength(data[, options])`](#encodedlengthdata-options)
   * [Type encoders](#type-encoders)
   * [Tag decoders](#tag-decoders)
 * [Deterministic encoding recommendations](#deterministic-encoding-recommendations)
@@ -250,6 +251,20 @@ Decode valid CBOR bytes from a `Uint8Array` (or `Buffer`) and return a JavaScrip
 * `tags` (array): a mapping of tag number to tag decoder function. By default no tags are supported. See [Tag decoders](#tag-decoders).
 * `tokenizer` (object): an object with two methods, `next()` which returns a `Token` and `done()` which returns a `boolean`. Can be used to implement custom input decoding. See the source code for examples.
 
+### `encodedLength(data[, options])`
+
+```js
+import { encodedLength } from 'cborg/length'
+```
+
+```js
+const { encodedLength } = require('cborg/length')
+```
+
+Calculate the byte length of the given data when encoded as CBOR with the options provided. The options are the same as for an `encode()` call. This calculation will be accurate if the same options are used as when performing a normal `encode()`. Some encode options can change the encoding output length.
+
+A `tokensToLength()` function is available which deals directly with a tokenized form of the object, but this is only recommended for advanced users.
+
 ### Type encoders
 
 The `typeEncoders` property to the `options` argument to `encode()` allows you to add additional functionality to cborg, or override existing functionality.

diff --git a/interface.ts b/interface.ts
@@ -16,6 +16,7 @@ export type StrictTypeEncoder = (data: any, typ: string, options: EncodeOptions,
 export type TokenTypeEncoder = { // a callable encoder that also carries helper methods
   (buf: Bl, token: Token, options?: EncodeOptions): void; // call signature; presumably writes `token` into `buf` — confirm against the lib/ encoders
   compareTokens(t1: Token, t2: Token): number; // numeric comparison of two tokens; sign convention is not visible here
+  // TODO: make this non-optional as a breaking change and remove the throw in length.js
   encodedSize?(token: Token, options?: EncodeOptions): number; // optional for now; lib/length.js throws when an encoder lacks it
 }
 

diff --git a/lib/4array.js b/lib/4array.js
@@ -103,3 +103,11 @@ export function encodeArray (buf, token) {
 // using an array as a map key, are you sure about this? we can only sort
 // by array length here, it's up to the encoder to decide to look deeper
 encodeArray.compareTokens = uint.encodeUint.compareTokens
+
+/**
+ * @param {Token} token
+ * @returns {number}
+ */
+encodeArray.encodedSize = function encodedSize (token) {
+  return uint.encodeUintValue.encodedSize(token.value)
+}
diff --git a/lib/5map.js b/lib/5map.js
@@ -103,3 +103,11 @@ export function encodeMap (buf, token) {
 // using a map as a map key, are you sure about this? we can only sort
 // by map length here, it's up to the encoder to decide to look deeper
 encodeMap.compareTokens = uint.encodeUint.compareTokens
+
+/**
+ * @param {Token} token
+ * @returns {number}
+ */
+encodeMap.encodedSize = function encodedSize (token) {
+  return uint.encodeUintValue.encodedSize(token.value)
+}
diff --git a/lib/6tag.js b/lib/6tag.js
@@ -70,3 +70,11 @@ export function encodeTag (buf, token) {
 }
 
 encodeTag.compareTokens = uint.encodeUint.compareTokens
+
+/**
+ * @param {Token} token
+ * @returns {number}
+ */
+encodeTag.encodedSize = function encodedSize (token) {
+  return uint.encodeUintValue.encodedSize(token.value)
+}
diff --git a/lib/7float.js b/lib/7float.js
@@ -154,10 +154,9 @@ encodeFloat.encodedSize = function encodedSize (token, options) {
     return 1
   }
 
-  let decoded
   if (!options || options.float64 !== true) {
     encodeFloat16(float)
-    decoded = readFloat16(ui8a, 1)
+    let decoded = readFloat16(ui8a, 1)
     if (float === decoded || Number.isNaN(float)) {
       return 3
     }

diff --git a/lib/bl.js b/lib/bl.js
@@ -42,9 +42,11 @@ export class Bl {
   }
 
   reset () {
-    this.chunks = []
     this.cursor = 0
     this.maxCursor = -1
+    if (this.chunks.length) {
+      this.chunks = []
+    }
     if (this._initReuseChunk !== null) {
       this.chunks.push(this._initReuseChunk)
       this.maxCursor = this._initReuseChunk.length - 1

diff --git a/lib/encode.js b/lib/encode.js
@@ -30,16 +30,21 @@ const defaultEncodeOptions = {
   quickEncodeToken
 }
 
-/** @type {TokenTypeEncoder[]} */
-const cborEncoders = []
-cborEncoders[Type.uint.major] = encodeUint
-cborEncoders[Type.negint.major] = encodeNegint
-cborEncoders[Type.bytes.major] = encodeBytes
-cborEncoders[Type.string.major] = encodeString
-cborEncoders[Type.array.major] = encodeArray
-cborEncoders[Type.map.major] = encodeMap
-cborEncoders[Type.tag.major] = encodeTag
-cborEncoders[Type.float.major] = encodeFloat
+/** @returns {TokenTypeEncoder[]} */
+export function makeCborEncoders () {
+  const encoders = []
+  encoders[Type.uint.major] = encodeUint
+  encoders[Type.negint.major] = encodeNegint
+  encoders[Type.bytes.major] = encodeBytes
+  encoders[Type.string.major] = encodeString
+  encoders[Type.array.major] = encodeArray
+  encoders[Type.map.major] = encodeMap
+  encoders[Type.tag.major] = encodeTag
+  encoders[Type.float.major] = encodeFloat
+  return encoders
+}
+
+const cborEncoders = makeCborEncoders()
 
 const buf = new Bl()
 
@@ -441,6 +446,7 @@ function encodeCustom (data, encoders, options) {
       return asU8A(buf.chunks[0])
     }
   }
+  buf.reset()
   tokensToEncoded(buf, tokens, encoders, options)
   return buf.toBytes(true)
 }

diff --git a/lib/length.js b/lib/length.js
@@ -0,0 +1,61 @@
+import { makeCborEncoders, objectToTokens } from './encode.js'
+import { quickEncodeToken } from './jump.js'
+
+/**
+ * @typedef {import('../interface').EncodeOptions} EncodeOptions
+ * @typedef {import('../interface').TokenTypeEncoder} TokenTypeEncoder
+ * @typedef {import('../interface').TokenOrNestedTokens} TokenOrNestedTokens
+ */
+
+const cborEncoders = makeCborEncoders() // this module's own encoder table, built by encode.js's makeCborEncoders()
+
+/**
+ * Defaults that `encodedLength()` merges underneath caller-supplied options,
+ * and that `tokensToLength()` uses when no `options` argument is given.
+ *
+ * @type {EncodeOptions}
+ */
+const defaultEncodeOptions = {
+  float64: false,
+  quickEncodeToken
+}
+
+/**
+ * Calculate the byte length of the given data when encoded as CBOR with the
+ * options provided.
+ * This calculation will be accurate if the same options are used as when
+ * performing a normal encode. Some encode options can change the encoding
+ * output length.
+ *
+ * @param {any} data
+ * @param {EncodeOptions} [options]
+ * @returns {number}
+ */
+export function encodedLength (data, options) {
+  options = Object.assign({}, defaultEncodeOptions, options)
+  options.mapSorter = undefined // won't change the length
+  const tokens = objectToTokens(data, options)
+  return tokensToLength(tokens, cborEncoders, options)
+}
+
+/**
+ * Calculate the byte length of the data as represented by the given tokens when
+ * encoded as CBOR with the options provided.
+ * This function is for advanced users and would not normally be called
+ * directly. See `encodedLength()` for appropriate use.
+ *
+ * @param {TokenOrNestedTokens} tokens
+ * @param {TokenTypeEncoder[]} [encoders]
+ * @param {EncodeOptions} [options]
+ */
+export function tokensToLength (tokens, encoders = cborEncoders, options = defaultEncodeOptions) {
+  if (Array.isArray(tokens)) {
+    let len = 0
+    for (const token of tokens) {
+      len += tokensToLength(token, encoders, options)
+    }
+    return len
+  } else {
+    const encoder = encoders[tokens.type.major]
+    /* c8 ignore next 3 */
+    if (encoder.encodedSize === undefined || typeof encoder.encodedSize !== 'function') {
+      throw new Error(`Encoder for ${tokens.type.name} does not have an encodedSize()`)
+    }
+    return encoder.encodedSize(tokens, options)
+  }
+}
diff --git a/package.json b/package.json
@@ -46,6 +46,9 @@
     ".": {
       "import": "./cborg.js"
     },
+    "./length": {
+      "import": "./lib/length.js"
+    },
     "./taglib": {
       "import": "./taglib.js"
     },
@@ -59,6 +62,9 @@
       "json": [
         "types/lib/json/json.d.ts"
       ],
+      "length": [
+        "types/lib/length.d.ts"
+      ],
       "*": [
         "types/*"
       ],

diff --git a/test/common.js b/test/common.js
@@ -0,0 +1,18 @@
+import { Token, Type } from '../lib/token.js'
+
+export function dateDecoder (obj) {
+  if (typeof obj !== 'string') {
+    throw new Error('expected string for tag 1')
+  }
+  return new Date(obj)
+}
+
+export function dateEncoder (obj) {
+  if (!(obj instanceof Date)) {
+    throw new Error('expected Date for "Date" encoder')
+  }
+  return [
+    new Token(Type.tag, 0),
+    new Token(Type.string, obj.toISOString().replace(/\.000Z$/, 'Z'))
+  ]
+}
diff --git a/test/test-6tag.js b/test/test-6tag.js
@@ -5,26 +5,10 @@ import chai from 'chai'
 import { Token, Type } from '../lib/token.js'
 import { decode, encode } from '../cborg.js'
 import { fromHex, toHex } from '../lib/byte-utils.js'
+import { dateDecoder, dateEncoder } from './common.js'
 
 const { assert } = chai
 
-function dateDecoder (obj) {
-  if (typeof obj !== 'string') {
-    throw new Error('expected string for tag 1')
-  }
-  return new Date(obj)
-}
-
-function dateEncoder (obj) {
-  if (!(obj instanceof Date)) {
-    throw new Error('expected Date for "Date" encoder')
-  }
-  return [
-    new Token(Type.tag, 0),
-    new Token(Type.string, obj.toISOString().replace(/\.000Z$/, 'Z'))
-  ]
-}
-
 function Uint16ArrayDecoder (obj) {
   if (typeof obj !== 'string') {
     throw new Error('expected string for tag 23')

diff --git a/test/test-length.js b/test/test-length.js
@@ -0,0 +1,65 @@
+/* eslint-env mocha */
+
+import chai from 'chai'
+import { garbage } from 'ipld-garbage'
+import { uintBoundaries } from '../lib/0uint.js'
+import { encode } from '../cborg.js'
+import { encodedLength } from '../lib/length.js'
+import { dateEncoder } from './common.js'
+
+const { assert } = chai
+
+function verifyLength (object, options) {
+  const len = encodedLength(object, options)
+  const encoded = encode(object, options)
+  const actual = encoded.length
+  assert.strictEqual(actual, len, JSON.stringify(object))
+}
+
+describe('encodedLength', () => {
+  it('int boundaries', () => {
+    for (let ii = 0; ii < 4; ii++) {
+      verifyLength(uintBoundaries[ii])
+      verifyLength(uintBoundaries[ii] - 1)
+      verifyLength(uintBoundaries[ii] + 1)
+      verifyLength(-1 * uintBoundaries[ii])
+      verifyLength(-1 * uintBoundaries[ii] - 1)
+      verifyLength(-1 * uintBoundaries[ii] + 1)
+    }
+  })
+
+  it('tags', () => {
+    verifyLength({ date: new Date('2013-03-21T20:04:00Z') }, { typeEncoders: { Date: dateEncoder } })
+  })
+
+  it('floats', () => {
+    verifyLength(0.5)
+    verifyLength(0.5, { float64: true })
+    verifyLength(8.940696716308594e-08)
+    verifyLength(8.940696716308594e-08, { float64: true })
+  })
+
+  it('small garbage', function () {
+    this.timeout(10000)
+    for (let ii = 0; ii < 1000; ii++) {
+      const gbg = garbage(1 << 6, { weights: { CID: 0 } })
+      verifyLength(gbg)
+    }
+  })
+
+  it('medium garbage', function () {
+    this.timeout(10000)
+    for (let ii = 0; ii < 100; ii++) {
+      const gbg = garbage(1 << 16, { weights: { CID: 0 } })
+      verifyLength(gbg)
+    }
+  })
+
+  it('large garbage', function () {
+    this.timeout(10000)
+    for (let ii = 0; ii < 10; ii++) {
+      const gbg = garbage(1 << 20, { weights: { CID: 0 } })
+      verifyLength(gbg)
+    }
+  })
+})