Skip to content

Commit

Permalink
feat: add encodedLength() function (#48)
Browse files Browse the repository at this point in the history
  • Loading branch information
rvagg authored Mar 30, 2022
1 parent 8d346dd commit e8be1c0
Show file tree
Hide file tree
Showing 13 changed files with 211 additions and 30 deletions.
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
* [Options](#options)
* [`decode(data[, options])`](#decodedata-options)
* [Options](#options-1)
* [`encodedLength(data[, options])`](#encodedlengthdata-options)
* [Type encoders](#type-encoders)
* [Tag decoders](#tag-decoders)
* [Deterministic encoding recommendations](#deterministic-encoding-recommendations)
Expand Down Expand Up @@ -250,6 +251,20 @@ Decode valid CBOR bytes from a `Uint8Array` (or `Buffer`) and return a JavaScrip
* `tags` (array): a mapping of tag number to tag decoder function. By default no tags are supported. See [Tag decoders](#tag-decoders).
* `tokenizer` (object): an object with two methods, `next()` which returns a `Token` and `done()` which returns a `boolean`. Can be used to implement custom input decoding. See the source code for examples.

### `encodedLength(data[, options])`

```js
import { encodedLength } from 'cborg/length'
```

```js
const { encodedLength } = require('cborg/length')
```

Calculate the byte length of the given data when encoded as CBOR with the options provided. The options are the same as for an `encode()` call. This calculation will be accurate if the same options are used as when performing a normal `encode()`. Some encode options can change the encoding output length.

A `tokensToLength()` function is also available which deals directly with a tokenized form of the object, but this is only recommended for advanced users.

### Type encoders

The `typeEncoders` property to the `options` argument to `encode()` allows you to add additional functionality to cborg, or override existing functionality.
Expand Down
1 change: 1 addition & 0 deletions interface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export type StrictTypeEncoder = (data: any, typ: string, options: EncodeOptions,
/**
 * Shape of a per-token-type encoder: a callable that writes a token, with
 * helper functions attached to it as properties.
 */
export type TokenTypeEncoder = {
// Call signature: takes a `Bl` buffer, the token and optional encode options; returns nothing.
(buf: Bl, token: Token, options?: EncodeOptions): void;
// Comparator over two tokens returning a number (presumably a negative/zero/positive
// sort order — confirm against callers).
compareTokens(t1: Token, t2: Token): number;
// TODO: make this non-optional as a breaking change and remove the throw in length.js
// Optional size calculation for a token; lib/length.js sums these results and throws
// when the encoder for a token does not provide this function.
encodedSize?(token: Token, options?: EncodeOptions): number;
}

Expand Down
8 changes: 8 additions & 0 deletions lib/4array.js
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,11 @@ export function encodeArray (buf, token) {
// using an array as a map key, are you sure about this? we can only sort
// by array length here, it's up to the encoder to decide to look deeper
encodeArray.compareTokens = uint.encodeUint.compareTokens

/**
 * Size calculation for an array token. The work is delegated entirely to the
 * uint value sizer, which is handed the token's `value`.
 *
 * @param {Token} token
 * @returns {number}
 */
encodeArray.encodedSize = function encodedSize (token) {
  const { value } = token
  return uint.encodeUintValue.encodedSize(value)
}
8 changes: 8 additions & 0 deletions lib/5map.js
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,11 @@ export function encodeMap (buf, token) {
// using a map as a map key, are you sure about this? we can only sort
// by map length here, it's up to the encoder to decide to look deeper
encodeMap.compareTokens = uint.encodeUint.compareTokens

/**
 * Size calculation for a map token. The work is delegated entirely to the
 * uint value sizer, which is handed the token's `value`.
 *
 * @param {Token} token
 * @returns {number}
 */
encodeMap.encodedSize = function encodedSize (token) {
  const { value } = token
  return uint.encodeUintValue.encodedSize(value)
}
8 changes: 8 additions & 0 deletions lib/6tag.js
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,11 @@ export function encodeTag (buf, token) {
}

encodeTag.compareTokens = uint.encodeUint.compareTokens

/**
 * Size calculation for a tag token. The work is delegated entirely to the
 * uint value sizer, which is handed the token's `value`.
 *
 * @param {Token} token
 * @returns {number}
 */
encodeTag.encodedSize = function encodedSize (token) {
  const { value } = token
  return uint.encodeUintValue.encodedSize(value)
}
3 changes: 1 addition & 2 deletions lib/7float.js
Original file line number Diff line number Diff line change
Expand Up @@ -154,10 +154,9 @@ encodeFloat.encodedSize = function encodedSize (token, options) {
return 1
}

let decoded
if (!options || options.float64 !== true) {
encodeFloat16(float)
decoded = readFloat16(ui8a, 1)
let decoded = readFloat16(ui8a, 1)
if (float === decoded || Number.isNaN(float)) {
return 3
}
Expand Down
4 changes: 3 additions & 1 deletion lib/bl.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,11 @@ export class Bl {
}

reset () {
this.chunks = []
this.cursor = 0
this.maxCursor = -1
if (this.chunks.length) {
this.chunks = []
}
if (this._initReuseChunk !== null) {
this.chunks.push(this._initReuseChunk)
this.maxCursor = this._initReuseChunk.length - 1
Expand Down
26 changes: 16 additions & 10 deletions lib/encode.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,21 @@ const defaultEncodeOptions = {
quickEncodeToken
}

// Encoder lookup table: each encoder is stored at the index given by the
// `major` number of the token type it handles.
/** @type {TokenTypeEncoder[]} */
const cborEncoders = []
;[
  [Type.uint, encodeUint],
  [Type.negint, encodeNegint],
  [Type.bytes, encodeBytes],
  [Type.string, encodeString],
  [Type.array, encodeArray],
  [Type.map, encodeMap],
  [Type.tag, encodeTag],
  [Type.float, encodeFloat]
].forEach(([type, encoder]) => {
  cborEncoders[type.major] = encoder
})
/**
 * Build a fresh encoder lookup table. Every call returns a new array in which
 * each encoder sits at the index given by the `major` number of the token
 * type it handles.
 *
 * @returns {TokenTypeEncoder[]}
 */
export function makeCborEncoders () {
  const encodersByType = [
    [Type.uint, encodeUint],
    [Type.negint, encodeNegint],
    [Type.bytes, encodeBytes],
    [Type.string, encodeString],
    [Type.array, encodeArray],
    [Type.map, encodeMap],
    [Type.tag, encodeTag],
    [Type.float, encodeFloat]
  ]
  const table = []
  for (const [type, encoder] of encodersByType) {
    table[type.major] = encoder
  }
  return table
}

// Table used by this module's own encode paths.
const cborEncoders = makeCborEncoders()

const buf = new Bl()

Expand Down Expand Up @@ -441,6 +446,7 @@ function encodeCustom (data, encoders, options) {
return asU8A(buf.chunks[0])
}
}
buf.reset()
tokensToEncoded(buf, tokens, encoders, options)
return buf.toBytes(true)
}
Expand Down
61 changes: 61 additions & 0 deletions lib/length.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import { makeCborEncoders, objectToTokens } from './encode.js'
import { quickEncodeToken } from './jump.js'

/**
* @typedef {import('../interface').EncodeOptions} EncodeOptions
* @typedef {import('../interface').TokenTypeEncoder} TokenTypeEncoder
* @typedef {import('../interface').TokenOrNestedTokens} TokenOrNestedTokens
*/

// Module-local encoder table built by makeCborEncoders(); it is the default
// `encoders` argument of tokensToLength() and is what encodedLength() passes on.
const cborEncoders = makeCborEncoders()

/**
 * Baseline options: encodedLength() layers caller-supplied options on top of
 * these, and tokensToLength() uses them as-is when no options are given.
 *
 * @type {EncodeOptions}
 */
const defaultEncodeOptions = {
float64: false,
quickEncodeToken
}

/**
 * Work out how many bytes `data` would occupy once encoded as CBOR using the
 * supplied options, without producing the encoded bytes.
 *
 * The result matches a real encode only when the same options are passed to
 * both calls, because some encode options alter the output length.
 *
 * @param {any} data
 * @param {EncodeOptions} [options]
 * @returns {number}
 */
export function encodedLength (data, options) {
  // Caller options are layered over the defaults; `mapSorter` is always
  // cleared because ordering map entries won't change the length.
  const lengthOptions = Object.assign({}, defaultEncodeOptions, options, { mapSorter: undefined })
  return tokensToLength(objectToTokens(data, lengthOptions), cborEncoders, lengthOptions)
}

/**
 * Sum the encoded sizes of a token, or of an arbitrarily nested array of
 * tokens, as reported by each token type's `encodedSize()`.
 *
 * Intended for advanced use; `encodedLength()` is the usual entry point.
 *
 * @param {TokenOrNestedTokens} tokens
 * @param {TokenTypeEncoder[]} [encoders]
 * @param {EncodeOptions} [options]
 * @returns {number}
 * @throws {Error} when the encoder selected for a token has no `encodedSize()`
 */
export function tokensToLength (tokens, encoders = cborEncoders, options = defaultEncodeOptions) {
  if (!Array.isArray(tokens)) {
    // Single token: look its encoder up by major type and ask it for the size.
    const encoder = encoders[tokens.type.major]
    /* c8 ignore next 3 */
    if (typeof encoder.encodedSize !== 'function') {
      throw new Error(`Encoder for ${tokens.type.name} does not have an encodedSize()`)
    }
    return encoder.encodedSize(tokens, options)
  }

  // Nested tokens: recurse into every entry and accumulate.
  let total = 0
  for (const nested of tokens) {
    total += tokensToLength(nested, encoders, options)
  }
  return total
}
6 changes: 6 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@
".": {
"import": "./cborg.js"
},
"./length": {
"import": "./lib/length.js"
},
"./taglib": {
"import": "./taglib.js"
},
Expand All @@ -59,6 +62,9 @@
"json": [
"types/lib/json/json.d.ts"
],
"length": [
"types/lib/length.d.ts"
],
"*": [
"types/*"
],
Expand Down
18 changes: 18 additions & 0 deletions test/common.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import { Token, Type } from '../lib/token.js'

/**
 * Tag decoder used by the tests: turns a string into a `Date`.
 *
 * NOTE(review): the error text mentions "tag 1" while `dateEncoder` emits
 * tag 0 — confirm which is intended before relying on the message.
 *
 * @param {any} obj value found inside the tag
 * @returns {Date}
 * @throws {Error} when `obj` is not a string
 */
export function dateDecoder (obj) {
  if (typeof obj === 'string') {
    return new Date(obj)
  }
  throw new Error('expected string for tag 1')
}

/**
 * Type encoder used by the tests: represents a `Date` as a tag token with
 * value 0 followed by a string token holding the ISO-8601 form of the date.
 * A trailing `.000Z` (zero milliseconds) is shortened to `Z`.
 *
 * @param {any} obj
 * @returns {Token[]}
 * @throws {Error} when `obj` is not a `Date`
 */
export function dateEncoder (obj) {
  const isDate = obj instanceof Date
  if (!isDate) {
    throw new Error('expected Date for "Date" encoder')
  }
  const tagToken = new Token(Type.tag, 0)
  const isoString = obj.toISOString().replace(/\.000Z$/, 'Z')
  return [tagToken, new Token(Type.string, isoString)]
}
18 changes: 1 addition & 17 deletions test/test-6tag.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,10 @@ import chai from 'chai'
import { Token, Type } from '../lib/token.js'
import { decode, encode } from '../cborg.js'
import { fromHex, toHex } from '../lib/byte-utils.js'
import { dateDecoder, dateEncoder } from './common.js'

const { assert } = chai

/**
 * Tag decoder used by the tests: turns a string into a `Date`.
 *
 * @param {any} obj value found inside the tag
 * @returns {Date}
 * @throws {Error} when `obj` is not a string
 */
function dateDecoder (obj) {
  if (typeof obj === 'string') {
    return new Date(obj)
  }
  throw new Error('expected string for tag 1')
}

/**
 * Type encoder used by the tests: represents a `Date` as a tag token with
 * value 0 followed by a string token holding the ISO-8601 form of the date.
 * A trailing `.000Z` (zero milliseconds) is shortened to `Z`.
 *
 * @param {any} obj
 * @returns {Token[]}
 * @throws {Error} when `obj` is not a `Date`
 */
function dateEncoder (obj) {
  const isDate = obj instanceof Date
  if (!isDate) {
    throw new Error('expected Date for "Date" encoder')
  }
  const tagToken = new Token(Type.tag, 0)
  const isoString = obj.toISOString().replace(/\.000Z$/, 'Z')
  return [tagToken, new Token(Type.string, isoString)]
}

function Uint16ArrayDecoder (obj) {
if (typeof obj !== 'string') {
throw new Error('expected string for tag 23')
Expand Down
65 changes: 65 additions & 0 deletions test/test-length.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/* eslint-env mocha */

import chai from 'chai'
import { garbage } from 'ipld-garbage'
import { uintBoundaries } from '../lib/0uint.js'
import { encode } from '../cborg.js'
import { encodedLength } from '../lib/length.js'
import { dateEncoder } from './common.js'

const { assert } = chai

/**
 * Assert that `encodedLength()` predicts exactly the number of bytes that
 * `encode()` produces for the same value and options. The JSON form of the
 * value is used as the assertion message.
 *
 * @param {any} object value to check
 * @param {object} [options] passed unchanged to both calls
 */
function verifyLength (object, options) {
  const predicted = encodedLength(object, options)
  const { length: actual } = encode(object, options)
  assert.strictEqual(actual, predicted, JSON.stringify(object))
}

// Checks that encodedLength() agrees with the real encoded size for integers
// around the uint boundaries, a tagged value, floats and random data.
describe('encodedLength', () => {
  it('int boundaries', () => {
    // Only the first four boundaries are exercised, each with its neighbours
    // on both sides, in positive and negated form.
    for (let ii = 0; ii < 4; ii++) {
      const candidates = [
        uintBoundaries[ii],
        uintBoundaries[ii] - 1,
        uintBoundaries[ii] + 1,
        -1 * uintBoundaries[ii],
        -1 * uintBoundaries[ii] - 1,
        -1 * uintBoundaries[ii] + 1
      ]
      for (const candidate of candidates) {
        verifyLength(candidate)
      }
    }
  })

  it('tags', () => {
    const value = { date: new Date('2013-03-21T20:04:00Z') }
    const options = { typeEncoders: { Date: dateEncoder } }
    verifyLength(value, options)
  })

  it('floats', () => {
    // Each value is checked with default options and with float64 forced on.
    for (const value of [0.5, 8.940696716308594e-08]) {
      verifyLength(value)
      verifyLength(value, { float64: true })
    }
  })

  // Random-data cases: `rounds` objects per test, each generated with `size`
  // as the first argument to garbage() and CID weighting set to 0.
  const garbageCases = [
    { title: 'small garbage', rounds: 1000, size: 1 << 6 },
    { title: 'medium garbage', rounds: 100, size: 1 << 16 },
    { title: 'large garbage', rounds: 10, size: 1 << 20 }
  ]

  for (const { title, rounds, size } of garbageCases) {
    // A regular function is required so mocha's `this.timeout()` is reachable.
    it(title, function () {
      this.timeout(10000)
      for (let ii = 0; ii < rounds; ii++) {
        const gbg = garbage(size, { weights: { CID: 0 } })
        verifyLength(gbg)
      }
    })
  }
})

0 comments on commit e8be1c0

Please sign in to comment.