From 91230af201df655c6204db88747d2a67780520bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Thu, 20 Apr 2017 01:29:05 -0700 Subject: [PATCH] feat(size): handle content size info (#49) --- README.md | 14 +++++++-- get.js | 8 ++++- lib/content/write.js | 22 +++++++------ lib/entry-index.js | 2 ++ lib/verify.js | 1 - package.json | 2 +- put.js | 17 +++++++--- test/get.js | 52 +++++++++++++++++------------- test/index.find.js | 19 +++++++---- test/index.insert.js | 68 ++++++++++++++++++++++++++-------------- test/ls.js | 12 ++++--- test/util/cache-index.js | 5 ++- 12 files changed, 144 insertions(+), 78 deletions(-) diff --git a/README.md b/README.md index 6790a3d..f6709dd 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,7 @@ cacache.ls(cachePath).then(console.log) integrity: 'sha512-BaSe64/EnCoDED+HAsh==' path: '.testcache/content/deadbeef', // joined with `cachePath` time: 12345698490, + size: 4023948, metadata: { name: 'blah', version: '1.2.3', @@ -131,7 +132,8 @@ cacache.ls(cachePath).then(console.log) key: 'other-thing', integrity: 'sha1-ANothER+hasH=', path: '.testcache/content/bada55', - time: 11992309289 + time: 11992309289, + size: 111112 } } ``` @@ -153,6 +155,7 @@ cacache.ls.stream(cachePath).on('data', console.log) integrity: 'sha512-BaSe64HaSh', path: '.testcache/content/deadbeef', // joined with `cachePath` time: 12345698490, + size: 13423, metadata: { name: 'blah', version: '1.2.3', @@ -164,7 +167,8 @@ cacache.ls.stream(cachePath).on('data', console.log) key: 'other-thing', integrity: 'whirlpool-WoWSoMuchSupport', path: '.testcache/content/bada55', - time: 11992309289 + time: 11992309289, + size: 498023984029 } { @@ -208,7 +212,8 @@ cache.get(cachePath, 'my-thing').then(console.log) thingName: 'my' }, integrity: 'sha512-BaSe64HaSh', - data: Buffer# + data: Buffer#, + size: 9320 } // Look up by digest @@ -280,6 +285,7 @@ cacache.get.info(cachePath, 'my-thing').then(console.log) integrity: 'sha256-MUSTVERIFY+ALL/THINGS==' path: '.testcache/content/deadbeef', time: 12345698490, + size: 849234, metadata: { name: 'blah', version: '1.2.3', @@ -357,6 +363,8 @@ for inserted data. Can use any algorithm listed in `crypto.getHashes()` or `'omakase'`/`'お任せします'` to pick a random hash algorithm on each insertion. You may also use any anagram of `'modnar'` to use this feature. +Has no effect if `opts.integrity` is present. + ##### `opts.uid`/`opts.gid` If provided, cacache will do its best to make sure any new files added to the diff --git a/get.js b/get.js index 9d4c396..19ed4d1 100644 --- a/get.js +++ b/get.js @@ -26,7 +26,8 @@ function getData (byDigest, cache, key, opts) { return BB.resolve(byDigest ? memoized : { metadata: memoized.entry.metadata, data: memoized.data, - integrity: memoized.entry.integrity + integrity: memoized.entry.integrity, + size: memoized.entry.size }) } return ( @@ -41,6 +42,7 @@ function getData (byDigest, cache, key, opts) { }).then(data => byDigest ? data : { metadata: entry.metadata, data: data, + size: entry.size, integrity: entry.integrity }).then(res => { if (opts.memoize && byDigest) { @@ -62,6 +64,7 @@ function getStream (cache, key, opts) { stream.on('newListener', function (ev, cb) { ev === 'metadata' && cb(memoized.entry.metadata) ev === 'integrity' && cb(memoized.entry.integrity) + ev === 'size' && cb(memoized.entry.size) }) stream.write(memoized.data, () => stream.end()) return stream @@ -87,11 +90,14 @@ function getStream (cache, key, opts) { } else { memoStream = through() } + opts.size = opts.size == null ? 
entry.size : opts.size stream.emit('metadata', entry.metadata) stream.emit('integrity', entry.integrity) + stream.emit('size', entry.size) stream.on('newListener', function (ev, cb) { ev === 'metadata' && cb(entry.metadata) ev === 'integrity' && cb(entry.integrity) + ev === 'size' && cb(entry.size) }) pipe( read.readStream(cache, entry.integrity, opts), diff --git a/lib/content/write.js b/lib/content/write.js index df817b3..479d3e7 100644 --- a/lib/content/write.js +++ b/lib/content/write.js @@ -37,7 +37,7 @@ function write (cache, data, opts) { ).then(() => ( moveToDestination(tmp, cache, sri, opts) )) - )).then(() => sri) + )).then(() => ({integrity: sri, size: data.length})) } module.exports.stream = writeStream @@ -62,8 +62,9 @@ function writeStream (cache, opts) { e.code = 'ENODATA' return ret.emit('error', e) } - allDone.then(sri => { - sri && ret.emit('integrity', sri) + allDone.then(res => { + res.integrity && ret.emit('integrity', res.integrity) + res.size !== null && ret.emit('size', res.size) cb() }, e => { ret.emit('error', e) @@ -81,30 +82,33 @@ function handleContent (inputStream, cache, opts, errCheck) { errCheck() return pipeToTmp( inputStream, cache, tmp.target, opts, errCheck - ).then(sri => { + ).then(res => { return moveToDestination( - tmp, cache, sri, opts, errCheck - ).then(() => sri) + tmp, cache, res.integrity, opts, errCheck + ).then(() => res) }) }) } function pipeToTmp (inputStream, cache, tmpTarget, opts, errCheck) { return BB.resolve().then(() => { - let sri + let integrity + let size const hashStream = ssri.integrityStream({ integrity: opts.integrity, algorithms: opts.algorithms, size: opts.size }).on('integrity', s => { - sri = s + integrity = s + }).on('size', s => { + size = s }) const outStream = fs.createWriteStream(tmpTarget, { flags: 'wx' }) errCheck() return pipe(inputStream, hashStream, outStream).then(() => { - return sri + return {integrity, size} }, err => { return rimraf(tmpTarget).then(() => { throw err }) }) diff --git a/lib/entry-index.js b/lib/entry-index.js index 6631327..d95d045 100644 --- a/lib/entry-index.js +++ b/lib/entry-index.js @@ -36,6 +36,7 @@ function insert (cache, key, integrity, opts) { key, integrity: integrity && ssri.stringify(integrity), time: Date.now(), + size: opts.size, metadata: opts.metadata } return fixOwner.mkdirfix( @@ -206,6 +207,7 @@ function formatEntry (cache, entry) { key: entry.key, integrity: entry.integrity, path: contentPath(cache, entry.integrity), + size: entry.size, time: entry.time, metadata: entry.metadata } diff --git a/lib/verify.js b/lib/verify.js index 1c2941c..6a01004 100644 --- a/lib/verify.js +++ b/lib/verify.js @@ -9,7 +9,6 @@ const fs = require('graceful-fs') const glob = BB.promisify(require('glob')) const index = require('./entry-index') const path = require('path') -const pipe = BB.promisify(require('mississippi').pipe) const rimraf = BB.promisify(require('rimraf')) const ssri = require('ssri') diff --git a/package.json b/package.json index 95f2f44..bb112e2 100644 --- a/package.json +++ b/package.json @@ -3,7 +3,7 @@ "version": "7.0.5", "cache-version": { "content": "2", - "index": "4" + "index": "5" }, "description": "Fast, fault-tolerant, cross-platform, disk-based, data-agnostic, content-addressable cache.", "main": "index.js", diff --git a/put.js b/put.js index 88c5f3b..5f525d0 100644 --- a/put.js +++ b/put.js @@ -8,12 +8,14 @@ const to = require('mississippi').to module.exports = putData function putData (cache, key, data, opts) { opts = opts || {} - return write(cache, data, 
opts).then(integrity => { - return index.insert(cache, key, integrity, opts).then(entry => { + return write(cache, data, opts).then(res => { + // TODO - stop modifying opts + opts.size = res.size + return index.insert(cache, key, res.integrity, opts).then(entry => { if (opts.memoize) { memo.put(cache, entry, data) } - return integrity + return res.integrity }) }) } @@ -22,8 +24,13 @@ module.exports.stream = putStream function putStream (cache, key, opts) { opts = opts || {} let integrity - const contentStream = write.stream(cache, opts).on('integrity', int => { + let size + const contentStream = write.stream( + cache, opts + ).on('integrity', int => { integrity = int + }).on('size', s => { + size = s }) let memoData let memoTotal = 0 @@ -38,6 +45,8 @@ function putStream (cache, key, opts) { }) }, cb => { contentStream.end(() => { + // TODO - stop modifying `opts` + opts.size = size index.insert(cache, key, integrity, opts).then(entry => { if (opts.memoize) { memo.put(cache, entry, Buffer.concat(memoData, memoTotal)) diff --git a/test/get.js b/test/get.js index 2fe9727..9071213 100644 --- a/test/get.js +++ b/test/get.js @@ -17,12 +17,20 @@ const CacheContent = require('./util/cache-content') const CACHE = path.join(testDir, 'cache') const CONTENT = Buffer.from('foobarbaz', 'utf8') +const SIZE = CONTENT.length const KEY = 'my-test-key' const INTEGRITY = ssri.fromData(CONTENT).toString() const METADATA = { foo: 'bar' } const get = require('..').get +function opts (extra) { + return Object.assign({ + size: SIZE, + metadata: METADATA + }, extra) +} + // Simple wrapper util cause this gets WORDY function streamGet (byDigest) { const args = [].slice.call(arguments, 1) @@ -30,6 +38,7 @@ function streamGet (byDigest) { let dataLen = 0 let integrity let metadata + let size const stream = ( byDigest ? 
get.stream.byDigest : get.stream ).apply(null, args) @@ -40,9 +49,11 @@ function streamGet (byDigest) { integrity = ssri.stringify(int) }).on('metadata', m => { metadata = m + }).on('size', s => { + size = s }) return finished(stream).then(() => ({ - data: Buffer.concat(data, dataLen), integrity, metadata + data: Buffer.concat(data, dataLen), integrity, metadata, size })) } @@ -51,15 +62,14 @@ test('basic bulk get', t => { [INTEGRITY]: CONTENT })) fixture.create(CACHE) - return index.insert(CACHE, KEY, INTEGRITY, { - metadata: METADATA - }).then(() => { + return index.insert(CACHE, KEY, INTEGRITY, opts()).then(() => { return get(CACHE, KEY) }).then(res => { t.deepEqual(res, { metadata: METADATA, data: CONTENT, - integrity: INTEGRITY + integrity: INTEGRITY, + size: SIZE }, 'bulk key get returned proper data') }).then(() => { return get.byDigest(CACHE, INTEGRITY) @@ -73,9 +83,7 @@ test('basic stream get', t => { [INTEGRITY]: CONTENT })) fixture.create(CACHE) - return index.insert(CACHE, KEY, INTEGRITY, { - metadata: METADATA - }).then(() => { + return index.insert(CACHE, KEY, INTEGRITY, opts()).then(() => { return BB.join( streamGet(false, CACHE, KEY), streamGet(true, CACHE, INTEGRITY), @@ -83,7 +91,8 @@ test('basic stream get', t => { t.deepEqual(byKey, { data: CONTENT, integrity: INTEGRITY, - metadata: METADATA + metadata: METADATA, + size: SIZE }, 'got all expected data and fields from key fetch') t.deepEqual( byDigest.data, @@ -109,9 +118,7 @@ test('ENOENT if not found', t => { }) test('get.info index entry lookup', t => { - return index.insert(CACHE, KEY, INTEGRITY, { - metadata: METADATA - }).then(ENTRY => { + return index.insert(CACHE, KEY, INTEGRITY, opts()).then(ENTRY => { return get.info(CACHE, KEY).then(entry => { t.deepEqual(entry, ENTRY, 'get.info() returned the right entry') }) @@ -124,9 +131,7 @@ test('memoizes data on bulk read', t => { [INTEGRITY]: CONTENT })) fixture.create(CACHE) - return index.insert(CACHE, KEY, INTEGRITY, { - metadata: METADATA - }).then(ENTRY => { + return index.insert(CACHE, KEY, INTEGRITY, opts()).then(ENTRY => { return get(CACHE, KEY).then(() => { t.deepEqual(memo.get(CACHE, KEY), null, 'no memoization!') return get(CACHE, KEY, { memoize: true }) @@ -134,7 +139,8 @@ test('memoizes data on bulk read', t => { t.deepEqual(res, { metadata: METADATA, data: CONTENT, - integrity: INTEGRITY + integrity: INTEGRITY, + size: SIZE }, 'usual data returned') t.deepEqual(memo.get(CACHE, KEY), { entry: ENTRY, @@ -147,7 +153,8 @@ test('memoizes data on bulk read', t => { t.deepEqual(res, { metadata: METADATA, data: CONTENT, - integrity: INTEGRITY + integrity: INTEGRITY, + size: SIZE }, 'memoized data fetched by default') return get(CACHE, KEY, { memoize: false }).then(() => { throw new Error('expected get to fail') @@ -169,9 +176,7 @@ test('memoizes data on stream read', t => { [INTEGRITY]: CONTENT })) fixture.create(CACHE) - return index.insert(CACHE, KEY, INTEGRITY, { - metadata: METADATA - }).then(ENTRY => { + return index.insert(CACHE, KEY, INTEGRITY, opts()).then(ENTRY => { return BB.join( streamGet(false, CACHE, KEY), streamGet(true, CACHE, INTEGRITY), @@ -208,7 +213,8 @@ test('memoizes data on stream read', t => { t.deepEqual(byKey, { metadata: METADATA, data: CONTENT, - integrity: INTEGRITY + integrity: INTEGRITY, + size: SIZE }, 'usual data returned from key fetch') t.deepEqual(memo.get(CACHE, KEY), { entry: ENTRY, @@ -234,7 +240,8 @@ test('memoizes data on stream read', t => { t.deepEqual(byKey, { metadata: METADATA, data: CONTENT, - integrity: INTEGRITY + 
integrity: INTEGRITY, + size: SIZE }, 'key fetch fulfilled by memoization cache') t.deepEqual( byDigest.data, @@ -266,6 +273,7 @@ test('get.info uses memoized data', t => { key: KEY, integrity: INTEGRITY, time: +(new Date()), + size: SIZE, metadata: null } memo.put(CACHE, ENTRY, CONTENT) diff --git a/test/index.find.js b/test/index.find.js index af2348e..abe78c0 100644 --- a/test/index.find.js +++ b/test/index.find.js @@ -11,6 +11,7 @@ const testDir = require('./util/test-dir')(__filename) BB.promisifyAll(fs) const CACHE = path.join(testDir, 'cache') +const SIZE = 999 const contentPath = require('../lib/content/path') const index = require('../lib/entry-index') @@ -19,7 +20,8 @@ test('index.find cache hit', function (t) { key: 'whatever', integrity: 'whatnot-deadbeef', time: 12345, - metadata: 'omgsometa' + metadata: 'omgsometa', + size: 5 } const fixture = new Tacks(CacheIndex({ 'whatever': entry @@ -68,12 +70,14 @@ test('index.find key case-sensitivity', function (t) { 'jsonstream': { key: 'jsonstream', integrity: 'sha1-lowercase', - time: 54321 + time: 54321, + size: SIZE }, 'JSONStream': { key: 'JSONStream', integrity: 'sha1-capitalised', - time: 12345 + time: 12345, + size: SIZE } })) fixture.create(CACHE) @@ -97,7 +101,8 @@ test('index.find path-breaking characters', function (t) { key: ';;!registry\nhttps://registry.npmjs.org/back \\ slash@Cool™?', integrity: 'sha1-deadbeef', time: 12345, - metadata: 'omgsometa' + metadata: 'omgsometa', + size: 9 } const fixture = new Tacks(CacheIndex({ [entry.key]: entry @@ -123,7 +128,8 @@ test('index.find extremely long keys', function (t) { key: key, integrity: 'sha1-deadbeef', time: 12345, - metadata: 'woo' + metadata: 'woo', + size: 10 } const fixture = new Tacks(CacheIndex({ [entry.key]: entry @@ -190,7 +196,8 @@ test('index.find hash conflict in same bucket', function (t) { key: 'whatever', integrity: 'sha1-deadbeef', time: 12345, - metadata: 'yay' + metadata: 'yay', + size: 8 } const fixture = new Tacks(CacheIndex({ 'whatever': [ diff --git a/test/index.insert.js b/test/index.insert.js index e9d0812..2055e8b 100644 --- a/test/index.insert.js +++ b/test/index.insert.js @@ -18,17 +18,25 @@ const index = require('../lib/entry-index') const KEY = 'foo' const BUCKET = index._bucketPath(CACHE, KEY) const INTEGRITY = 'sha512-deadbeef' +const SIZE = 999 + +function opts (extra) { + return Object.assign({ + size: SIZE + }, extra) +} test('basic insertion', function (t) { - return index.insert(CACHE, KEY, INTEGRITY, { + return index.insert(CACHE, KEY, INTEGRITY, opts({ metadata: 'foo' - }).then(entry => { + })).then(entry => { t.deepEqual(entry, { key: KEY, integrity: INTEGRITY, path: contentPath(CACHE, INTEGRITY), time: entry.time, - metadata: 'foo' + metadata: 'foo', + size: SIZE }, 'formatted entry returned') return fs.readFileAsync(BUCKET, 'utf8') }).then(data => { @@ -41,16 +49,17 @@ test('basic insertion', function (t) { key: KEY, integrity: INTEGRITY, time: entry.time, - metadata: 'foo' + metadata: 'foo', + size: SIZE }, 'entry matches what was inserted') }) }) test('inserts additional entries into existing key', function (t) { - return index.insert(CACHE, KEY, INTEGRITY, { + return index.insert(CACHE, KEY, INTEGRITY, opts({ metadata: 1 - }).then(() => ( - index.insert(CACHE, KEY, INTEGRITY, {metadata: 2}) + })).then(() => ( + index.insert(CACHE, KEY, INTEGRITY, opts({metadata: 2})) )).then(() => { return fs.readFileAsync(BUCKET, 'utf8') }).then(data => { @@ -61,11 +70,13 @@ test('inserts additional entries into existing key', function (t) { 
t.deepEqual(entries, [{ key: KEY, integrity: INTEGRITY, - metadata: 1 + metadata: 1, + size: SIZE }, { key: KEY, integrity: INTEGRITY, - metadata: 2 + metadata: 2, + size: SIZE }], 'all entries present') }) }) @@ -76,12 +87,13 @@ test('separates entries even if one is corrupted', function (t) { 'foo': '\n' + JSON.stringify({ key: KEY, integrity: 'meh', - time: 54321 + time: 54321, + size: SIZE }) + '\n{"key": "' + KEY + '"\noway' })) fixture.create(CACHE) return index.insert( - CACHE, KEY, INTEGRITY + CACHE, KEY, INTEGRITY, opts() ).then(() => { return fs.readFileAsync(BUCKET, 'utf8') }).then(data => { @@ -89,7 +101,8 @@ test('separates entries even if one is corrupted', function (t) { delete entry.time t.deepEqual(entry, { key: KEY, - integrity: INTEGRITY + integrity: INTEGRITY, + size: SIZE }, 'new entry unaffected by corruption') }) }) @@ -97,7 +110,7 @@ test('separates entries even if one is corrupted', function (t) { test('optional arbitrary metadata', function (t) { const metadata = { foo: 'bar' } return index.insert( - CACHE, KEY, INTEGRITY, { metadata: metadata } + CACHE, KEY, INTEGRITY, opts({ metadata: metadata }) ).then(() => { return fs.readFileAsync(BUCKET, 'utf8') }).then(data => { @@ -106,15 +119,16 @@ test('optional arbitrary metadata', function (t) { t.deepEqual(entry, { key: KEY, integrity: INTEGRITY, - metadata: metadata + metadata: metadata, + size: SIZE }, 'entry includes inserted metadata') }) }) test('key case-sensitivity', function (t) { return BB.join( - index.insert(CACHE, KEY, INTEGRITY), - index.insert(CACHE, KEY.toUpperCase(), INTEGRITY + 'upper') + index.insert(CACHE, KEY, INTEGRITY, opts()), + index.insert(CACHE, KEY.toUpperCase(), INTEGRITY + 'upper', opts()) ).then(() => { return BB.join( index.find(CACHE, KEY), @@ -124,17 +138,21 @@ test('key case-sensitivity', function (t) { delete upperEntry.time t.deepEqual({ key: entry.key, - integrity: entry.integrity + integrity: entry.integrity, + size: SIZE }, { key: KEY, - integrity: INTEGRITY + integrity: INTEGRITY, + size: SIZE }, 'regular entry exists') t.deepEqual({ key: upperEntry.key, - integrity: upperEntry.integrity + integrity: upperEntry.integrity, + size: SIZE }, { key: KEY.toUpperCase(), - integrity: INTEGRITY + 'upper' + integrity: INTEGRITY + 'upper', + size: SIZE }, 'case-variant entry intact') } ) @@ -144,7 +162,7 @@ test('key case-sensitivity', function (t) { test('path-breaking characters', function (t) { const newKey = ';;!registry\nhttps://registry.npmjs.org/back \\ slash@Cool™?' 
return index.insert( - CACHE, newKey, INTEGRITY + CACHE, newKey, INTEGRITY, opts() ).then(() => { const bucket = index._bucketPath(CACHE, newKey) return fs.readFileAsync(bucket, 'utf8') @@ -153,7 +171,8 @@ test('path-breaking characters', function (t) { delete entry.time t.deepEqual(entry, { key: newKey, - integrity: INTEGRITY + integrity: INTEGRITY, + size: SIZE }, 'entry exists and matches original key with invalid chars') }) }) @@ -164,7 +183,7 @@ test('extremely long keys', function (t) { newKey += i } return index.insert( - CACHE, newKey, INTEGRITY + CACHE, newKey, INTEGRITY, opts() ).then(() => { const bucket = index._bucketPath(CACHE, newKey) return fs.readFileAsync(bucket, 'utf8') @@ -173,7 +192,8 @@ test('extremely long keys', function (t) { delete entry.time t.deepEqual(entry, { key: newKey, - integrity: INTEGRITY + integrity: INTEGRITY, + size: SIZE }, 'entry exists in spite of INCREDIBLY LONG key') }) }) diff --git a/test/ls.js b/test/ls.js index 87f23aa..57d2228 100644 --- a/test/ls.js +++ b/test/ls.js @@ -20,13 +20,15 @@ test('basic listing', function (t) { key: 'whatever', integrity: 'sha512-deadbeef', time: 12345, - metadata: 'omgsometa' + metadata: 'omgsometa', + size: 234234 }, 'whatnot': { key: 'whatnot', integrity: 'sha512-bada55', time: 54321, - metadata: null + metadata: null, + size: 425345345 } } const fixture = new Tacks(CacheIndex(contents)) @@ -57,13 +59,15 @@ test('separate keys in conflicting buckets', function (t) { key: 'whatever', integrity: 'sha512-deadbeef', time: 12345, - metadata: 'omgsometa' + metadata: 'omgsometa', + size: 5 }, 'whatev': { key: 'whatev', integrity: 'sha512-bada55', time: 54321, - metadata: null + metadata: null, + size: 99234234 } } const fixture = new Tacks(CacheIndex({ diff --git a/test/util/cache-index.js b/test/util/cache-index.js index 3e2a51c..81ed5c3 100644 --- a/test/util/cache-index.js +++ b/test/util/cache-index.js @@ -15,11 +15,10 @@ const File = Tacks.File // // The returned object is for use with Tacks module.exports = CacheIndex -function CacheIndex (entries, hashAlgorithm) { - hashAlgorithm = hashAlgorithm || 'sha512' +function CacheIndex (entries) { var tree = Dir({}) Object.keys(entries).forEach(function (k) { - const bpath = bucketPath('', k, hashAlgorithm) + const bpath = bucketPath('', k) const parts = bpath.split(path.sep) let lines = entries[k] let serialised
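
---

For reviewers, a minimal usage sketch of what this patch changes at the public API surface, assuming the behavior shown in the diffs above (not part of the patch itself; the cache path, key, and data below are hypothetical). `put` still resolves to the integrity string, but the index entry it writes now records `size`, so bulk `get`, `get.info`, and the `get.stream` events all expose it; `opts.size`, when passed in, is forwarded to `ssri.integrityStream` as an extra check on the written content.

```js
// Usage sketch (illustrative only). CACHE, 'my-key', and DATA are
// hypothetical values, not anything defined by this patch.
const cacache = require('cacache')

const CACHE = '/tmp/my-cache'           // hypothetical cache directory
const DATA = Buffer.from('foobarbaz')   // 9 bytes

// Bulk form: the resolved object from `get` now includes `size`,
// and `get.info` returns it straight from the index entry.
cacache.put(CACHE, 'my-key', DATA).then(integrity => {
  return cacache.get(CACHE, 'my-key')
}).then(res => {
  console.log(res.size)   // => 9, alongside res.data / res.integrity / res.metadata
  return cacache.get.info(CACHE, 'my-key')
}).then(info => {
  console.log(info.size)  // => 9, as recorded by index.insert
})

// Stream form: a 'size' event is emitted alongside 'integrity' and
// 'metadata', including on memoized reads (via the newListener hook).
cacache.get.stream(CACHE, 'my-key')
  .on('size', s => console.log('content size:', s))
  .resume()
```

The design choice worth noting: `size` is computed once at write time (from `data.length` in the bulk path, or from ssri's `size` event in the stream path) and persisted in the index entry, so readers get it without stat-ing content files — which is also why the `cache-version.index` bump from `4` to `5` is needed.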