Skip to content

Commit

Permalink
feat(content): collate content files into subdirs
Browse files Browse the repository at this point in the history
Fixes: #14

Filesystems generally see degraded performance from directories with too
many files in them. By nesting content into subdirectories keyed on the first
two characters of the hash, we should get a fairly even spread of directories
without having too many files in any single one.

Additionally, in order to prevent conflicts between different hashAlgorithms
being used in the same cache, this further collates content dirs under the
hashAlgorithm used for their digests.

BREAKING CHANGE:
Previously-generated content directories are no longer compatible
and must be regenerated.
  • Loading branch information
zkat committed Mar 2, 2017
1 parent e8402a5 commit c094d9f
Show file tree
Hide file tree
Showing 14 changed files with 91 additions and 42 deletions.
7 changes: 5 additions & 2 deletions lib/content/path.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
var path = require('path')

module.exports = contentPath
function contentPath (cache, address) {
return path.join(cache, 'content', address)
/**
 * Builds the on-disk path for a content entry.
 *
 * Content is collated under the (lowercased) hash algorithm name, then
 * under a subdirectory named after the first two characters of the
 * (lowercased) digest, to keep any single directory from accumulating
 * too many files.
 *
 * @param {String} cache - root directory of the cache.
 * @param {String} address - hex digest of the content.
 * @param {String} [hashAlgorithm] - algorithm name; defaults to 'sha512'.
 * @returns {String} absolute/relative path joining cache, 'content',
 *   algorithm, two-char prefix, and the full digest.
 */
function contentPath (cache, address, hashAlgorithm) {
  const digest = address && address.toLowerCase()
  const algo = hashAlgorithm ? hashAlgorithm.toLowerCase() : 'sha512'
  // NOTE: if `address` is falsy, `digest.slice` throws, matching the
  // original behavior — callers are expected to pass a digest.
  return path.join(cache, 'content', algo, digest.slice(0, 2), digest)
}
4 changes: 2 additions & 2 deletions lib/content/put-stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ function pipeToTmp (inputStream, cache, tmpTarget, opts, errCheck) {
let digest
const hashStream = checksumStream({
digest: opts.digest,
algorithm: opts.hashAlgorithm,
algorithm: opts.hashAlgorithm || 'sha512',
size: opts.size
}).on('digest', d => {
digest = d
Expand Down Expand Up @@ -128,7 +128,7 @@ function pipeToTmp (inputStream, cache, tmpTarget, opts, errCheck) {

function moveToDestination (tmpTarget, cache, digest, opts, errCheck) {
errCheck()
const destination = contentPath(cache, digest)
const destination = contentPath(cache, digest, opts.hashAlgorithm)
const destDir = path.dirname(destination)

return fixOwner.mkdirfix(
Expand Down
8 changes: 4 additions & 4 deletions lib/content/read.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ function readStream (cache, address, opts) {
digest: address,
algorithm: opts.hashAlgorithm || 'sha512'
})
const cpath = contentPath(cache, address)
hasContent(cache, address).then(exists => {
const cpath = contentPath(cache, address, opts.hashAlgorithm || 'sha512')
hasContent(cache, address, opts.hashAlgorithm).then(exists => {
if (!exists) {
const err = new Error('content not found')
err.code = 'ENOENT'
Expand All @@ -34,10 +34,10 @@ function readStream (cache, address, opts) {
}

module.exports.hasContent = hasContent
function hasContent (cache, address, cb) {
function hasContent (cache, address, algorithm) {
if (!address) { return Promise.resolve(false) }
return fs.lstatAsync(
contentPath(cache, address)
contentPath(cache, address, algorithm || 'sha512')
).then(() => true).catch(err => {
if (err && err.code === 'ENOENT') {
return Promise.resolve(false)
Expand Down
6 changes: 4 additions & 2 deletions lib/content/rm.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ var contentPath = require('./path')
var rimraf = Promise.promisify(require('rimraf'))

module.exports = rm
function rm (cache, address) {
return rimraf(contentPath(cache, address))
/**
 * Removes a single content entry from the cache.
 *
 * Normalizes the digest and algorithm name to lowercase before resolving
 * the content path, then recursively deletes that path.
 *
 * @param {String} cache - root directory of the cache.
 * @param {String} address - hex digest of the content to remove.
 * @param {String} [algorithm] - hash algorithm name; falls back to 'sha512'.
 * @returns {Promise} resolves when the content path has been removed.
 */
function rm (cache, address, algorithm) {
  const digest = address.toLowerCase()
  const algo = (algorithm && algorithm.toLowerCase()) || 'sha512'
  return rimraf(contentPath(cache, digest, algo))
}
2 changes: 1 addition & 1 deletion lib/entry-index.js
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ function formatEntry (cache, entry) {
key: entry.key,
digest: entry.digest,
hashAlgorithm: entry.hashAlgorithm,
path: contentPath(cache, entry.digest),
path: contentPath(cache, entry.digest, entry.hashAlgorithm),
time: entry.time,
metadata: entry.metadata
}
Expand Down
5 changes: 3 additions & 2 deletions test/content.put-stream.chownr.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,14 @@ test('allows setting a custom uid for cache contents on write', {
t.plan(7)
pipe(fromString(CONTENT), putStream(CACHE, {
uid: NEWUID,
gid: NEWGID
gid: NEWGID,
hashAlgorithm: 'sha1'
}), function (err) {
if (err) { throw err }
var expectedPaths = [
CACHE, // this includes cache/tmp
path.join(CACHE, 'content'),
path.join(CACHE, 'content', DIGEST)
path.join(CACHE, 'content', 'sha1', DIGEST.slice(0, 2), DIGEST)
]
t.deepEqual(
updatedPaths.sort(),
Expand Down
9 changes: 5 additions & 4 deletions test/content.put-stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ const putStream = require('../lib/content/put-stream')

test('basic put', function (t) {
const CONTENT = 'foobarbaz'
const DIGEST = crypto.createHash('sha1').update(CONTENT).digest('hex')
// Default is sha512
const DIGEST = crypto.createHash('sha512').update(CONTENT).digest('hex')
let foundDigest
const src = fromString(CONTENT)
const stream = putStream(CACHE).on('digest', function (d) {
Expand All @@ -42,7 +43,7 @@ test('basic put', function (t) {

test('checks input digest doesn\'t match data', function (t) {
const CONTENT = 'foobarbaz'
const DIGEST = crypto.createHash('sha1').update(CONTENT).digest('hex')
const DIGEST = crypto.createHash('sha512').update(CONTENT).digest('hex')
t.plan(5)
let foundDigest1
let foundDigest2
Expand Down Expand Up @@ -107,7 +108,7 @@ test('errors if input size does not match expected', function (t) {

test('does not overwrite content if already on disk', function (t) {
const CONTENT = 'foobarbaz'
const DIGEST = crypto.createHash('sha1').update(CONTENT).digest('hex')
const DIGEST = crypto.createHash('sha512').update(CONTENT).digest('hex')
const contentDir = {}
contentDir[DIGEST] = File('nope')
const fixture = new Tacks(Dir({
Expand Down Expand Up @@ -163,7 +164,7 @@ test('errors if input stream errors', function (t) {

test('exits normally if file already open', function (t) {
const CONTENT = 'foobarbaz'
const DIGEST = crypto.createHash('sha1').update(CONTENT).digest('hex')
const DIGEST = crypto.createHash('sha512').update(CONTENT).digest('hex')
const PATH = path.join(CACHE, 'content', DIGEST)
const contentDir = {}
contentDir[DIGEST] = File(CONTENT)
Expand Down
38 changes: 27 additions & 11 deletions test/content.read.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,14 @@ const read = require('../lib/content/read')
test('readStream: returns a stream with cache content data', function (t) {
const CONTENT = 'foobarbaz'
const DIGEST = crypto.createHash('sha512').update(CONTENT).digest('hex')
const dir = {}
dir[DIGEST] = File(CONTENT)
const fixture = new Tacks(Dir({
'content': Dir(dir)
'content': Dir({
'sha512': Dir({
[DIGEST.slice(0, 2)]: Dir({
[DIGEST]: File(CONTENT)
})
})
})
}))
fixture.create(CACHE)
const stream = read.readStream(CACHE, DIGEST)
Expand All @@ -37,10 +41,14 @@ test('readStream: allows hashAlgorithm configuration', function (t) {
const CONTENT = 'foobarbaz'
const HASH = 'whirlpool'
const DIGEST = crypto.createHash(HASH).update(CONTENT).digest('hex')
const dir = {}
dir[DIGEST] = File(CONTENT)
const fixture = new Tacks(Dir({
'content': Dir(dir)
'content': Dir({
[HASH]: Dir({
[DIGEST.slice(0, 2)]: Dir({
[DIGEST]: File(CONTENT)
})
})
})
}))
fixture.create(CACHE)
const stream = read.readStream(CACHE, DIGEST, { hashAlgorithm: HASH })
Expand Down Expand Up @@ -71,11 +79,15 @@ test('readStream: errors if content missing', function (t) {

test('readStream: errors if content fails checksum', function (t) {
const CONTENT = 'foobarbaz'
const DIGEST = crypto.createHash('sha1').update(CONTENT).digest('hex')
const dir = {}
dir[DIGEST] = File(CONTENT.slice(3)) // invalid contents!
const DIGEST = crypto.createHash('sha512').update(CONTENT).digest('hex')
const fixture = new Tacks(Dir({
'content': Dir(dir)
'content': Dir({
'sha512': Dir({
[DIGEST.slice(0, 2)]: Dir({
[DIGEST]: File(CONTENT.slice(3)) // invalid contents!
})
})
})
}))
fixture.create(CACHE)
const stream = read.readStream(CACHE, DIGEST)
Expand All @@ -92,7 +104,11 @@ test('readStream: errors if content fails checksum', function (t) {
test('hasContent: returns true when a cache file exists', function (t) {
const fixture = new Tacks(Dir({
'content': Dir({
'deadbeef': File('')
'sha512': Dir({
'de': Dir({
'deadbeef': File('')
})
})
})
}))
fixture.create(CACHE)
Expand Down
4 changes: 3 additions & 1 deletion test/content.rm.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ const rm = require('../lib/content/rm')
test('removes a content entry', function (t) {
const fixture = new Tacks(Dir({
'content': Dir({
'deadbeef': File('')
'de': Dir({
'deadbeef': File('')
})
})
}))
fixture.create(CACHE)
Expand Down
24 changes: 20 additions & 4 deletions test/get.js
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,11 @@ function streamGet (byDigest) {
test('basic bulk get', t => {
const fixture = new Tacks(Dir({
'content': Dir({
[DIGEST]: File(CONTENT)
[ALGO]: Dir({
[DIGEST.slice(0, 2)]: Dir({
[DIGEST]: File(CONTENT)
})
})
})
}))
fixture.create(CACHE)
Expand All @@ -85,7 +89,11 @@ test('basic bulk get', t => {
test('basic stream get', t => {
const fixture = new Tacks(Dir({
'content': Dir({
[DIGEST]: File(CONTENT)
[ALGO]: Dir({
[DIGEST.slice(0, 2)]: Dir({
[DIGEST]: File(CONTENT)
})
})
})
}))
fixture.create(CACHE)
Expand Down Expand Up @@ -141,7 +149,11 @@ test('memoizes data on bulk read', t => {
memo.clearMemoized()
const fixture = new Tacks(Dir({
'content': Dir({
[DIGEST]: File(CONTENT)
[ALGO]: Dir({
[DIGEST.slice(0, 2)]: Dir({
[DIGEST]: File(CONTENT)
})
})
})
}))
fixture.create(CACHE)
Expand Down Expand Up @@ -191,7 +203,11 @@ test('memoizes data on stream read', t => {
memo.clearMemoized()
const fixture = new Tacks(Dir({
'content': Dir({
[DIGEST]: File(CONTENT)
[ALGO]: Dir({
[DIGEST.slice(0, 2)]: Dir({
[DIGEST]: File(CONTENT)
})
})
})
}))
fixture.create(CACHE)
Expand Down
6 changes: 5 additions & 1 deletion test/index.find.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,11 @@ test('index.find cache hit', function (t) {
CACHE, entry.key
).then(info => {
t.ok(info, 'cache hit')
t.equal(info.path, contentPath(CACHE, entry.digest), 'path added to info')
t.equal(
info.path,
contentPath(CACHE, entry.digest, entry.hashAlgorithm),
'path added to info'
)
delete info.path
t.deepEqual(info, entry, 'rest of info matches entry on disk')
})
Expand Down
4 changes: 2 additions & 2 deletions test/index.insert.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
'use strict'

const CacheIndex = require('./util/cache-index')
const contentPath = require('../lib/content/path')
const fs = require('fs')
const path = require('path')
const Promise = require('bluebird')
Expand All @@ -15,7 +16,6 @@ const Dir = Tacks.Dir
const index = require('../lib/entry-index')

const KEY = 'foo'
const KEYHASH = index._hashKey(KEY)
const BUCKET = index._bucketPath(CACHE, KEY)
const DIGEST = 'deadbeef'
const ALGO = 'whatnot'
Expand All @@ -28,7 +28,7 @@ test('basic insertion', function (t) {
key: KEY,
digest: DIGEST,
hashAlgorithm: ALGO,
path: path.join(CACHE, 'content', DIGEST),
path: contentPath(CACHE, DIGEST, ALGO),
time: entry.time,
metadata: 'foo'
}, 'formatted entry returned')
Expand Down
12 changes: 8 additions & 4 deletions test/index.ls.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@ test('basic listing', function (t) {
'index': CacheIndex(contents)
}))
contents.whatever.path =
contentPath(CACHE, contents.whatever.digest)
contentPath(
CACHE, contents.whatever.digest, contents.whatever.hashAlgorithm)
contents.whatnot.path =
contentPath(CACHE, contents.whatnot.digest)
contentPath(
CACHE, contents.whatnot.digest, contents.whatnot.hashAlgorithm)
fixture.create(CACHE)
return index.ls(CACHE).then(listing => {
t.deepEqual(listing, contents, 'index contents correct')
Expand Down Expand Up @@ -65,9 +67,11 @@ test('separate keys in conflicting buckets', function (t) {
})
}))
contents.whatever.path =
contentPath(CACHE, contents.whatever.digest)
contentPath(
CACHE, contents.whatever.digest, contents.whatever.hashAlgorithm)
contents.whatev.path =
contentPath(CACHE, contents.whatev.digest)
contentPath(
CACHE, contents.whatev.digest, contents.whatev.hashAlgorithm)
fixture.create(CACHE)
return index.ls(CACHE).then(listing => {
t.deepEqual(listing, contents, 'index contents correct')
Expand Down
4 changes: 2 additions & 2 deletions test/put.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ const testDir = require('./util/test-dir')(__filename)
const CACHE = path.join(testDir, 'cache')
const CONTENT = bufferise('foobarbaz')
const KEY = 'my-test-key'
const ALGO = 'sha1'
const ALGO = 'sha512'
const DIGEST = crypto.createHash(ALGO).update(CONTENT).digest('hex')
const METADATA = { foo: 'bar' }
const contentPath = require('../lib/content/path')
Expand All @@ -31,7 +31,7 @@ function bufferise (string) {
test('basic bulk insertion', t => {
return put(CACHE, KEY, CONTENT).then(digest => {
t.equal(digest, DIGEST, 'returned content digest')
const dataPath = contentPath(CACHE, digest)
const dataPath = contentPath(CACHE, digest, ALGO)
return fs.readFileAsync(dataPath)
}).then(data => {
t.deepEqual(data, CONTENT, 'content was correctly inserted')
Expand Down

0 comments on commit c094d9f

Please sign in to comment.