feat(index): collate index files into subdirs
Fixes: #14

Most filesystems see performance degradation if a single directory has too many files
in it. For that reason, the entry index now stores index buckets as:

`<cache>/index/<bucketKey.slice(0, 2)>/<bucketKey>`.
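
For illustration, here is a minimal sketch of how such a bucket path is computed, mirroring the `bucketPath()` change in this diff. The `hashKey()` body below is an assumption made purely for the sketch (its real implementation is not part of this diff); the point is the two-character prefix subdirectory.

```js
// Minimal sketch of the new layout, assuming hashKey() yields a hex digest.
// Only bucketPath() mirrors this commit; hashKey() here is illustrative.
const path = require('path')
const crypto = require('crypto')

function hashKey (key) {
  // Assumption: any stable hex digest of the key works for this sketch.
  return crypto.createHash('sha1').update(key).digest('hex')
}

function bucketPath (cache, key) {
  const hashed = hashKey(key)
  // The two-character prefix subdirectory keeps any single directory from
  // accumulating an unbounded number of index bucket files.
  return path.join(cache, 'index', hashed.slice(0, 2), hashed)
}

// bucketPath('/tmp/my-cache', 'foo')
// => '/tmp/my-cache/index/0b/0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
```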

BREAKING CHANGE: Previously-generated index entries are no longer compatible and the index must be regenerated.
zkat committed Mar 2, 2017
1 parent bbc5fca commit e8402a5
Showing 3 changed files with 61 additions and 47 deletions.
70 changes: 42 additions & 28 deletions lib/entry-index.js
@@ -15,7 +15,7 @@ const through = require('mississippi').through
module.exports.insert = insert
function insert (cache, key, digest, opts) {
opts = opts || {}
const bucket = indexPath(cache, key)
const bucket = bucketPath(cache, key)
const lock = bucket + '.lock'
return fixOwner.mkdirfix(
path.dirname(bucket), opts.uid, opts.gid
@@ -74,7 +74,7 @@ function insert (cache, key, digest, opts) {

module.exports.find = find
function find (cache, key) {
const bucket = indexPath(cache, key)
const bucket = bucketPath(cache, key)
const stream = fs.createReadStream(bucket)
let ret
return Promise.fromNode(cb => {
@@ -105,37 +105,49 @@ function del (cache, key) {

module.exports.lsStream = lsStream
function lsStream (cache) {
const indexPath = path.join(cache, 'index')
const indexDir = path.join(cache, 'index')
const stream = through.obj()
fs.readdir(indexPath, function (err, files) {
fs.readdir(indexDir, function (err, buckets) {
if (err && err.code === 'ENOENT') {
return stream.end()
} else if (err) {
return stream.emit('error', err)
} else {
asyncMap(files, function (f, cb) {
fs.readFile(path.join(indexPath, f), 'utf8', function (err, data) {
if (err) { return cb(err) }
const entries = {}
data.split('\n').forEach(function (entry) {
let parsed
try {
parsed = JSON.parse(entry)
} catch (e) {
}
// NOTE - it's possible for an entry to be
// incomplete/corrupt. So we just skip it.
// See comment on `insert()` for deets.
if (parsed) {
entries[parsed.key] = formatEntry(cache, parsed)
}
})
Object.keys(entries).forEach(function (k) {
stream.write(entries[k])
})
cb()
asyncMap(buckets, (bucket, cb) => {
fs.readdir(path.join(indexDir, bucket), (err, files) => {
if (err && err.code === 'ENOENT') {
return cb()
} else if (err) {
return cb(err)
} else {
asyncMap(files, function (f, cb) {
fs.readFile(path.join(indexDir, bucket, f), 'utf8', function (err, data) {
if (err) { return cb(err) }
const entries = {}
data.split('\n').forEach(function (entry) {
let parsed
try {
parsed = JSON.parse(entry)
} catch (e) {
}
// NOTE - it's possible for an entry to be
// incomplete/corrupt. So we just skip it.
// See comment on `insert()` for deets.
if (parsed) {
entries[parsed.key] = formatEntry(cache, parsed)
}
})
Object.keys(entries).forEach(function (k) {
stream.write(entries[k])
})
cb()
})
}, function (err) {
cb(err)
})
}
})
}, function (err) {
}, err => {
if (err) { stream.emit('error') }
stream.end()
})
@@ -165,8 +177,10 @@ function notFoundError (cache, key) {
return err
}

function indexPath (cache, key) {
return path.join(cache, 'index', hashKey(key))
module.exports._bucketPath = bucketPath
function bucketPath (cache, key) {
const hashed = hashKey(key)
return path.join(cache, 'index', hashed.slice(0, 2), hashed)
}

module.exports._hashKey = hashKey
24 changes: 9 additions & 15 deletions test/index.insert.js
@@ -16,6 +16,7 @@ const index = require('../lib/entry-index')

const KEY = 'foo'
const KEYHASH = index._hashKey(KEY)
const BUCKET = index._bucketPath(CACHE, KEY)
const DIGEST = 'deadbeef'
const ALGO = 'whatnot'

@@ -31,8 +32,7 @@ test('basic insertion', function (t) {
time: entry.time,
metadata: 'foo'
}, 'formatted entry returned')
const bucket = path.join(CACHE, 'index', KEYHASH)
return fs.readFileAsync(bucket, 'utf8')
return fs.readFileAsync(BUCKET, 'utf8')
}).then(data => {
t.equal(data[0], '{', 'first entry starts with a {, not \\n')
const entry = JSON.parse(data)
@@ -53,8 +53,7 @@ test('inserts additional entries into existing key', function (t) {
).then(() => (
index.insert(CACHE, KEY, DIGEST, {metadata: 2})
)).then(() => {
const bucket = path.join(CACHE, 'index', KEYHASH)
return fs.readFileAsync(bucket, 'utf8')
return fs.readFileAsync(BUCKET, 'utf8')
}).then(data => {
const entries = data.split('\n').map(JSON.parse)
entries.forEach(function (e) { delete e.time })
@@ -84,8 +83,7 @@ test('separates entries even if one is corrupted', function (t) {
return index.insert(
CACHE, KEY, DIGEST
).then(() => {
const bucket = path.join(CACHE, 'index', KEYHASH)
return fs.readFileAsync(bucket, 'utf8')
return fs.readFileAsync(BUCKET, 'utf8')
}).then(data => {
const entry = JSON.parse(data.split('\n')[4])
delete entry.time
@@ -101,8 +99,7 @@ test('optional arbitrary metadata', function (t) {
return index.insert(
CACHE, KEY, DIGEST, { metadata: metadata }
).then(() => {
const bucket = path.join(CACHE, 'index', KEYHASH)
return fs.readFileAsync(bucket, 'utf8')
return fs.readFileAsync(BUCKET, 'utf8')
}).then(data => {
const entry = JSON.parse(data)
delete entry.time
@@ -119,8 +116,7 @@ test('key case-sensitivity', function (t) {
index.insert(CACHE, KEY, DIGEST),
index.insert(CACHE, KEY.toUpperCase(), DIGEST)
).then(() => {
const bucket = path.join(CACHE, 'index', KEYHASH)
return fs.readFileAsync(bucket, 'utf8')
return fs.readFileAsync(BUCKET, 'utf8')
}).then(data => {
const entries = data.split('\n').map(JSON.parse).sort(e => (
e.key === KEY
@@ -148,7 +144,7 @@ test('hash conflict in same bucket', function (t) {
).then(() => (
index.insert(CACHE, CONFLICTING, DIGEST)
)).then(() => {
const bucket = path.join(CACHE, 'index', index._hashKey(NEWKEY))
const bucket = index._bucketPath(CACHE, NEWKEY)
return fs.readFileAsync(bucket, 'utf8')
}).then(data => {
const entries = data.split('\n').map(JSON.parse)
@@ -165,11 +161,10 @@ test('hash conflict in same bucket', function (t) {

test('path-breaking characters', function (t) {
const newKey = ';;!registry\nhttps://registry.npmjs.org/back \\ slash@Cool™?'
const newHash = index._hashKey(newKey)
return index.insert(
CACHE, newKey, DIGEST
).then(() => {
const bucket = path.join(CACHE, 'index', newHash)
const bucket = index._bucketPath(CACHE, newKey)
return fs.readFileAsync(bucket, 'utf8')
}).then(data => {
const entry = JSON.parse(data)
@@ -186,11 +181,10 @@ test('extremely long keys', function (t) {
for (let i = 0; i < 10000; i++) {
newKey += i
}
const newHash = index._hashKey(newKey)
return index.insert(
CACHE, newKey, DIGEST
).then(() => {
const bucket = path.join(CACHE, 'index', newHash)
const bucket = index._bucketPath(CACHE, newKey)
return fs.readFileAsync(bucket, 'utf8')
}).then(data => {
const entry = JSON.parse(data)
14 changes: 10 additions & 4 deletions test/util/cache-index.js
@@ -16,6 +16,7 @@ function CacheIndex (entries) {
Object.keys(entries).forEach(function (k) {
var lines = entries[k]
var hashed = hashKey(k)
var prefix = hashed.slice(0, 2)
var serialised
if (typeof lines === 'string') {
serialised = lines
@@ -25,12 +26,17 @@
}
serialised = lines.map(JSON.stringify).join('\n')
}
index[hashed] = index[hashed]
? [index[hashed], serialised].join('\n')
index[prefix] = index[prefix] || {}
index[prefix][hashed] = index[prefix][hashed]
? [index[prefix][hashed], serialised].join('\n')
: serialised
})
Object.keys(index).forEach(function (k) {
index[k] = File(index[k])
Object.keys(index).forEach(function (prefix) {
var files = {}
Object.keys(index[prefix]).forEach(key => {
files[key] = File(index[prefix][key])
})
index[prefix] = Dir(files)
})
return Dir(index)
}
