
fix(index): who cares about race conditions anyway
PRETTY SURE THIS IS FINE
zkat committed Mar 4, 2017
1 parent 954b1b3 commit b1d3888
Showing 3 changed files with 48 additions and 116 deletions.
94 changes: 22 additions & 72 deletions lib/entry-index.js
@@ -5,7 +5,6 @@ const contentPath = require('./content/path')
const crypto = require('crypto')
const fixOwner = require('./util/fix-owner')
const fs = require('graceful-fs')
const lockfile = require('lockfile')
const path = require('path')
const pipe = require('mississippi').pipe
const Promise = require('bluebird')
@@ -14,64 +13,30 @@ const through = require('mississippi').through

const indexV = require('../package.json')['cache-version'].index

const appendFileAsync = Promise.promisify(fs.appendFile)

module.exports.insert = insert
function insert (cache, key, digest, opts) {
opts = opts || {}
const bucket = bucketPath(cache, key)
const lock = bucket + '.lock'
return fixOwner.mkdirfix(
path.dirname(bucket), opts.uid, opts.gid
).then(() => (
Promise.fromNode(_cb => {
const cb = (err, entry) => {
lockfile.unlock(lock, er => {
_cb(err || er, entry)
})
}
lockfile.lock(lock, {
stale: 60000,
retries: 10,
wait: 10000
}, function (err) {
if (err) { return _cb(err) }
fs.stat(bucket, function (err, existing) {
if (err && err.code !== 'ENOENT' && err.code !== 'EPERM') {
return cb(err)
}
const entry = {
key: key,
digest: digest,
hashAlgorithm: opts.hashAlgorithm,
time: +(new Date()),
metadata: opts.metadata
}
// Because of the way these entries work,
// the index is safe from fs.appendFile stopping
// mid-write so long as newlines are *prepended*
//
// That is, if a write fails, it will be ignored
// by `find`, and the next successful one will be
// used.
//
// This should be -very rare-, since `fs.appendFile`
// will often be atomic on most platforms unless
// very large metadata has been included, but caches
// like this one tend to last a long time. :)
// Most corrupted reads are likely to be from attempting
// to read the index while it's being written to --
// which is safe, but not guaranteed to be atomic.
const e = (existing ? '\n' : '') + JSON.stringify(entry)
fs.appendFile(bucket, e, function (err) {
cb(err, entry)
})
})
})
})
)).then(entry => {
return fixOwner.chownr(bucket, opts.uid, opts.gid).then(() => {
return formatEntry(cache, entry)
})
})
).then(() => {
const entry = {
key: key,
digest: digest,
hashAlgorithm: opts.hashAlgorithm,
time: +(new Date()),
metadata: opts.metadata
}
return appendFileAsync(
bucket, '\n' + JSON.stringify(entry)
).then(() => entry)
}).then(entry => (
fixOwner.chownr(bucket, opts.uid, opts.gid).then(() => (
formatEntry(cache, entry)
))
))
}

module.exports.find = find
@@ -126,7 +91,7 @@ function lsStream (cache) {
fs.readFile(path.join(indexDir, bucket, f), 'utf8', function (err, data) {
if (err) { return cb(err) }
const entries = {}
data.split('\n').forEach(function (entry) {
data.split('\n').slice(1).forEach(function (entry) {
let parsed
try {
parsed = JSON.parse(entry)
@@ -186,30 +151,15 @@ function bucketDir (cache) {
module.exports._bucketPath = bucketPath
function bucketPath (cache, key) {
const hashed = hashKey(key)
return path.join(bucketDir(cache), hashed.slice(0, 2), hashed)
return path.join(bucketDir(cache), hashed.slice(0, 2), hashed.slice(2))
}

module.exports._hashKey = hashKey
function hashKey (key) {
// NOTE (SECURITY)
//
// `sha1` conflicts can be generated, but it doesn't matter in this case,
// since we intend for there to be regular conflicts anyway. You can have
// the entire cache in a single bucket and all that'll do is just make a big
// file with a lot of contention, if you can even pull it off in the `key`
// string. So whatever. `sha1` is faster and it doesn't trigger the warnings
// `md5` tends to (yet?...).
//
// Not to mention, that in the case of pacote/npm, the amount of control
// anyone would have over this key is so minimal that it's incredibly
// unlikely that they could intentionally generate a large number of
// conflicts just with a package key such that they'd do anything resembling
// a hash flood DOS.
return crypto
.createHash('sha1')
.update(key.toLowerCase()) // lump case-variant keys into same bucket.
.createHash('sha256')
.update(key)
.digest('hex')
.slice(0, 7)
}

function formatEntry (cache, entry) {
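The core of the change in lib/entry-index.js is dropping the lockfile dependency: insert now appends '\n' + JSON.stringify(entry) to the bucket file without taking a lock, and readers simply skip anything that does not parse, trading a little read-time tolerance for not needing a lock file at all. A minimal, self-contained sketch of that append-only read/write pattern (synchronous fs calls for brevity; an illustration only, not cacache's exported API) might look like:

// Sketch of the append-only bucket format this commit switches to.
// Each entry is written as '\n' + JSON, and readers drop lines that
// fail to parse, so a torn append loses at most that one entry.
const fs = require('fs')

function appendEntry (bucket, entry) {
  // The leading newline terminates any previously torn write, so this
  // entry always starts on its own line.
  fs.appendFileSync(bucket, '\n' + JSON.stringify(entry))
}

function readEntries (bucket) {
  let data
  try {
    data = fs.readFileSync(bucket, 'utf8')
  } catch (err) {
    if (err.code === 'ENOENT') { return [] }
    throw err
  }
  // slice(1) drops the empty string before the first '\n'; unparseable
  // (partial) lines are skipped, mirroring what find/lsStream do above.
  return data.split('\n').slice(1).reduce((entries, line) => {
    try { entries.push(JSON.parse(line)) } catch (e) {}
    return entries
  }, [])
}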
68 changes: 25 additions & 43 deletions test/index.insert.js
@@ -34,7 +34,7 @@ test('basic insertion', function (t) {
}, 'formatted entry returned')
return fs.readFileAsync(BUCKET, 'utf8')
}).then(data => {
t.equal(data[0], '{', 'first entry starts with a {, not \\n')
t.equal(data[0], '\n', 'first entry starts with a \\n')
const entry = JSON.parse(data)
t.ok(entry.time, 'entry has a timestamp')
t.deepEqual(entry, {
@@ -55,7 +55,7 @@ test('inserts additional entries into existing key', function (t) {
)).then(() => {
return fs.readFileAsync(BUCKET, 'utf8')
}).then(data => {
const entries = data.split('\n').map(JSON.parse)
const entries = data.split('\n').slice(1).map(JSON.parse)
entries.forEach(function (e) { delete e.time })
t.deepEqual(entries, [{
key: KEY,
@@ -112,48 +112,30 @@ test('optional arbitrary metadata', function (t) {
test('key case-sensitivity', function (t) {
return Promise.join(
index.insert(CACHE, KEY, DIGEST),
index.insert(CACHE, KEY.toUpperCase(), DIGEST)
index.insert(CACHE, KEY.toUpperCase(), DIGEST + 'upper')
).then(() => {
return fs.readFileAsync(BUCKET, 'utf8')
}).then(data => {
const entries = data.split('\n').map(JSON.parse).sort(e => (
e.key === KEY
? -1
: 1
))
entries.forEach(function (e) { delete e.time })
t.deepEqual(entries, [{
key: KEY,
digest: DIGEST
}, {
key: KEY.toUpperCase(),
digest: DIGEST
}], 'all entries present')
})
})

test('hash conflict in same bucket', function (t) {
// NOTE - this test will break if `index._hashKey` changes its algorithm.
// Adapt to it accordingly.
const NEWKEY = KEY.toUpperCase()
const CONFLICTING = KEY.toLowerCase()
return index.insert(
CACHE, NEWKEY, DIGEST
).then(() => (
index.insert(CACHE, CONFLICTING, DIGEST)
)).then(() => {
const bucket = index._bucketPath(CACHE, NEWKEY)
return fs.readFileAsync(bucket, 'utf8')
}).then(data => {
const entries = data.split('\n').map(JSON.parse)
entries.forEach(function (e) { delete e.time })
t.deepEqual(entries, [{
key: NEWKEY,
digest: DIGEST
}, {
key: CONFLICTING,
digest: DIGEST
}], 'multiple entries for conflicting keys in the same bucket')
return Promise.join(
index.find(CACHE, KEY),
index.find(CACHE, KEY.toUpperCase()),
(entry, upperEntry) => {
delete entry.time
delete upperEntry.time
t.deepEqual({
key: entry.key,
digest: entry.digest
}, {
key: KEY,
digest: DIGEST
}, 'regular entry exists')
t.deepEqual({
key: upperEntry.key,
digest: upperEntry.digest
}, {
key: KEY.toUpperCase(),
digest: DIGEST + 'upper'
}, 'case-variant entry intact')
}
)
})
})

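The rewritten case-sensitivity test reflects another consequence of the new hashKey: with key.toLowerCase() gone, case-variant keys no longer share a bucket, so the test looks each variant up with index.find instead of reading a single bucket file. A quick standalone check of that property (same hashing as the code above, arbitrary example keys) could be:

// Case-variant keys now hash to different bucket names (barring a
// vanishingly unlikely collision in the 28-bit prefix), since the key
// is no longer lower-cased before hashing.
const crypto = require('crypto')

function hashKey (key) {
  return crypto
    .createHash('sha256')
    .update(key)
    .digest('hex')
    .slice(0, 7)
}

console.log(hashKey('whatever'))   // one 7-character hex prefix
console.log(hashKey('WHATEVER'))   // a different prefix, hence a different bucket
console.log(hashKey('whatever') === hashKey('WHATEVER'))   // false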
2 changes: 1 addition & 1 deletion test/util/cache-index.js
@@ -26,7 +26,7 @@ function CacheIndex (entries, hashAlgorithm) {
if (typeof lines.length !== 'number') {
lines = [lines]
}
serialised = lines.map(JSON.stringify).join('\n')
serialised = '\n' + lines.map(JSON.stringify).join('\n')
}
insertContent(tree, parts, serialised)
})
