Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add batch-put benchmark #4

Merged
merged 1 commit into from
May 27, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,16 @@ Perform concurrent `put()` operations on sorted string keys. Options:
- `--concurrency`: default 10
- `--valueSize`: size of value, as a number in bytes or string with unit (e.g. `--valueSize 1kb`)

### `batch-put`

Same as `write`, but in batches rather than individual puts. Performs concurrent `batch()` operations on random string keys and values. Options:

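- `-n`: total number of entries to write (not the number of `batch()` calls), default 1e6
- `--batchSize`: number of entries per batch, default 1000
- `--chained`: boolean flag, default false; when set, use a chained batch (`db.batch()` followed by `put()` calls and `write()`) instead of passing an array of operations to `db.batch()`
- `--concurrency`: maximum number of batches in flight at once, default 1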
- `--valueSize`: size of value, as a number in bytes or string with unit (e.g. `--valueSize 1kb`)

<!-- ### Other ideas

- Write batches in different sizes (feature: define a matrix)
Expand Down
137 changes: 137 additions & 0 deletions benchmarks/batch-put.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
'use strict'

const crypto = require('crypto')
const ldu = require('../lib/level-du')
const keyTmpl = '0000000000000000'

exports.defaults = {
benchmark: {
n: 1e6,
batchSize: 1e3,
concurrency: 1,
valueSize: 100,
chained: false
}
}

// Reuse the plot module of the `write` benchmark (./write.plot).
exports.plot = require('./write.plot')

exports.run = function (factory, stream, options) {
stream.write('Elapsed (ms), Entries, Bytes, Last 1000 Avg Time, MB/s\n')

function make16CharPaddedKey () {
const r = Math.floor(Math.random() * options.n)
const k = keyTmpl + r

return k.substr(k.length - 16)
}

function start (db) {
const startTime = Date.now()
const batchSize = options.batchSize

let inProgress = 0
let totalWrites = 0
let totalBytes = 0
let timesAccum = 0
let elapsed

function report () {
console.log(
'Wrote', options.n, 'entries in',
Math.floor((Date.now() - startTime) / 1000) + 's,',
(Math.floor((totalBytes / 1048576) * 100) / 100) + 'MB'
)

stream.end()

db.close(function (err) {
if (err) throw err

ldu(db, function (err, size) {
if (err) throw err
if (size) console.log('Database size:', Math.floor(size / 1024 / 1024) + 'M')
})
})
}

function write () {
if (totalWrites >= options.n) return report(Date.now() - startTime)
if (inProgress >= options.concurrency) return

inProgress++

if (totalWrites % 100000 === 0) {
console.log('' + inProgress, totalWrites,
Math.round(totalWrites / options.n * 100) + '%')
}

// TODO: batchSize should be a multiple of 10
if (totalWrites % 1000 === 0) {
elapsed = Date.now() - startTime
stream.write(
elapsed +
',' + totalWrites +
',' + totalBytes +
',' + Math.floor(timesAccum / 1000) +
',' + (Math.floor(((totalBytes / 1048576) / (elapsed / 1000)) * 100) / 100) +
'\n')
timesAccum = 0
}

let start

if (options.chained) {
const batch = db.batch()

for (let i = 0; i < batchSize; i++) {
// TODO: see comment in write.js
const key = make16CharPaddedKey()
const value = crypto.randomBytes(options.valueSize).toString('hex')

batch.put(key, value)
}

start = process.hrtime()
batch.write(onWrite)
} else {
const ops = new Array(batchSize)

for (let i = 0; i < batchSize; i++) {
// TODO: see comment in write.js
const key = make16CharPaddedKey()
const value = crypto.randomBytes(options.valueSize).toString('hex')

ops[i] = { type: 'put', key, value }
}

start = process.hrtime()
db.batch(ops, onWrite)
}

function onWrite (err) {
if (err) throw err

const duration = process.hrtime(start)
const nano = (duration[0] * 1e9) + duration[1]

totalBytes += (keyTmpl.length + options.valueSize) * batchSize
totalWrites += batchSize
timesAccum += nano
inProgress--

process.nextTick(write)
}
}

for (let i = 0; i < options.concurrency; i++) write()
}

// TODO (once stream is sync): skip setTimeout
setTimeout(function () {
factory(function (err, db) {
if (err) throw err
start(db)
})
}, 500)
}
2 changes: 2 additions & 0 deletions benchmarks/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
// Benchmark modules, exported under their benchmark names.
exports.write = require('./write')
exports['write-random'] = require('./write-random')
exports['write-sorted'] = require('./write-sorted')

// Batched variant of `write` (see ./batch-put.js).
exports['batch-put'] = require('./batch-put')
4 changes: 4 additions & 0 deletions benchmarks/write.js
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ exports.run = function (factory, stream, options) {
timesAccum = 0
}

// TODO: though we don't start the clock until after crypto.randomBytes(),
// due to concurrency there might be put() callbacks waiting in libuv
// while the main thread is blocked? hmz. Maybe use async randomBytes(),
// or pregenerated values (bonus: make them deterministic).
const key = make16CharPaddedKey()
const value = crypto.randomBytes(options.valueSize).toString('hex')
const start = process.hrtime()
Expand Down
1 change: 1 addition & 0 deletions benchmarks/write.plot.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

const e = require('../lib/escape-gnuplot-string')

// Note: also used by batch-put.js
module.exports = function (title, description, results) {
const durations = results.map(function (res, i) {
const file = res.csvFile
Expand Down