-
Notifications
You must be signed in to change notification settings - Fork 29.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
benchmark: use t-test for comparing node versions
The data sampling is done in node and the data processing is done in R. Only plyr was added as an R dependency and it is fairly standard. PR-URL: #7094 Reviewed-By: Trevor Norris <[email protected]> Reviewed-By: Jeremiah Senkpiel <[email protected]> Reviewed-By: Brian White <[email protected]> Reviewed-By: Anna Henningsen <[email protected]>
- Loading branch information
1 parent
8bb59fd
commit 855009a
Showing
3 changed files
with
161 additions
and
162 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
|
||
# Minimal command line parser. Once this script has run, `args.options` maps
# every option name (leading '-' or '--' removed) to the argument that follows
# it, or to TRUE when the option is not followed by a value. Arguments that
# neither start with '-' nor follow an option are ignored.
args = commandArgs(trailingOnly = TRUE);

args.options = list();

# Name of the option that may still receive a value; NULL when none is pending.
temp.option.key = NULL;

for (arg in args) {
  if (grepl('^-', arg)) {
    # Option name: strip one or two leading dashes and record it as a flag.
    temp.option.key = sub('^--?', '', arg);
    args.options[[temp.option.key]] = TRUE;
  } else if (!is.null(temp.option.key)) {
    # Value of the pending option: replaces the TRUE placeholder set above.
    args.options[[temp.option.key]] = arg;
    temp.option.key = NULL;
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#!/usr/bin/env Rscript | ||
library(ggplot2); | ||
library(plyr); | ||
|
||
# Get the directory of this script (the R counterpart of __dirname) from the
# `--file=` entry of the full argument list, then load ./_cli.R from it. That
# script fills `args.options` with the parsed command line options.
args = commandArgs(trailingOnly = F);
dirname = dirname(sub("--file=", "", args[grep("--file", args)]));
source(paste0(dirname, '/_cli.R'), chdir=T);

# Show the usage text when --help is given, or when --plot is given without a
# filename (the parser stores TRUE for an option that has no value).
if (!is.null(args.options$help) ||
    (!is.null(args.options$plot) && args.options$plot == TRUE)) {
  stop("usage: cat file.csv | Rscript compare.R
  --help           show this message
  --plot filename  save plot to filename");
}

# NULL when no plot was requested.
plot.filename = args.options$plot;

# Read the csv samples from stdin.
dat = read.csv(file('stdin'));
dat = data.frame(dat);
# Two-line label used on the plot axis.
dat$nameTwoLines = paste0(dat$filename, '\n', dat$configuration);
# One-line label used to group and print the results. The space keeps the
# filename and the configuration from running together in the printed table.
dat$name = paste0(dat$filename, ' ', dat$configuration);

# Create a box plot
if (!is.null(plot.filename)) {
  p = ggplot(data=dat);
  p = p + geom_boxplot(aes(x=nameTwoLines, y=rate, fill=binary));
  p = p + ylab("rate of operations (higher is better)");
  p = p + xlab("benchmark");
  # Rotate the benchmark labels so long names do not overlap.
  p = p + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5));
  ggsave(plot.filename, p);
}
|
||
# Build the results table: one row per benchmark name.
statistics = ddply(dat, "name", function(subdat) {
  # Run a statistical test to see if there actually is a difference in
  # performance between the binaries.
  test = t.test(rate ~ binary, data=subdat);
  p.value = test$p.value;

  # Relative change, in percent, of the mean rate of the "new" binary compared
  # with the "old" binary.
  rate.new = mean(subset(subdat, binary == "new")$rate);
  rate.old = mean(subset(subdat, binary == "old")$rate);
  change = (rate.new - rate.old) / rate.old * 100;

  # User friendly stars. There should be at least one star before you can say
  # that there is an improvement.
  stars = if (p.value < 0.001) {
    '***'
  } else if (p.value < 0.01) {
    '**'
  } else if (p.value < 0.05) {
    '*'
  } else {
    ''
  };

  return(data.frame(
    improvement = sprintf("%.2f %%", change),
    significant = stars,
    p.value = p.value
  ));
});

# Move the benchmark names from a column to the row names, so that they are
# left aligned when printed.
row.names(statistics) = statistics$name;
statistics[['name']] = NULL;

# Use a wide output so the table rows are less likely to wrap.
options(width = 200);
print(statistics);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,181 +1,86 @@ | ||
'use strict'; | ||
var usage = 'node benchmark/compare.js ' + | ||
'<node-binary1> <node-binary2> ' + | ||
'[--html] [--red|-r] [--green|-g] ' + | ||
'[-- <type> [testFilter]]'; | ||
|
||
var show = 'both'; | ||
var nodes = []; | ||
var html = false; | ||
var benchmarks; | ||
const fork = require('child_process').fork; | ||
const path = require('path'); | ||
const CLI = require('./_cli.js'); | ||
|
||
// | ||
// Parse arguments | ||
// | ||
const cli = CLI(`usage: ./node compare.js [options] [--] <category> ... | ||
Run each benchmark in the <category> directory many times using two diffrent | ||
node versions. More than one <category> directory can be specified. | ||
The output is formatted as csv, which can be processed using for | ||
example 'compare.R'. | ||
--new ./new-node-binary new node binary (required) | ||
--old ./old-node-binary old node binary (required) | ||
--runs 30 number of samples | ||
--filter pattern string to filter benchmark scripts | ||
--set variable=value set benchmark variable (can be repeated) | ||
`, { | ||
arrayArgs: ['set'] | ||
}); | ||
|
||
if (!cli.optional.new || !cli.optional.old) { | ||
cli.abort(cli.usage); | ||
return; | ||
} | ||
|
||
for (var i = 2; i < process.argv.length; i++) { | ||
var arg = process.argv[i]; | ||
switch (arg) { | ||
case '--red': case '-r': | ||
show = show === 'green' ? 'both' : 'red'; | ||
break; | ||
case '--green': case '-g': | ||
show = show === 'red' ? 'both' : 'green'; | ||
break; | ||
case '--html': | ||
html = true; | ||
break; | ||
case '-h': case '-?': case '--help': | ||
console.log(usage); | ||
process.exit(0); | ||
break; | ||
case '--': | ||
benchmarks = []; | ||
break; | ||
default: | ||
if (Array.isArray(benchmarks)) | ||
benchmarks.push(arg); | ||
else | ||
nodes.push(arg); | ||
break; | ||
} | ||
const binaries = ['old', 'new']; | ||
const runs = cli.optional.runs ? parseInt(cli.optional.runs, 10) : 30; | ||
const benchmarks = cli.benchmarks(); | ||
|
||
if (benchmarks.length === 0) { | ||
console.error('no benchmarks found'); | ||
process.exit(1); | ||
} | ||
|
||
var start, green, red, reset, end; | ||
if (!html) { | ||
start = ''; | ||
green = '\u001b[1;32m'; | ||
red = '\u001b[1;31m'; | ||
reset = '\u001b[m'; | ||
end = ''; | ||
} else { | ||
start = '<pre style="background-color:#333;color:#eee">'; | ||
green = '<span style="background-color:#0f0;color:#000">'; | ||
red = '<span style="background-color:#f00;color:#fff">'; | ||
reset = '</span>'; | ||
end = '</pre>'; | ||
// Create a queue from the benchmarks list so that both node versions are | ||
// tested `runs` times each. | ||
const queue = []; | ||
for (let iter = 0; iter < runs; iter++) { | ||
for (const filename of benchmarks) { | ||
for (const binary of binaries) { | ||
queue.push({ binary, filename, iter }); | ||
} | ||
} | ||
} | ||
|
||
var runBench = process.env.NODE_BENCH || 'bench'; | ||
// Print csv header | ||
console.log('"binary", "filename", "configuration", "rate", "time"'); | ||
|
||
if (nodes.length !== 2) | ||
return console.error('usage:\n %s', usage); | ||
(function recursive(i) { | ||
const job = queue[i]; | ||
|
||
var spawn = require('child_process').spawn; | ||
var results = {}; | ||
var toggle = 1; | ||
var r = (+process.env.NODE_BENCH_RUNS || 1) * 2; | ||
const child = fork(path.resolve(__dirname, job.filename), cli.optional.set, { | ||
execPath: cli.optional[job.binary] | ||
}); | ||
|
||
run(); | ||
function run() { | ||
if (--r < 0) | ||
return compare(); | ||
toggle = ++toggle % 2; | ||
child.on('message', function(data) { | ||
// Construct configuration string, "A=a B=b ..." | ||
let conf = ''; | ||
for (const key of Object.keys(data.conf)) { | ||
conf += ' ' + key + '=' + JSON.stringify(data.conf[key]); | ||
} | ||
conf = conf.slice(1); | ||
|
||
var node = nodes[toggle]; | ||
console.error('running %s', node); | ||
var env = {}; | ||
for (var i in process.env) | ||
env[i] = process.env[i]; | ||
env.NODE = node; | ||
// Escape quotes (") for correct csv formatting | ||
conf = conf.replace(/"/g, '""'); | ||
|
||
var out = ''; | ||
var child; | ||
if (Array.isArray(benchmarks) && benchmarks.length) { | ||
child = spawn( | ||
node, | ||
['benchmark/run.js'].concat(benchmarks), | ||
{ env: env } | ||
); | ||
} else { | ||
child = spawn('make', [runBench], { env: env }); | ||
} | ||
child.stdout.setEncoding('utf8'); | ||
child.stdout.on('data', function(c) { | ||
out += c; | ||
console.log(`"${job.binary}", "${job.filename}", "${conf}", ` + | ||
`${data.rate}, ${data.time}`); | ||
}); | ||
|
||
child.stderr.pipe(process.stderr); | ||
|
||
child.on('close', function(code) { | ||
child.once('close', function(code) { | ||
if (code) { | ||
console.error('%s exited with code=%d', node, code); | ||
process.exit(code); | ||
} else { | ||
out.trim().split(/\r?\n/).forEach(function(line) { | ||
line = line.trim(); | ||
if (!line) | ||
return; | ||
|
||
var s = line.split(':'); | ||
var num = +s.pop(); | ||
if (!num && num !== 0) | ||
return; | ||
|
||
line = s.join(':'); | ||
var res = results[line] = results[line] || {}; | ||
res[node] = res[node] || []; | ||
res[node].push(num); | ||
}); | ||
|
||
run(); | ||
} | ||
}); | ||
} | ||
|
||
function compare() { | ||
// each result is an object with {"foo.js arg=bar":12345,...} | ||
// compare each thing, and show which node did the best. | ||
// node[0] is shown in green, node[1] shown in red. | ||
var maxLen = -Infinity; | ||
var util = require('util'); | ||
console.log(start); | ||
|
||
Object.keys(results).map(function(bench) { | ||
var res = results[bench]; | ||
var n0 = avg(res[nodes[0]]); | ||
var n1 = avg(res[nodes[1]]); | ||
|
||
var pct = ((n0 - n1) / n1 * 100).toFixed(2); | ||
|
||
var g = n0 > n1 ? green : ''; | ||
var r = n0 > n1 ? '' : red; | ||
var c = r || g; | ||
|
||
if (show === 'green' && !g || show === 'red' && !r) | ||
return; | ||
} | ||
|
||
var r0 = util.format( | ||
'%s%s: %d%s', | ||
g, | ||
nodes[0], | ||
n0.toPrecision(5), g ? reset : '' | ||
); | ||
var r1 = util.format( | ||
'%s%s: %d%s', | ||
r, | ||
nodes[1], | ||
n1.toPrecision(5), r ? reset : '' | ||
); | ||
pct = c + pct + '%' + reset; | ||
var l = util.format('%s: %s %s', bench, r0, r1); | ||
maxLen = Math.max(l.length + pct.length, maxLen); | ||
return [l, pct]; | ||
}).filter(function(l) { | ||
return l; | ||
}).forEach(function(line) { | ||
var l = line[0]; | ||
var pct = line[1]; | ||
var dotLen = maxLen - l.length - pct.length + 2; | ||
var dots = ' ' + new Array(Math.max(0, dotLen)).join('.') + ' '; | ||
console.log(l + dots + pct); | ||
// If there are more benchmarks execute the next | ||
if (i + 1 < queue.length) { | ||
recursive(i + 1); | ||
} | ||
}); | ||
console.log(end); | ||
} | ||
|
||
function avg(list) { | ||
if (list.length >= 3) { | ||
list = list.sort(); | ||
var q = Math.floor(list.length / 4) || 1; | ||
list = list.slice(q, -q); | ||
} | ||
return list.reduce(function(a, b) { | ||
return a + b; | ||
}, 0) / list.length; | ||
} | ||
})(0); |