Skip to content

Commit

Permalink
benchmark: use t-test for comparing node versions
Browse files Browse the repository at this point in the history
The data sampling is done in node and the data processing is done in R.
Only plyr was added as an R dependency and it is fairly standard.

PR-URL: #7094
Reviewed-By: Trevor Norris <[email protected]>
Reviewed-By: Jeremiah Senkpiel <[email protected]>
Reviewed-By: Brian White <[email protected]>
Reviewed-By: Anna Henningsen <[email protected]>
  • Loading branch information
AndreasMadsen committed Jul 26, 2016
1 parent 8bb59fd commit 855009a
Show file tree
Hide file tree
Showing 3 changed files with 161 additions and 162 deletions.
24 changes: 24 additions & 0 deletions benchmark/_cli.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@

args = commandArgs(TRUE);

# Parsed optional arguments, filled in by the loop below. For a flag given
# without a value the entry is TRUE; otherwise it is the plain argument
# that immediately follows the flag. Consumed by the sourcing script.
args.options = list();

# Most recently seen option key, still waiting for a possible value.
temp.option.key = NULL;

for (arg in args) {
  if (substring(arg, 1, 1) == '-') {
    # Option flag declaration: strip '-' (or '--') to get the key name.
    offset = if (substring(arg, 2, 2) == '-') 3 else 2;
    temp.option.key = substring(arg, offset);

    # Mark the flag as present; a following value argument overwrites this.
    args.options[[temp.option.key]] = TRUE;
  } else if (!is.null(temp.option.key)) {
    # Plain argument directly after a flag: treat it as that flag's value.
    args.options[[temp.option.key]] = arg;

    temp.option.key = NULL;
  }
}
70 changes: 70 additions & 0 deletions benchmark/compare.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#!/usr/bin/env Rscript
# Compares benchmark samples from two node binaries ("old" vs "new").
# Reads the csv produced by benchmark/compare.js from stdin, runs a
# two-sample t-test per benchmark configuration, and prints a table of
# relative improvements with significance stars. Optionally saves a
# box plot when --plot <filename> is given.
library(ggplot2);
library(plyr);

# get __dirname and load ./_cli.R
args = commandArgs(trailingOnly = F);
dirname = dirname(sub("--file=", "", args[grep("--file", args)]));
source(paste0(dirname, '/_cli.R'), chdir=T);

# Show usage on --help, or when --plot was given without a filename
# (_cli.R stores TRUE for a flag that has no following value).
if (!is.null(args.options$help) ||
    (!is.null(args.options$plot) && args.options$plot == TRUE)) {
  stop("usage: cat file.csv | Rscript compare.R
--help show this message
--plot filename save plot to filename");
}

plot.filename = args.options$plot;

# One row per benchmark run: binary, filename, configuration, rate, time.
dat = read.csv(file('stdin'));
dat = data.frame(dat);
# Two name keys: a two-line variant for readable plot labels, and a
# single-line variant used as the grouping key for the statistics table.
dat$nameTwoLines = paste0(dat$filename, '\n', dat$configuration);
dat$name = paste0(dat$filename, dat$configuration);

# Create a box plot
if (!is.null(plot.filename)) {
  p = ggplot(data=dat);
  p = p + geom_boxplot(aes(x=nameTwoLines, y=rate, fill=binary));
  p = p + ylab("rate of operations (higher is better)");
  p = p + xlab("benchmark");
  p = p + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5));
  ggsave(plot.filename, p);
}

# Print a table with results, one row per benchmark configuration.
statistics = ddply(dat, "name", function(subdat) {
  # Perform a statistical test to see if there actually is a difference in
  # performance between the two binaries (t.test defaults to a Welch
  # two-sample test, which does not assume equal variances).
  w = t.test(rate ~ binary, data=subdat);

  # Calculate improvement for the "new" binary compared with the "old" binary
  new_mu = mean(subset(subdat, binary == "new")$rate);
  old_mu = mean(subset(subdat, binary == "old")$rate);
  improvement = sprintf("%.2f %%", ((new_mu - old_mu) / old_mu * 100));

  # Add user friendly stars to the table. There should be at least one star
  # before you can say that there is an improvement.
  significant = '';
  if (w$p.value < 0.001) {
    significant = '***';
  } else if (w$p.value < 0.01) {
    significant = '**';
  } else if (w$p.value < 0.05) {
    significant = '*';
  }

  r = list(
    improvement = improvement,
    significant = significant,
    p.value = w$p.value
  );
  return(data.frame(r));
});


# Set the benchmark names as the row.names to left align them in the print
row.names(statistics) = statistics$name;
statistics$name = NULL;

# Widen the terminal width limit so rows are not wrapped when printed.
options(width = 200);
print(statistics);
229 changes: 67 additions & 162 deletions benchmark/compare.js
Original file line number Diff line number Diff line change
@@ -1,181 +1,86 @@
'use strict';
var usage = 'node benchmark/compare.js ' +
'<node-binary1> <node-binary2> ' +
'[--html] [--red|-r] [--green|-g] ' +
'[-- <type> [testFilter]]';

var show = 'both';
var nodes = [];
var html = false;
var benchmarks;
const fork = require('child_process').fork;
const path = require('path');
const CLI = require('./_cli.js');

//
// Parse arguments
//
const cli = CLI(`usage: ./node compare.js [options] [--] <category> ...
Run each benchmark in the <category> directory many times using two diffrent
node versions. More than one <category> directory can be specified.
The output is formatted as csv, which can be processed using for
example 'compare.R'.
--new ./new-node-binary new node binary (required)
--old ./old-node-binary old node binary (required)
--runs 30 number of samples
--filter pattern string to filter benchmark scripts
--set variable=value set benchmark variable (can be repeated)
`, {
arrayArgs: ['set']
});

if (!cli.optional.new || !cli.optional.old) {
cli.abort(cli.usage);
return;
}

for (var i = 2; i < process.argv.length; i++) {
var arg = process.argv[i];
switch (arg) {
case '--red': case '-r':
show = show === 'green' ? 'both' : 'red';
break;
case '--green': case '-g':
show = show === 'red' ? 'both' : 'green';
break;
case '--html':
html = true;
break;
case '-h': case '-?': case '--help':
console.log(usage);
process.exit(0);
break;
case '--':
benchmarks = [];
break;
default:
if (Array.isArray(benchmarks))
benchmarks.push(arg);
else
nodes.push(arg);
break;
}
const binaries = ['old', 'new'];
const runs = cli.optional.runs ? parseInt(cli.optional.runs, 10) : 30;
const benchmarks = cli.benchmarks();

if (benchmarks.length === 0) {
console.error('no benchmarks found');
process.exit(1);
}

var start, green, red, reset, end;
if (!html) {
start = '';
green = '\u001b[1;32m';
red = '\u001b[1;31m';
reset = '\u001b[m';
end = '';
} else {
start = '<pre style="background-color:#333;color:#eee">';
green = '<span style="background-color:#0f0;color:#000">';
red = '<span style="background-color:#f00;color:#fff">';
reset = '</span>';
end = '</pre>';
// Create queue from the benchmarks list such both node versions are tested
// `runs` amount of times each.
const queue = [];
for (let iter = 0; iter < runs; iter++) {
for (const filename of benchmarks) {
for (const binary of binaries) {
queue.push({ binary, filename, iter });
}
}
}

var runBench = process.env.NODE_BENCH || 'bench';
// Print csv header
console.log('"binary", "filename", "configuration", "rate", "time"');

if (nodes.length !== 2)
return console.error('usage:\n %s', usage);
(function recursive(i) {
const job = queue[i];

var spawn = require('child_process').spawn;
var results = {};
var toggle = 1;
var r = (+process.env.NODE_BENCH_RUNS || 1) * 2;
const child = fork(path.resolve(__dirname, job.filename), cli.optional.set, {
execPath: cli.optional[job.binary]
});

run();
function run() {
if (--r < 0)
return compare();
toggle = ++toggle % 2;
child.on('message', function(data) {
// Construct configuration string, " A=a, B=b, ..."
let conf = '';
for (const key of Object.keys(data.conf)) {
conf += ' ' + key + '=' + JSON.stringify(data.conf[key]);
}
conf = conf.slice(1);

var node = nodes[toggle];
console.error('running %s', node);
var env = {};
for (var i in process.env)
env[i] = process.env[i];
env.NODE = node;
// Escape qoutes (") for correct csv formatting
conf = conf.replace(/"/g, '""');

var out = '';
var child;
if (Array.isArray(benchmarks) && benchmarks.length) {
child = spawn(
node,
['benchmark/run.js'].concat(benchmarks),
{ env: env }
);
} else {
child = spawn('make', [runBench], { env: env });
}
child.stdout.setEncoding('utf8');
child.stdout.on('data', function(c) {
out += c;
console.log(`"${job.binary}", "${job.filename}", "${conf}", ` +
`${data.rate}, ${data.time}`);
});

child.stderr.pipe(process.stderr);

child.on('close', function(code) {
child.once('close', function(code) {
if (code) {
console.error('%s exited with code=%d', node, code);
process.exit(code);
} else {
out.trim().split(/\r?\n/).forEach(function(line) {
line = line.trim();
if (!line)
return;

var s = line.split(':');
var num = +s.pop();
if (!num && num !== 0)
return;

line = s.join(':');
var res = results[line] = results[line] || {};
res[node] = res[node] || [];
res[node].push(num);
});

run();
}
});
}

function compare() {
// each result is an object with {"foo.js arg=bar":12345,...}
// compare each thing, and show which node did the best.
// node[0] is shown in green, node[1] shown in red.
var maxLen = -Infinity;
var util = require('util');
console.log(start);

Object.keys(results).map(function(bench) {
var res = results[bench];
var n0 = avg(res[nodes[0]]);
var n1 = avg(res[nodes[1]]);

var pct = ((n0 - n1) / n1 * 100).toFixed(2);

var g = n0 > n1 ? green : '';
var r = n0 > n1 ? '' : red;
var c = r || g;

if (show === 'green' && !g || show === 'red' && !r)
return;
}

var r0 = util.format(
'%s%s: %d%s',
g,
nodes[0],
n0.toPrecision(5), g ? reset : ''
);
var r1 = util.format(
'%s%s: %d%s',
r,
nodes[1],
n1.toPrecision(5), r ? reset : ''
);
pct = c + pct + '%' + reset;
var l = util.format('%s: %s %s', bench, r0, r1);
maxLen = Math.max(l.length + pct.length, maxLen);
return [l, pct];
}).filter(function(l) {
return l;
}).forEach(function(line) {
var l = line[0];
var pct = line[1];
var dotLen = maxLen - l.length - pct.length + 2;
var dots = ' ' + new Array(Math.max(0, dotLen)).join('.') + ' ';
console.log(l + dots + pct);
// If there are more benchmarks execute the next
if (i + 1 < queue.length) {
recursive(i + 1);
}
});
console.log(end);
}

function avg(list) {
if (list.length >= 3) {
list = list.sort();
var q = Math.floor(list.length / 4) || 1;
list = list.slice(q, -q);
}
return list.reduce(function(a, b) {
return a + b;
}, 0) / list.length;
}
})(0);

0 comments on commit 855009a

Please sign in to comment.