node/benchmark/scatter.R

#!/usr/bin/env Rscript
library(ggplot2);
library(plyr);

# Locate the directory containing this script (the equivalent of Node's
# __dirname) and load ./_cli.R from it.
#
# Rscript exposes the script path as a `--file=<path>` entry in the full
# argument list. The pattern is anchored and only the first match is used, so
# a user-supplied argument that merely contains "--file" cannot be mistaken
# for the script path.
args = commandArgs(trailingOnly = FALSE);
script.file = sub("^--file=", "", grep("^--file=", args, value = TRUE)[1]);
if (is.na(script.file)) {
  stop("unable to locate the script path; run this file with Rscript");
}
dirname = dirname(script.file);
source(paste0(dirname, '/_cli.R'), chdir=TRUE);

# args.options is not defined in this file; presumably it is provided by the
# _cli.R script sourced above -- confirm there.
# Show the usage text when a required option is missing, or when --plot
# compares equal to TRUE (presumably meaning it was given without a filename).
if (is.null(args.options$xaxis) || is.null(args.options$category) ||
   (!is.null(args.options$plot) && args.options$plot == TRUE)) {
  stop("usage: cat file.csv | Rscript scatter.R [variable=value ...]
  --xaxis    variable   variable name to use as xaxis (required)
  --category variable   variable name to use as colored category (required)
  --plot     filename   save plot to filename
  --log                 use a log-2 scale for xaxis in the plot");
}

# Pull the command line options used by the rest of the script into
# individual variables. --log is treated as a flag: only its presence matters.
plot.filename <- args.options$plot
x.axis.name <- args.options$xaxis
category.name <- args.options$category
use.log2 <- !is.null(args.options$log)

# Read the CSV data from standard input, trimming surrounding whitespace
dat <- data.frame(read.csv(file('stdin'), strip.white=TRUE))

# Work out which variables get aggregated: every column that is not one of
# the fixed columns, the x-axis or the category, and that does not hold
# exactly one distinct value (a variable that never changes isn't aggregated).
excluded.columns <- c('rate', 'time', 'filename', x.axis.name, category.name)
aggregate <- Filter(function(column) {
  !(column %in% excluded.columns) && length(unique(dat[[column]])) != 1
}, names(dat))

# Report the aggregated variables, one per line, followed by a blank line
# when there is at least one of them.
cat(sprintf('aggregating variable: %s\n', aggregate), sep='')
if (length(aggregate) > 0) {
  cat('\n')
}

# Calculate statistics: for every combination of the x-axis variable and the
# category variable, compute the mean rate and the half-width of its 95 %
# confidence interval. The result has one row per combination with the
# columns `rate` (the mean) and `confidence.interval`.
stats = ddply(dat, c(x.axis.name, category.name), function(subdat) {
  rate = subdat$rate;
  n = length(rate);

  # A confidence interval needs at least two observations: with a single one
  # var() is NA and qt() with zero degrees of freedom produces NaN together
  # with a warning. Report the interval as NA explicitly in that case.
  ci = NA_real_;
  if (n > 1) {
    # standard error of the mean
    se = sqrt(var(rate) / n);
    # half-width of the two-sided 95 % interval from the t-distribution
    ci = se * qt(0.975, n - 1);
  }

  return(data.frame(
    rate = mean(rate),
    confidence.interval = ci
  ));
});

print(stats, row.names=FALSE);

# Save a scatter plot of the statistics when a plot filename was given.
if (!is.null(plot.filename)) {
  # The mean is stored in the `rate` column of `stats` (there is no `mean`
  # column), so `rate` is what gets mapped to the y axis and used as the
  # centre of the error bars.
  p = ggplot(stats, aes_string(x=x.axis.name, y='rate', colour=category.name));
  if (use.log2) {
    p = p + scale_x_continuous(trans='log2');
  }
  # na.rm=TRUE: rows whose confidence interval is undefined (e.g. a group
  # with a single observation) are dropped from the error bars silently; the
  # points themselves are still drawn.
  p = p + geom_errorbar(
    aes(ymin=rate-confidence.interval, ymax=rate+confidence.interval),
    width=.1, na.rm=TRUE
  );
  p = p + geom_point();
  p = p + ylab("rate of operations (higher is better)");
  # The title is the first cell of the input data; presumably that is the
  # benchmark filename -- confirm against the CSV producer.
  p = p + ggtitle(dat[1, 1]);
  ggsave(plot.filename, p);
}
benchmark: add script for creating scatter plot Previously this was a tool in `plot.R`. It is now a more complete tool which executes the benchmarks many times and creates a boxplot. PR-URL: https://github.com/nodejs/node/pull/7094 Reviewed-By: Trevor Norris <trev.norris@gmail.com> Reviewed-By: Jeremiah Senkpiel <fishrock123@rocketmail.com> Reviewed-By: Brian White <mscdex@mscdex.net> Reviewed-By: Anna Henningsen <anna@addaleax.net> 9 years ago			`#!/usr/bin/env Rscript`
			`library(ggplot2);`
			`library(plyr);`

			`# get __dirname and load ./_cli.R`
			`args = commandArgs(trailingOnly = F);`
			`dirname = dirname(sub("--file=", "", args[grep("--file", args)]));`
			`source(paste0(dirname, '/_cli.R'), chdir=T);`

			`if (is.null(args.options$xaxis) \|\| is.null(args.options$category) \|\|`
			`(!is.null(args.options$plot) && args.options$plot == TRUE)) {`
			`stop("usage: cat file.csv \| Rscript scatter.R [variable=value ...]`
			`--xaxis variable variable name to use as xaxis (required)`
			`--category variable variable name to use as colored category (required)`
			`--plot filename save plot to filename`
			`--log use a log-2 scale for xaxis in the plot");`
			`}`

			`plot.filename = args.options$plot;`

			`# parse options`
			`x.axis.name = args.options$xaxis;`
			`category.name = args.options$category;`
			`use.log2 = !is.null(args.options$log);`

			`# parse data`
			`dat = read.csv(file('stdin'), strip.white=TRUE);`
			`dat = data.frame(dat);`

			`# List of aggregated variables`
			`aggregate = names(dat);`
			`aggregate = aggregate[`
			`! aggregate %in% c('rate', 'time', 'filename', x.axis.name, category.name)`
			`];`
			`# Variables that don't change aren't aggregated`
			`for (aggregate.key in aggregate) {`
			`if (length(unique(dat[[aggregate.key]])) == 1) {`
			`aggregate = aggregate[aggregate != aggregate.key];`
			`}`
			`}`

			`# Print out aggregated variables`
			`for (aggregate.variable in aggregate) {`
			`cat(sprintf('aggregating variable: %s\n', aggregate.variable));`
			`}`
			`if (length(aggregate) > 0) {`
			`cat('\n');`
			`}`

			`# Calculate statistics`
			`stats = ddply(dat, c(x.axis.name, category.name), function(subdat) {`
			`rate = subdat$rate;`

			`# calculate standard error of the mean`
			`se = sqrt(var(rate)/length(rate));`

			`# calculate mean and 95 % confidence interval`
			`r = list(`
			`rate = mean(rate),`
			`confidence.interval = se * qt(0.975, length(rate) - 1)`
			`);`

			`return(data.frame(r));`
			`});`

			`print(stats, row.names=F);`

			`if (!is.null(plot.filename)) {`
			`p = ggplot(stats, aes_string(x=x.axis.name, y='mean', colour=category.name));`
			`if (use.log2) {`
			`p = p + scale_x_continuous(trans='log2');`
			`}`
			`p = p + geom_errorbar(aes(ymin=mean-confidence.interval, ymax=mean+confidence.interval), width=.1);`
			`p = p + geom_point();`
			`p = p + ylab("rate of operations (higher is better)");`
			`p = p + ggtitle(dat[1, 1]);`
			`ggsave(plot.filename, p);`
			`}`