• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
#!/usr/bin/env Rscript
# Summarise benchmark rates read as CSV from stdin.
#
# Rows are grouped by the --xaxis and --category columns. For every group the
# mean of the `rate` column and the half-width of its 95 % confidence interval
# are printed, and optionally drawn as a scatter/line plot with error bars.
#
# Option parsing is delegated to ./_cli.R, which is sourced below and is
# expected to define `args.options` (not visible in this file -- confirm there).
library(ggplot2);
library(plyr);

# get __dirname and load ./_cli.R
# Rscript passes the script path as a single `--file=<path>` argument. The
# pattern is anchored so that user arguments that merely contain "--file"
# cannot be mistaken for it.
args = commandArgs(trailingOnly = FALSE);
script.arg = grep("^--file=", args, value = TRUE);
if (length(script.arg) == 0) {
  stop("cannot locate the script directory: no --file= argument found");
}
dirname = dirname(sub("^--file=", "", script.arg[1]));
source(file.path(dirname, '_cli.R'), chdir=TRUE);

# An option counts as missing when it is absent (NULL) or when it was given
# without a value; the original guard already treated `--plot` == TRUE as
# "flag without a value", so the same test is applied to every option that
# requires one.
option.missing = function(value) {
  is.null(value) || isTRUE(value);
}

if (option.missing(args.options$xaxis) ||
    option.missing(args.options$category) ||
    isTRUE(args.options$plot)) {
  stop("usage: cat file.csv | Rscript scatter.R [variable=value ...]
  --xaxis    variable   variable name to use as xaxis (required)
  --category variable   variable name to use as colored category (required)
  --plot     filename   save plot to filename
  --log                 use a log-2 scale for xaxis in the plot");
}

# parse options
plot.filename = args.options$plot;
x.axis.name = args.options$xaxis;
category.name = args.options$category;
use.log2 = !is.null(args.options$log);

# parse data
dat = read.csv(file('stdin'), strip.white=TRUE);

# Fail early, with the list of available columns, instead of letting ddply or
# ggplot raise a less helpful error further down.
required.columns = c('rate', x.axis.name, category.name);
missing.columns = setdiff(required.columns, names(dat));
if (length(missing.columns) > 0) {
  stop(sprintf(
    "missing column(s) in input: %s (available: %s)",
    paste(missing.columns, collapse=', '),
    paste(names(dat), collapse=', ')
  ));
}

# List of aggregated variables: every column that is neither a known
# bookkeeping column nor one of the two grouping columns ...
aggregated = setdiff(
  names(dat),
  c('rate', 'time', 'filename', x.axis.name, category.name)
);
# ... and that actually varies. Variables that don't change aren't aggregated.
varies = vapply(
  aggregated,
  function(key) length(unique(dat[[key]])) != 1,
  logical(1)
);
aggregated = aggregated[varies];

# Print out aggregated variables, followed by a blank separator line
cat(sprintf('aggregating variable: %s\n', aggregated), sep='');
if (length(aggregated) > 0) {
  cat('\n');
}

# Calculate statistics: one output row per (x axis, category) combination
stats = ddply(dat, c(x.axis.name, category.name), function(subdat) {
  rate = subdat[['rate']];
  n = length(rate);

  # Half-width of the 95 % confidence interval of the mean, using the
  # Student t quantile with n - 1 degrees of freedom. It is undefined for a
  # single sample, in which case it stays NA.
  ci = NA_real_;
  if (n > 1) {
    se = sqrt(var(rate) / n);
    ci = se * qt(0.975, n - 1);
  }

  data.frame(
    rate = mean(rate),
    confidence.interval = ci
  );
});

print(stats, row.names=FALSE);

if (!is.null(plot.filename)) {
  p = ggplot(stats, aes_string(x=x.axis.name, y='rate', colour=category.name));
  if (use.log2) {
    p = p + scale_x_continuous(trans='log2');
  }
  # na.rm silences the warning for groups whose interval is NA
  p = p + geom_errorbar(
    aes(ymin=rate-confidence.interval, ymax=rate+confidence.interval),
    width=.1, na.rm=TRUE
  );
  p = p + geom_point();
  p = p + geom_line();
  p = p + ylab("rate of operations (higher is better)");
  # The title is the value in the first row and first column of the input
  # (presumably the benchmark file name -- depends on the CSV column order).
  p = p + ggtitle(dat[1, 1]);
  ggsave(plot.filename, p);
}