#!/usr/bin/env Rscript
# Reads benchmark results as CSV from stdin, prints the mean `rate` and the
# half-width of its 95 % confidence interval for every combination of the
# --xaxis and --category variables, and optionally saves a plot of the result.
library(ggplot2);
library(plyr);

# get __dirname and load ./_cli.R
# The script path is taken from the `--file=<path>` entry of the full command
# line. The pattern is anchored and only the first match is used so that a
# user argument that merely contains "--file" cannot be mistaken for it.
args = commandArgs(trailingOnly = FALSE);
script.arg = grep("^--file=", args, value = TRUE);
if (length(script.arg) == 0) {
  stop("unable to locate scatter.R: no --file= argument on the command line");
}
dirname = dirname(sub("^--file=", "", script.arg[1]));
# NOTE(review): args.options is not defined in this file; it is presumably
# created by _cli.R from the command line arguments -- confirm there.
source(file.path(dirname, '_cli.R'), chdir = TRUE);

# --xaxis and --category are mandatory; a --plot option that is exactly TRUE
# (rather than a filename) is rejected as well.
if (is.null(args.options$xaxis) || is.null(args.options$category) ||
    (!is.null(args.options$plot) && args.options$plot == TRUE)) {
  stop("usage: cat file.csv | Rscript scatter.R [variable=value ...]
  --xaxis    variable   variable name to use as xaxis (required)
  --category variable   variable name to use as colored category (required)
  --plot     filename   save plot to filename
  --log                 use a log-2 scale for xaxis in the plot");
}

plot.filename = args.options$plot;

# parse options
x.axis.name = args.options$xaxis;
category.name = args.options$category;
use.log2 = !is.null(args.options$log);

# parse data
dat = read.csv(file('stdin'), strip.white = TRUE);
dat = data.frame(dat);

# The statistics below are computed from the `rate` column; fail early with a
# clear message instead of silently producing NA means when it is absent.
if (!('rate' %in% names(dat))) {
  stop("input data has no 'rate' column");
}

# List of aggregated variables: every column that is neither a result column,
# the x axis nor the category ...
aggregate = names(dat);
aggregate = aggregate[
  ! aggregate %in% c('rate', 'time', 'filename', x.axis.name, category.name)
];
# ... and that actually varies. Variables that don't change aren't aggregated.
is.constant = vapply(
  aggregate,
  function(aggregate.key) length(unique(dat[[aggregate.key]])) == 1,
  logical(1)
);
aggregate = aggregate[!is.constant];

# Print out aggregated variables
cat(sprintf('aggregating variable: %s\n', aggregate), sep = '');
if (length(aggregate) > 0) {
  cat('\n');
}

# Calculate statistics: one row per (x axis, category) group
stats = ddply(dat, c(x.axis.name, category.name), function(subdat) {
  rate = subdat[['rate']];
  n = length(rate);

  # calculate confidence interval of the mean (Student's t, two-sided 95 %);
  # it is left as NA when the group has a single observation
  ci = NA;
  if (n > 1) {
    se = sqrt(var(rate) / n);
    ci = se * qt(0.975, n - 1);
  }

  # mean and 95 % confidence interval for this group
  return(data.frame(rate = mean(rate), confidence.interval = ci));
});

print(stats, row.names = FALSE);

if (!is.null(plot.filename)) {
  p = ggplot(stats, aes_string(x = x.axis.name, y = 'rate', colour = category.name));
  if (use.log2) {
    p = p + scale_x_continuous(trans = 'log2');
  }
  # na.rm = TRUE: groups without a confidence interval get no error bar
  p = p + geom_errorbar(
    aes(ymin = rate - confidence.interval, ymax = rate + confidence.interval),
    width = .1, na.rm = TRUE
  );
  p = p + geom_point();
  p = p + geom_line();
  p = p + ylab("rate of operations (higher is better)");
  # The title is the first cell of the input data.
  # NOTE(review): presumably the benchmark filename column -- confirm.
  p = p + ggtitle(dat[1, 1]);
  ggsave(plot.filename, p);
}