#!/usr/bin/env python
# Copyright (c) 2006,2007 Mitch Garnaat http://garnaat.org/
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
import os
from datetime import datetime, timedelta
from boto.utils import parse_ts
import boto


class ResultProcessor(object):
    """Retrieve the results of one batch and report timing statistics.

    Result records are read from the first available source, in this
    order: the output queue, the output domain, or (files only) the
    output bucket.  Every record is appended to a CSV log in the target
    directory and, optionally, its output files are downloaded there.
    """

    # Name of the CSV log written into the results directory.
    LogFileName = 'log.csv'

    def __init__(self, batch_name, sd, mimetype_files=None):
        """Set up counters and look up the result sources.

        :param batch_name: Batch identifier; compared against the
            'Batch' field of each record.
        :param sd: Object exposing ``get_obj(name)``; it is asked for
            'output_queue', 'output_domain' and 'output_bucket'
            (presumably a boto service definition -- confirm).
        :param mimetype_files: Accepted for interface compatibility;
            not used by this class.
        """
        self.sd = sd
        self.batch = batch_name
        self.log_fp = None
        self.num_files = 0
        self.total_time = 0
        # Sentinels: min_time > max_time until a record has been timed.
        self.min_time = timedelta.max
        self.max_time = timedelta.min
        self.earliest_time = datetime.max
        self.latest_time = datetime.min
        self.queue = self.sd.get_obj('output_queue')
        self.domain = self.sd.get_obj('output_domain')

    @staticmethod
    def _total_seconds(delta):
        """Return whole seconds in *delta*, including its days part.

        ``timedelta.seconds`` alone silently drops full days, which
        misreports any duration of 24 hours or more.
        """
        return delta.days * 86400 + delta.seconds

    @staticmethod
    def _csv_field(value):
        """Return *value* as text, quoted when CSV requires it."""
        text = '%s' % (value,)
        if ',' in text or '"' in text or '\n' in text:
            # Double embedded quotes so the quoted field stays parseable.
            text = '"%s"' % text.replace('"', '""')
        return text

    def calculate_stats(self, msg):
        """Fold the timing of one record into the running statistics.

        :param msg: Mapping with 'Service-Read' and 'Service-Write'
            timestamp strings understood by ``boto.utils.parse_ts``.
        """
        start_time = parse_ts(msg['Service-Read'])
        end_time = parse_ts(msg['Service-Write'])
        elapsed_time = end_time - start_time
        if elapsed_time > self.max_time:
            self.max_time = elapsed_time
        if elapsed_time < self.min_time:
            self.min_time = elapsed_time
        self.total_time += self._total_seconds(elapsed_time)
        if start_time < self.earliest_time:
            self.earliest_time = start_time
        if end_time > self.latest_time:
            self.latest_time = end_time

    def log_message(self, msg, path):
        """Append *msg* as one CSV row to the log file under *path*.

        The log is opened lazily in append mode; the sorted keys of the
        first message logged after opening are written as a header row.

        :param msg: Mapping of field name to value.
        :param path: Directory that holds the log file.
        """
        keys = sorted(msg.keys())
        if not self.log_fp:
            log_name = os.path.join(path, self.LogFileName)
            self.log_fp = open(log_name, 'a')
            self.log_fp.write(','.join(keys) + '\n')
        values = [self._csv_field(msg[key]) for key in keys]
        self.log_fp.write(','.join(values) + '\n')

    def process_record(self, record, path, get_file=True):
        """Log one result record, time it, and fetch its output files.

        :param record: Mapping with 'OutputKey' (comma-separated
            outputs, each optionally followed by ';...'), the timing
            fields used by ``calculate_stats`` and 'OutputBucket' or
            'Bucket' naming the S3 bucket.
        :param path: Directory receiving the log and downloaded files.
        :param get_file: When false, outputs are counted but not
            downloaded.
        """
        self.log_message(record, path)
        self.calculate_stats(record)
        outputs = record['OutputKey'].split(',')
        bucket = None
        if get_file:
            # The bucket is only needed for downloads, so skip the
            # lookup entirely when no files are requested.
            if 'OutputBucket' in record:
                bucket_name = record['OutputBucket']
            else:
                bucket_name = record['Bucket']
            bucket = boto.lookup('s3', bucket_name)
        for output in outputs:
            if get_file:
                key_name = output.split(';')[0]
                key = bucket.lookup(key_name)
                if key is None:
                    # A missing key should not abort the whole batch.
                    print('unable to find key: %s' % key_name)
                    continue
                file_name = os.path.join(path, key_name)
                print('retrieving file: %s to %s' % (key_name, file_name))
                key.get_contents_to_filename(file_name)
            self.num_files += 1

    def get_results_from_queue(self, path, get_file=True, delete_msg=True):
        """Drain the output queue, processing messages of this batch.

        :param path: Directory receiving the log and downloaded files.
        :param get_file: Passed through to ``process_record``.
        :param delete_msg: When true, each processed message is deleted
            from the queue.
        """
        m = self.queue.read()
        while m:
            if 'Batch' in m and m['Batch'] == self.batch:
                self.process_record(m, path, get_file)
                # Only messages we actually processed are removed.
                if delete_msg:
                    self.queue.delete_message(m)
            m = self.queue.read()

    def get_results_from_domain(self, path, get_file=True):
        """Process every item of this batch found in the output domain.

        :param path: Directory receiving the log and downloaded files.
        :param get_file: Passed through to ``process_record``.
        """
        rs = self.domain.query("['Batch'='%s']" % self.batch)
        for item in rs:
            self.process_record(item, path, get_file)

    def get_results_from_bucket(self, path):
        """Download every key of the output bucket into *path*.

        No records are available on this route, so nothing is logged
        and no timing statistics are collected.

        :param path: Directory receiving the downloaded files.
        """
        bucket = self.sd.get_obj('output_bucket')
        if bucket:
            print('No output queue or domain, just retrieving files from output_bucket')
            for key in bucket:
                # Use the key's name, not the key object, for the path.
                file_name = os.path.join(path, key.name)
                print('retrieving file: %s to %s' % (key.name, file_name))
                key.get_contents_to_filename(file_name)
                self.num_files += 1

    def get_results(self, path, get_file=True, delete_msg=True):
        """Retrieve all results for the batch and print a summary.

        :param path: Directory for the log and downloaded files;
            created when it does not exist.
        :param get_file: When false, records are logged and counted but
            their files are not downloaded (queue and domain only).
        :param delete_msg: When true, processed queue messages are
            deleted.
        """
        if not os.path.isdir(path):
            os.makedirs(path)
        try:
            if self.queue:
                self.get_results_from_queue(path, get_file, delete_msg)
            elif self.domain:
                self.get_results_from_domain(path, get_file)
            else:
                self.get_results_from_bucket(path)
        finally:
            # Close the log even on failure, and forget the handle so a
            # later call reopens it instead of writing to a closed file.
            if self.log_fp:
                self.log_fp.close()
                self.log_fp = None
        print('%d results successfully retrieved.' % self.num_files)
        # min_time <= max_time only once a record has been timed; the
        # bucket-only route never records timings.
        if self.num_files > 0 and self.min_time <= self.max_time:
            self.avg_time = float(self.total_time) / self.num_files
            print('Minimum Processing Time: %d'
                  % self._total_seconds(self.min_time))
            print('Maximum Processing Time: %d'
                  % self._total_seconds(self.max_time))
            print('Average Processing Time: %f' % self.avg_time)
            self.elapsed_time = self.latest_time - self.earliest_time
            elapsed_seconds = self._total_seconds(self.elapsed_time)
            print('Elapsed Time: %d' % elapsed_seconds)
            # Throughput is undefined for a zero-length interval.
            if elapsed_seconds > 0:
                tput = self.num_files / (elapsed_seconds / 60.0)
                print('Throughput: %f transactions / minute' % tput)