#!/usr/bin/env python
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# A script to accumulate values from the 'dmprof cat' command into CSV or else.
#
# Usage:
#   ./accumulate.py -f <format> -t <template-name> < input.json > output
#
# <format> is one of "csv", "json", and "tree". If "csv" or "json" is given,
# accumulate.py dumps a similar file to "dmprof csv|json". If "tree" is given,
# accumulate.py dumps a human-readable breakdown tree.
#
# <template-name> is a label in templates.json.

import datetime
import json
import logging
import optparse
import sys

try:
    from lib.ordered_dict import OrderedDict
except ImportError:
    # The project-local module is not importable (e.g. the script is run from
    # another directory); the standard library class is used instead.
    from collections import OrderedDict


LOGGER = logging.getLogger('dmprof-accumulate')

# Divisor applied to byte counts for each accepted output exponent.
_EXPONENT_DIVISORS = {'B': 1, 'K': 1024.0, 'M': 1024.0 * 1024.0}


def _sum_units(world_units, unit_ids):
    """Returns the sum of the sizes in |world_units| for every id in |unit_ids|."""
    return sum(world_units[unit_id] for unit_id in unit_ids)


def _warn(message):
    """Writes a 'WARNING: ' line to stderr."""
    sys.stderr.write('WARNING: %s\n' % message)


def visit_in_template(template, snapshot, depth):
    """Visits all categories via a given template.

    This function is not used. It's a sample function to traverse a template.

    Args:
        template: A sequence of (world name, breakdown name, sub-rules dict).
        snapshot: A dict of worlds; each has a 'breakdown' dict.
        depth: The current nesting level, used to indent the printed rule.
    """
    world, breakdown, rules = template[0], template[1], template[2]

    for rule in snapshot[world]['breakdown'][breakdown]:
        sys.stdout.write(' ' * depth + rule + '\n')
        if rule in rules:
            visit_in_template(rules[rule], snapshot, depth + 1)


def accumulate(template, snapshot, units_dict, target_units):
    """Accumulates units in a JSON |snapshot| with applying a given |template|.

    Args:
        template: A template tree included in a dmprof cat JSON file.
        snapshot: A snapshot in a dmprof cat JSON file.
        units_dict: A dict of units in worlds.
        target_units: A list of unit ids which are a target of this
            accumulation.

    Returns:
        A tuple (category_tree, total, remainder_units). |category_tree| maps
        each non-hidden rule to its subtotal, or to a nested tree when the
        template breaks the rule down further; the key None in a nested tree
        holds the size not accounted for by the sub-breakdown. |total| is the
        sum of all subtotals. |remainder_units| is the set of target units
        that no non-hidden rule matched.
    """
    world, breakdown, rules = template[0], template[1], template[2]

    remainder_units = set(target_units)
    category_tree = OrderedDict()
    total = 0

    for rule, match in snapshot[world]['breakdown'][breakdown].items():
        if 'hidden' in match and match['hidden']:
            continue

        matched_units = set(match['units']).intersection(target_units)
        subtotal = _sum_units(units_dict[world], matched_units)
        total += subtotal
        remainder_units -= matched_units

        if rule not in rules:
            # A category matched with |rule| is a leaf of the breakdown tree.
            # It is NOT broken down more.
            category_tree[rule] = subtotal
            continue

        # A category matched with |rule| is broken down more.
        subtemplate = rules[rule]
        subworld, subbreakdown = subtemplate[0], subtemplate[1]

        if subworld == world:
            # Break down in the same world: consider units.
            subtree, accounted_total, subremainder_units = accumulate(
                subtemplate, snapshot, units_dict, matched_units)
            category_tree[rule] = subtree
            subremainder_total = 0
            if subremainder_units:
                subremainder_total = _sum_units(
                    units_dict[world], subremainder_units)
                subtree[None] = subremainder_total
            if subtotal != accounted_total + subremainder_total:
                _warn('Sum of %s:%s is different from %s by %d bytes.' % (
                    subworld, subbreakdown, rule,
                    subtotal - (accounted_total + subremainder_total)))
        else:
            # Break down in a different world: consider only the total size.
            subtree, accounted_total, _ = accumulate(
                subtemplate, snapshot, units_dict, set(units_dict[subworld]))
            category_tree[rule] = subtree
            if subtotal >= accounted_total:
                subtree[None] = subtotal - accounted_total
            else:
                _warn('Sum of %s:%s is larger than %s by %d bytes.' % (
                    subworld, subbreakdown, rule, accounted_total - subtotal))
                _warn('Assuming remainder of %s is 0.' % rule)
                subtree[None] = 0

    return category_tree, total, remainder_units


def flatten(category_tree, header=''):
    """Flattens a category tree into a flat list.

    Nested rule names are joined with '>'. A falsy rule name (such as the None
    key used for remainders) is rendered as 'remaining'.

    Returns:
        A list of (flattened label, size) tuples in tree order.
    """
    result = []
    for rule, sub in category_tree.items():
        if not rule:
            rule = 'remaining'
        flattened_rule = header + '>' + rule if header else rule
        if isinstance(sub, (dict, OrderedDict)):
            result.extend(flatten(sub, flattened_rule))
        else:
            result.append((flattened_rule, sub))
    return result


def print_category_tree(category_tree, output, depth=0):
    """Prints a category tree in a human-readable format.

    Each line is the indentation for |depth|, one separating space, and then
    either 'label:' (for a subtree) or 'label: size' (for a leaf).
    """
    indent = ' ' * depth
    for label, sub in category_tree.items():
        if isinstance(sub, (dict, OrderedDict)):
            output.write('%s %s:\n' % (indent, label))
            print_category_tree(sub, output, depth + 1)
        else:
            output.write('%s %s: %d\n' % (indent, label, sub))


def flatten_all_category_trees(category_trees):
    """Flattens every tree in |category_trees|.

    Returns:
        A tuple (labels, table): |labels| is the set of all flattened labels
        seen, and |table| is a list with one ordered label-to-size dict per
        input tree.
    """
    flattened_labels = set()
    flattened_table = []
    for category_tree in category_trees:
        flattened = OrderedDict()
        for label, subtotal in flatten(category_tree):
            flattened_labels.add(label)
            flattened[label] = subtotal
        flattened_table.append(flattened)
    return flattened_labels, flattened_table


def output_csv(output, category_trees, data, first_time, output_exponent):
    """Writes one CSV row per snapshot to |output|.

    The header is 'second' followed by the sorted flattened labels. Sizes are
    divided by 1024 for 'K' and by 1024*1024 for 'M'; any other exponent
    leaves the byte counts unscaled. Labels missing from a snapshot are
    written as '0'.
    """
    flattened_labels, flattened_table = flatten_all_category_trees(
        category_trees)
    sorted_flattened_labels = sorted(flattened_labels)
    # The divisor is the same for every cell, so look it up once.
    divisor = _EXPONENT_DIVISORS.get(output_exponent.upper(), 1)

    output.write(','.join(['second'] + sorted_flattened_labels) + '\n')
    for index, row in enumerate(flattened_table):
        values = [str(data['snapshots'][index]['time'] - first_time)]
        for label in sorted_flattened_labels:
            if label not in row:
                values.append('0')
            elif divisor == 1:
                # Keep byte counts as they are so integers stay integers
                # regardless of the interpreter's division semantics.
                values.append(str(row[label]))
            else:
                values.append(str(row[label] / divisor))
        output.write(','.join(values) + '\n')


def output_json(output, category_trees, data, first_time, template_label):
    """Writes the flattened snapshots to |output| as a JSON document.

    Each snapshot row carries its flattened sizes plus 'second' (time since
    |first_time|) and 'dump_time' (the snapshot time formatted with
    datetime.fromtimestamp). The rows are stored under
    policies[template_label] together with the sorted label list.
    """
    flattened_labels, flattened_table = flatten_all_category_trees(
        category_trees)

    json_snapshots = []
    for index, row in enumerate(flattened_table):
        snapshot_time = data['snapshots'][index]['time']
        row_with_meta = row.copy()
        row_with_meta['second'] = snapshot_time - first_time
        row_with_meta['dump_time'] = datetime.datetime.fromtimestamp(
            snapshot_time).strftime('%Y-%m-%d %H:%M:%S')
        json_snapshots.append(row_with_meta)

    json_root = {
        'version': 'JSON_DEEP_2',
        'policies': {
            template_label: {
                'legends': sorted(flattened_labels),
                'snapshots': json_snapshots,
            },
        },
    }
    json.dump(json_root, output, indent=2, sort_keys=True)


def output_tree(output, category_trees):
    """Writes every category tree to |output| under a '< Snapshot #N >' title."""
    for index, category_tree in enumerate(category_trees):
        output.write('< Snapshot #%d >\n' % index)
        print_category_tree(category_tree, output, 1)
        output.write('\n')


def do_main(cat_input, output, template_label, output_format, output_exponent):
    """Does the main work: accumulate for every snapshot and print a result.

    Args:
        cat_input: A file-like object holding 'dmprof cat' JSON.
        output: A file-like object that receives the result.
        template_label: A key of the input's 'templates'; when empty, the
            input's 'default_template' is used.
        output_format: 'csv', 'json' or 'tree'.
        output_exponent: 'B', 'K' or 'M' (case-insensitive).

    Returns:
        0 on success, 1 when the requested template does not exist.

    Raises:
        NotImplementedError: for an unknown format or exponent.
    """
    if output_format not in ['csv', 'json', 'tree']:
        raise NotImplementedError(
            'The output format "%s" is not implemented.' % output_format)

    if output_exponent.upper() not in ['B', 'K', 'M']:
        raise NotImplementedError(
            'The exponent "%s" is not implemented.' % output_exponent)

    data = json.loads(cat_input.read(), object_pairs_hook=OrderedDict)

    templates = data['templates']
    if not template_label:
        template_label = data['default_template']
    if template_label not in templates:
        LOGGER.error("A template '%s' is not found.", template_label)
        return 1
    template = templates[template_label]

    category_trees = []
    first_time = None

    for snapshot in data['snapshots']:
        # Compare against None: a first snapshot taken at time 0 is a valid
        # baseline and must not be replaced by a later snapshot's time.
        if first_time is None:
            first_time = snapshot['time']

        units = {}
        for world_name, world in snapshot['worlds'].items():
            # Only the first element of each unit's size list is accumulated.
            units[world_name] = dict(
                (int(unit_id), sizes[0])
                for unit_id, sizes in world['units'].items())

        category_tree, _, _ = accumulate(
            template, snapshot['worlds'], units, set(units[template[0]]))
        category_trees.append(category_tree)

    if output_format == 'csv':
        output_csv(output, category_trees, data, first_time, output_exponent)
    elif output_format == 'json':
        output_json(output, category_trees, data, first_time, template_label)
    elif output_format == 'tree':
        output_tree(output, category_trees)
    return 0


def main():
    """Parses the -t/-f/-e options and runs do_main on stdin and stdout.

    Returns:
        The exit status reported by do_main.
    """
    LOGGER.setLevel(logging.DEBUG)
    handler = logging.StreamHandler()
    handler.setLevel(logging.INFO)
    handler.setFormatter(logging.Formatter('%(message)s'))
    LOGGER.addHandler(handler)

    parser = optparse.OptionParser()
    parser.add_option('-t', '--template', dest='template',
                      metavar='TEMPLATE',
                      help='Apply TEMPLATE to list up.')
    parser.add_option('-f', '--format', dest='format', default='csv',
                      help='Specify the output format: csv, json or tree.')
    parser.add_option('-e', '--exponent', dest='exponent', default='M',
                      help='Specify B (bytes), K (kilobytes) or M (megabytes).')

    options, _ = parser.parse_args(sys.argv)
    return do_main(sys.stdin, sys.stdout,
                   options.template, options.format, options.exponent)


if __name__ == '__main__':
    sys.exit(main())