#
# Copyright (C) 2013 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Histogram generation tools."""

from collections import defaultdict

from update_payload import format_utils


class Histogram(object):
  """A histogram generating object.

  This object serves the sole purpose of formatting (key, val) pairs as an
  ASCII histogram, including bars and percentage markers, and taking care of
  label alignment, scaling, etc. In addition to the standard __init__
  interface, two static methods are provided for conveniently converting data
  in different formats into a histogram. Histogram generation is exported via
  its __str__ method, and looks as follows:

    Yes |################    | 5 (83.3%)
    No  |###                 | 1 (16.6%)

  TODO(garnold) we may want to add actual methods for adding data or tweaking
  the output layout and formatting. For now, though, this is fine.

  """

  def __init__(self, data, scale=20, formatter=None):
    """Initialize a histogram object.

    Args:
      data: list of (key, count) pairs constituting the histogram
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values; defaults
        to str when not given

    """
    self.data = data
    self.scale = scale
    self.formatter = formatter or str
    # Width of the label column. Falling back to [0] keeps an empty histogram
    # constructible instead of letting max() raise ValueError on no input.
    self.max_key_len = max([len(str(key)) for key, _ in self.data] or [0])
    self.total = sum(count for _, count in self.data)

  @staticmethod
  def FromCountDict(count_dict, scale=20, formatter=None, key_names=None):
    """Takes a dictionary of counts and returns a histogram object.

    This simply converts a mapping from names to counts into a list of (key,
    count) pairs, optionally translating keys into name strings, then
    generating and returning a histogram for them. This is a useful convenience
    call for clients that update a dictionary of counters as they (say) scan a
    data stream.

    Args:
      count_dict: dictionary mapping keys to occurrence counts
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values
      key_names: dictionary mapping keys to name strings; when given, every
        key of count_dict is looked up in it (a missing key raises KeyError)

    Returns:
      A histogram object based on the given data.

    """
    if key_names:
      namer = lambda key: key_names[key]
    else:
      namer = lambda key: key

    hist = [(namer(key), count) for key, count in count_dict.items()]
    return Histogram(hist, scale, formatter)

  @staticmethod
  def FromKeyList(key_list, scale=20, formatter=None, key_names=None):
    """Takes a list of (possibly recurring) keys and returns a histogram object.

    This converts the list into a dictionary of counters, then uses
    FromCountDict() to generate the actual histogram. For example:

      ['a', 'a', 'b', 'a', 'b'] --> {'a': 3, 'b': 2} --> ...

    Args:
      key_list: list of (possibly recurring) keys
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values
      key_names: dictionary mapping keys to name strings

    Returns:
      A histogram object based on the given data.

    """
    count_dict = defaultdict(int)  # Unset items default to zero
    for key in key_list:
      count_dict[key] += 1
    return Histogram.FromCountDict(count_dict, scale, formatter, key_names)

  def __str__(self):
    """Renders the histogram, one line per (key, count) pair.

    Each line holds the left-justified key, a bar of '#' characters padded to
    self.scale columns between '|' delimiters, the formatted count and, when
    format_utils.NumToPercent() returns a non-empty string, that string in
    parentheses. When the total count is zero no bar can be scaled, so only a
    bare '|' is emitted in place of the bar.

    Returns:
      The histogram lines joined by newlines (an empty string for no data).

    """
    hist_lines = []
    hist_bar = '|'
    for key, count in self.data:
      if self.total:
        # Floor division keeps the bar length an integer; true division would
        # produce a float on Python 3 and break the string repetition below.
        bar_len = count * self.scale // self.total
        hist_bar = '|%s|' % ('#' * bar_len).ljust(self.scale)

      line = '%s %s %s' % (
          str(key).ljust(self.max_key_len),
          hist_bar,
          self.formatter(count))
      percent_str = format_utils.NumToPercent(count, self.total)
      if percent_str:
        line += ' (%s)' % percent_str
      hist_lines.append(line)

    return '\n'.join(hist_lines)

  def GetKeys(self):
    """Returns the keys of the histogram."""
    return [key for key, _ in self.data]