# Copyright 2022 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""
The label module defines a class to store and manipulate size reports.
"""

from collections import defaultdict
from dataclasses import dataclass
from typing import Iterable, Dict, Sequence, Tuple, List, Optional
import csv


@dataclass
class Label:
    """Return type of DataSourceMap generator.

    A label is a single named entry of a size report together with the names
    of its ancestors (``parents``), ordered from the top data source down.
    """

    name: str
    size: int
    capacity: Optional[int] = None
    # True when the label was found in both maps of a diff, False when it was
    # found in only one of them, None when no diff information was recorded.
    exists_both: Optional[bool] = None
    parents: Tuple[str, ...] = ()

    def is_new(self) -> bool:
        """Return True if the label is not in both maps and has size > 0."""
        return (not self.exists_both) and self.size > 0

    def is_del(self) -> bool:
        """Return True if the label is not in both maps and has size < 0."""
        return (not self.exists_both) and self.size < 0


@dataclass
class LabelInfo:
    """Mutable per-label record stored inside a _LabelMap."""

    size: int = 0
    capacity: Optional[int] = None
    exists_both: Optional[bool] = None


class _LabelMap:
    """Private module to hold parent and child labels with their size."""

    _label_map: Dict[str, Dict[str, LabelInfo]]

    def __init__(self):
        # Two-level defaultdict: indexing an unknown parent or child creates
        # an empty entry (a LabelInfo with size 0) on the fly.
        self._label_map = defaultdict(lambda: defaultdict(LabelInfo))

    def remove(
        self, parent_label: str, child_label: Optional[str] = None
    ) -> None:
        """Delete entire parent label or the child label.

        Args:
          parent_label: Parent whose entry (or whose child) is deleted.
          child_label: Child to delete; None deletes the whole parent.

        Raises:
          KeyError: The requested parent or child does not exist.
        """
        # Compare against None explicitly: an empty string is a legal child
        # label and must not be mistaken for "delete the whole parent".
        if child_label is not None:
            del self._label_map[parent_label][child_label]
        else:
            del self._label_map[parent_label]

    def __getitem__(self, parent_label: str) -> Dict[str, LabelInfo]:
        """Indexing LabelMap using '[]' operators by specifying a label."""
        return self._label_map[parent_label]

    def __contains__(self, parent_label: str) -> bool:
        """Return True if parent_label has an entry; never creates one."""
        return parent_label in self._label_map

    def map_generator(self) -> Iterable[Tuple[str, Dict[str, LabelInfo]]]:
        """Yield every (parent label, child label dict) pair."""
        yield from self._label_map.items()


class _DataSource:
    """Private module to store a data source name with a _LabelMap."""

    def __init__(self, name: str):
        self._name = name
        self._ds_label_map = _LabelMap()

    def get_name(self) -> str:
        """Return the name this data source was created with."""
        return self._name

    def add_label(
        self,
        parent_label: str,
        child_label: str,
        size: int,
        diff_exist: Optional[bool] = None,
    ) -> None:
        """Add size to the (parent, child) entry, creating it if needed.

        Args:
          parent_label: Name of the parent label.
          child_label: Name of the label whose size is accumulated.
          size: Amount added to the entry's running size.
          diff_exist: Diff flag stored in ``exists_both``; only recorded while
            the entry's flag is still None.
        """
        curr_label_info = self._ds_label_map[parent_label][child_label]
        curr_label_info.size += size
        if curr_label_info.exists_both is None:
            curr_label_info.exists_both = diff_exist

    def __getitem__(self, parent_label: str) -> Dict[str, LabelInfo]:
        """Return the child label dict of parent_label (created if absent)."""
        return self._ds_label_map[parent_label]

    def __contains__(self, parent_label: str) -> bool:
        """Return True if parent_label has an entry in this data source."""
        return parent_label in self._ds_label_map

    def label_map_generator(self) -> Iterable[Tuple[str, Dict[str, LabelInfo]]]:
        """Yield every (parent label, child label dict) pair."""
        yield from self._ds_label_map.map_generator()


class DataSourceMap:
    """Module to store an array of DataSources and capacities.

    An organized way to store a hierarchy of labels and their sizes.
    Includes a capacity array to hold regex patterns for applying
    capacities to matching label names.

    """

    _BASE_TOTAL_LABEL = 'total'

    @classmethod
    def from_bloaty_tsv(cls, raw_tsv: Iterable[str]) -> 'DataSourceMap':
        """Read in Bloaty TSV output and store in DataSourceMap.

        The columns left of the 'vmsize' column of the header row are used as
        data source names; on every following row they form the label
        hierarchy and the 'vmsize' column is the size.

        Args:
          raw_tsv: Iterable of tab separated lines, header row first.

        Returns:
          A new map populated with every non-blank data row.

        Raises:
          ValueError: The input is empty, the header has no 'vmsize' column
            or a size is not an integer.
        """
        reader = csv.reader(raw_tsv, delimiter='\t')
        top_row = next(reader, None)
        if top_row is None:
            raise ValueError('Bloaty TSV input is empty; no header row found')
        vmsize_index = top_row.index('vmsize')
        ds_map_tsv = cls(top_row[:vmsize_index])
        for row in reader:
            # csv.reader yields an empty list for blank lines (for example a
            # trailing newline); there is nothing to insert for those.
            if not row:
                continue
            ds_map_tsv.insert_label_hierachy(
                row[:vmsize_index], int(row[vmsize_index])
            )
        return ds_map_tsv

    def __init__(self, data_sources_names: Iterable[str]):
        # Index 0 is an internal 'base' data source that only holds the
        # running total; the caller's data sources start at index 1.
        self._data_sources = [
            _DataSource(name) for name in ['base', *data_sources_names]
        ]
        self._capacity_array: List[Tuple[str, int]] = []

    def label_exists(
        self, ds_index: int, parent_label: str, child_label: str
    ) -> bool:
        """Return True if child_label is stored under parent_label.

        Args:
          ds_index: Index into the internal data source list, where index 0
            is the internal 'base' data source.
          parent_label: Name of the parent label.
          child_label: Name of the label looked up below the parent.
        """
        # Check the parent first so that the lookup of the child does not
        # create an empty entry for an unknown parent.
        return (parent_label in self._data_sources[ds_index]) and (
            child_label in self._data_sources[ds_index][parent_label]
        )

    def insert_label_hierachy(
        self,
        label_hierarchy: Iterable[str],
        size: int,
        diff_exist: Optional[bool] = None,
    ) -> None:
        """Insert a hierarchy of labels with its size.

        Args:
          label_hierarchy: Label names ordered from the first data source
            down to the last one.
          size: Size added to every label of the hierarchy and to the total.
          diff_exist: Diff flag forwarded to every inserted label.
        """

        # Insert initial '__base__' data source that holds the
        # running total size.
        self._data_sources[0].add_label(
            '__base__', self._BASE_TOTAL_LABEL, size
        )
        complete_label_hierachy = [self._BASE_TOTAL_LABEL, *label_hierarchy]
        # Walk (parent, child) pairs; the pair at position N belongs to data
        # source N + 1. Pairs with an empty parent name are skipped.
        for index, (parent, child) in enumerate(
            zip(complete_label_hierachy, complete_label_hierachy[1:])
        ):
            if parent:
                self._data_sources[index + 1].add_label(
                    parent, child, size, diff_exist
                )

    def add_capacity(self, regex_pattern: str, capacity: int) -> None:
        """Append a (regex pattern, capacity) pair to the capacity array."""
        self._capacity_array.append((regex_pattern, capacity))

    def diff(self, base: 'DataSourceMap') -> 'DiffDataSourceMap':
        """Calculate the difference between 2 DataSourceMaps.

        Args:
          base: Map subtracted from this (target) map.

        Returns:
          A DiffDataSourceMap holding, for the labels of the last data source
          of base, target size minus base size. Labels present in both maps
          are flagged True, labels present in only one of them False.
        """
        diff_dsm = DiffDataSourceMap(self.get_ds_names())
        curr_parent = self._BASE_TOTAL_LABEL

        # Iterate through base labels at each datasource index.
        last_data_source = len(base.get_ds_names()) - 1
        # +1 skips the internal 'base' data source stored at index 0.
        parent_data_source_index = last_data_source + 1
        for b_label in base.labels(last_data_source):
            if last_data_source > 0:
                curr_parent = b_label.parents[-1]
            lb_hierachy_names = [*b_label.parents, b_label.name]

            # Check if label exists in target binary DataSourceMap.
            # Subtract base from target size and insert diff size
            # into DiffDataSourceMap (a diff of 0 is inserted as well).
            if self.label_exists(
                parent_data_source_index, curr_parent, b_label.name
            ):
                target_size = self._data_sources[parent_data_source_index][
                    curr_parent
                ][b_label.name].size
                diff_dsm.insert_label_hierachy(
                    lb_hierachy_names, target_size - b_label.size, True
                )

            # label is not present in target - insert with negative size
            else:
                diff_dsm.insert_label_hierachy(
                    lb_hierachy_names, -b_label.size, False
                )

        # Iterate through all of target labels
        # to find labels new to target from base.
        for t_label in self.labels(last_data_source):
            if last_data_source > 0:
                curr_parent = t_label.parents[-1]

            # New addition to target
            if not base.label_exists(
                parent_data_source_index, curr_parent, t_label.name
            ):
                diff_dsm.insert_label_hierachy(
                    [*t_label.parents, t_label.name], t_label.size, False
                )

        return diff_dsm

    def get_total_size(self) -> int:
        """Return the sum of all sizes inserted into this map."""
        return self._data_sources[0]['__base__'][self._BASE_TOTAL_LABEL].size

    def get_ds_names(self) -> Tuple[str, ...]:
        """List of DataSource names for easy indexing and reference."""
        return tuple(
            data_source.get_name() for data_source in self._data_sources[1:]
        )

    def labels(self, ds_index: Optional[int] = None) -> Iterable[Label]:
        """Generator that yields a Label depending on specified data source.

        Args:
          ds_index: Integer index of target data source, as found in
            get_ds_names(). None selects the last data source.

        Returns:
          Iterable Label objects.
        """
        # Slice end: +1 for the internal 'base' data source at index 0 and
        # +1 because the end of a slice is exclusive.
        ds_index = len(self._data_sources) if ds_index is None else ds_index + 2
        yield from self._per_data_source_generator(
            tuple(), self._data_sources[1:ds_index]
        )

    def _per_data_source_generator(
        self,
        parent_labels: Tuple[str, ...],
        data_sources: Sequence[_DataSource],
    ) -> Iterable[Label]:
        """Recursive generator to return Label based off parent labels.

        Args:
          parent_labels: Names of the ancestors walked so far, top first.
          data_sources: Data sources still to descend into; the first one
            holds the children of the last entry of parent_labels.

        Returns:
          Label objects of the last data source in data_sources.
        """
        if not data_sources:
            return

        curr_parent = (
            parent_labels[-1] if parent_labels else self._BASE_TOTAL_LABEL
        )
        # Only the first remaining data source holds the children of
        # curr_parent. Deeper data sources are reached through recursion
        # only; matching them here by name would yield labels with an
        # incomplete parent chain.
        curr_ds = data_sources[0]
        if curr_parent not in curr_ds:
            return

        is_last_data_source = len(data_sources) == 1
        for child_label, label_info in curr_ds[curr_parent].items():
            if is_last_data_source:
                yield Label(
                    child_label,
                    label_info.size,
                    parents=parent_labels,
                    exists_both=label_info.exists_both,
                )
            else:
                yield from self._per_data_source_generator(
                    (*parent_labels, child_label),
                    data_sources[1:],
                )


class DiffDataSourceMap(DataSourceMap):
    """DataSourceMap that holds diff information."""

    def has_diff_sublabels(self, top_ds_label: str) -> bool:
        """Checks if first datasource is identical.

        Args:
          top_ds_label: Label name of the first data source.

        Returns:
          True if any label with a non-zero size either is named top_ds_label
          or has top_ds_label as its top-most parent, False otherwise.
        """
        return any(
            label.size != 0
            and (
                (bool(label.parents) and label.parents[0] == top_ds_label)
                or label.name == top_ds_label
            )
            for label in self.labels()
        )