• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2022 The Pigweed Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may not
4# use this file except in compliance with the License. You may obtain a copy of
5# the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations under
13# the License.
14"""
15The label module defines a class to store and manipulate size reports.
16"""
17
18from collections import defaultdict
19from dataclasses import dataclass
20from typing import Iterable, Dict, Sequence, Tuple, List, Optional
21import csv
22
23
@dataclass
class Label:
    """One named size entry, as yielded by the DataSourceMap generators.

    The two predicates classify an entry of a diff: an entry that is not
    flagged as existing on both sides counts as new when its size is
    positive and as deleted when its size is negative.
    """

    # Label name at its own level of the hierarchy.
    name: str
    # Size attributed to the label; a signed delta when produced by a diff.
    size: int
    # Optional capacity for the label; None when no capacity is attached.
    capacity: Optional[int] = None
    # True/False when the label was classified by a diff, None otherwise.
    exists_both: Optional[bool] = None
    # Names of the ancestor labels, outermost first.
    parents: Tuple[str, ...] = ()

    def is_new(self) -> bool:
        """Return True for a positive-size label not present on both sides."""
        if self.exists_both:
            return False
        return self.size > 0

    def is_del(self) -> bool:
        """Return True for a negative-size label not present on both sides."""
        if self.exists_both:
            return False
        return self.size < 0
40
@dataclass
class LabelInfo:
    """Mutable per-label record kept inside a label map."""

    # Accumulated size of the label; starts at zero.
    size: int = 0
    # Optional capacity for the label; None when none is attached.
    capacity: Optional[int] = None
    # Diff classification flag; None until one is recorded.
    exists_both: Optional[bool] = None
46
47
class _LabelMap:
    """Private container mapping parent labels to their child labels.

    The storage is a two-level defaultdict, so indexing a parent (or a
    child of a parent) that is not present yet creates an empty entry for
    it. Use the `in` operator to test for a parent without creating it.
    """

    _label_map: Dict[str, Dict[str, LabelInfo]]

    def __init__(self):
        self._label_map = defaultdict(lambda: defaultdict(LabelInfo))

    def remove(
        self, parent_label: str, child_label: Optional[str] = None
    ) -> None:
        """Delete one child label, or the entire parent label.

        Args:
            parent_label: Parent label to operate on.
            child_label: Child to delete from the parent. When None, the
                parent and all of its children are deleted.

        Raises:
            KeyError: The label to delete is not present.
        """
        # Compare against None rather than truthiness: an empty string is
        # a legitimate child label and must not delete the whole parent.
        if child_label is not None:
            del self._label_map[parent_label][child_label]
        else:
            del self._label_map[parent_label]

    def __getitem__(self, parent_label: str) -> Dict[str, LabelInfo]:
        """Return the child-label mapping stored under a parent label."""
        return self._label_map[parent_label]

    def __contains__(self, parent_label: str) -> bool:
        """Return True if the parent label has an entry in the map."""
        return parent_label in self._label_map

    def map_generator(self) -> Iterable[Tuple[str, Dict[str, LabelInfo]]]:
        """Yield (parent label, child-label mapping) pairs."""
        yield from self._label_map.items()
75
76
class _DataSource:
    """Private pairing of a data source name with its own _LabelMap."""

    def __init__(self, name: str):
        self._ds_label_map = _LabelMap()
        self._name = name

    def get_name(self) -> str:
        """Return the name given at construction."""
        return self._name

    def add_label(
        self,
        parent_label: str,
        child_label: str,
        size: int,
        diff_exist: Optional[bool] = None,
    ) -> None:
        """Accumulate size onto a child label stored under a parent label.

        The size is added to whatever the entry already holds. The
        exists_both flag is only written while it is still None, so the
        first non-None diff_exist recorded for an entry is the one kept.
        """
        info = self._ds_label_map[parent_label][child_label]
        info.size += size
        if info.exists_both is not None:
            return
        info.exists_both = diff_exist

    def __getitem__(self, parent_label: str) -> Dict[str, LabelInfo]:
        """Return the child-label mapping stored under a parent label."""
        children = self._ds_label_map[parent_label]
        return children

    def __contains__(self, parent_label: str) -> bool:
        """Return True if the parent label is present in this data source."""
        return parent_label in self._ds_label_map

    def label_map_generator(self) -> Iterable[Tuple[str, Dict[str, LabelInfo]]]:
        """Yield (parent label, child-label mapping) pairs of this source."""
        yield from self._ds_label_map.map_generator()
108
109
class DataSourceMap:
    """Module to store an array of DataSources and capacities.

    An organized way to store a hierarchy of labels and their sizes. One
    _DataSource is kept per level of the hierarchy, preceded by a synthetic
    'base' data source (index 0) that holds the running total size. Each
    level maps a parent label (a label of the previous level, or 'total'
    for the first level) to its child labels.

    Includes a capacity array to hold regex patterns for applying
    capacities to matching label names.
    """

    _BASE_TOTAL_LABEL = 'total'

    @classmethod
    def from_bloaty_tsv(cls, raw_tsv: Iterable[str]) -> 'DataSourceMap':
        """Read in Bloaty TSV output and store in DataSourceMap.

        The header columns that precede 'vmsize' name the data sources. In
        every following row those same columns form the label hierarchy and
        the 'vmsize' column is its size. Blank lines are ignored.

        Args:
            raw_tsv: Lines of tab-separated text, header row first.

        Returns:
            A new instance of cls populated from the rows.

        Raises:
            StopIteration: raw_tsv yields no header row.
            ValueError: The header has no 'vmsize' column, or a size value
                is not an integer.
        """
        reader = csv.reader(raw_tsv, delimiter='\t')
        top_row = next(reader)
        vmsize_index = top_row.index('vmsize')
        ds_map_tsv = cls(top_row[:vmsize_index])
        for row in reader:
            # csv.reader yields an empty list for a blank line; such a row
            # carries no label data and would otherwise raise IndexError.
            if not row:
                continue
            ds_map_tsv.insert_label_hierachy(
                row[:vmsize_index], int(row[vmsize_index])
            )
        return ds_map_tsv

    def __init__(self, data_sources_names: Iterable[str]):
        """Create one empty data source per name, after the 'base' source.

        Args:
            data_sources_names: Names of the hierarchy levels, outermost
                first.
        """
        self._data_sources = [
            _DataSource(name) for name in ['base', *data_sources_names]
        ]
        self._capacity_array: List[Tuple[str, int]] = []

    def label_exists(
        self, ds_index: int, parent_label: str, child_label: str
    ) -> bool:
        """Return True if child_label is stored under parent_label.

        Args:
            ds_index: Index into the internal data source list, where 0 is
                the synthetic 'base' source.
            parent_label: Parent label to look under.
            child_label: Child label to look for.
        """
        # Test the parent first so that indexing does not create an entry
        # for a parent that is not present.
        return (parent_label in self._data_sources[ds_index]) and (
            child_label in self._data_sources[ds_index][parent_label]
        )

    def insert_label_hierachy(
        self,
        label_hierarchy: Iterable[str],
        size: int,
        diff_exist: Optional[bool] = None,
    ) -> None:
        """Insert a hierarchy of labels with its size.

        The size is added to the running total and to every
        (parent, child) pair along the hierarchy, one pair per data source.
        Pairs whose parent label is empty are not recorded.

        Args:
            label_hierarchy: Label names, outermost first.
            size: Size to add to each label along the hierarchy.
            diff_exist: Diff flag passed through to each inserted label.

        Raises:
            IndexError: The hierarchy is deeper than the number of data
                sources and the out-of-range pair has a non-empty parent.
        """

        # Insert initial '__base__' data source that holds the
        # running total size.
        self._data_sources[0].add_label(
            '__base__', self._BASE_TOTAL_LABEL, size
        )
        complete_label_hierachy = [self._BASE_TOTAL_LABEL, *label_hierarchy]
        pairs = zip(complete_label_hierachy, complete_label_hierachy[1:])
        # Data source 0 is the base source, so the pairs start at index 1.
        for ds_index, (parent, child) in enumerate(pairs, start=1):
            if parent:
                self._data_sources[ds_index].add_label(
                    parent, child, size, diff_exist
                )

    def add_capacity(self, regex_pattern: str, capacity: int) -> None:
        """Append a (regex pattern, capacity) pair to the capacity array."""
        self._capacity_array.append((regex_pattern, capacity))

    def diff(self, base: 'DataSourceMap') -> 'DiffDataSourceMap':
        """Calculate the difference between 2 DataSourceMaps.

        Labels are compared at the deepest data source of base and matched
        by their immediate parent label and own name. For every such label
        the result receives:
          * target size minus base size, flagged True, when the label is in
            both maps (a zero difference is still inserted);
          * the negated base size, flagged False, when only base has it;
          * the target size, flagged False, when only the target has it.

        Args:
            base: Map to compare against; self is the target.

        Returns:
            A DiffDataSourceMap named after the target's data sources.
        """
        diff_dsm = DiffDataSourceMap(self.get_ds_names())
        curr_parent = self._BASE_TOTAL_LABEL

        # Iterate through base labels at each datasource index.
        last_data_source = len(base.get_ds_names()) - 1
        # Offset by one to skip the synthetic base data source.
        parent_data_source_index = last_data_source + 1
        for b_label in base.labels(last_data_source):
            if last_data_source > 0:
                curr_parent = b_label.parents[-1]
            lb_hierachy_names = [*b_label.parents, b_label.name]

            # Check if label exists in target binary DataSourceMap.
            # Subtract base from target size and insert diff size
            # into DiffDataSourceMap.
            if self.label_exists(
                parent_data_source_index, curr_parent, b_label.name
            ):
                target_info = self._data_sources[parent_data_source_index][
                    curr_parent
                ][b_label.name]
                diff_size = target_info.size - b_label.size
                diff_dsm.insert_label_hierachy(
                    lb_hierachy_names, diff_size, True
                )

            # label is not present in target - insert with negative size
            else:
                diff_dsm.insert_label_hierachy(
                    lb_hierachy_names, -1 * b_label.size, False
                )

        # Iterate through all of target labels
        # to find labels new to target from base.
        for t_label in self.labels(last_data_source):
            if last_data_source > 0:
                curr_parent = t_label.parents[-1]

            # New addition to target
            if not base.label_exists(
                parent_data_source_index, curr_parent, t_label.name
            ):
                diff_dsm.insert_label_hierachy(
                    [*t_label.parents, t_label.name], t_label.size, False
                )

        return diff_dsm

    def get_total_size(self) -> int:
        """Return the running total size held by the base data source."""
        return self._data_sources[0]['__base__'][self._BASE_TOTAL_LABEL].size

    def get_ds_names(self) -> Tuple[str, ...]:
        """List of DataSource names for easy indexing and reference."""
        return tuple(
            data_source.get_name() for data_source in self._data_sources[1:]
        )

    def labels(self, ds_index: Optional[int] = None) -> Iterable[Label]:
        """Generator that yields a Label depending on specified data source.

        Args:
            ds_index: Integer index of target data source, counted as in
                get_ds_names(). When None, all data sources are walked.

        Returns:
            Iterable Label objects.
        """
        # +2: one to skip the base data source and one because the slice
        # end is exclusive.
        ds_index = len(self._data_sources) if ds_index is None else ds_index + 2
        yield from self._per_data_source_generator(
            tuple(), self._data_sources[1:ds_index]
        )

    def _per_data_source_generator(
        self,
        parent_labels: Tuple[str, ...],
        data_sources: Sequence[_DataSource],
    ) -> Iterable[Label]:
        """Recursive generator to return Label based off parent labels.

        Args:
            parent_labels: Ancestor label names collected so far, outermost
                first; empty at the top level.
            data_sources: Data sources still to descend through. A Label is
                yielded only when exactly one remains.
        """
        # The parent to match is the same for the whole call.
        if parent_labels:
            curr_parent = parent_labels[-1]
        else:
            curr_parent = self._BASE_TOTAL_LABEL

        for ds_index, curr_ds in enumerate(data_sources):
            for parent_label, label_map in curr_ds.label_map_generator():
                if parent_label != curr_parent:
                    continue
                for child_label, label_info in label_map.items():
                    if len(data_sources) == 1:
                        yield Label(
                            child_label,
                            label_info.size,
                            parents=parent_labels,
                            exists_both=label_info.exists_both,
                        )
                    else:
                        yield from self._per_data_source_generator(
                            (*parent_labels, child_label),
                            data_sources[ds_index + 1 :],
                        )
277
278
class DiffDataSourceMap(DataSourceMap):
    """DataSourceMap that holds diff information."""

    def has_diff_sublabels(self, top_ds_label: str) -> bool:
        """Report whether any non-zero label belongs to a top-level label.

        A label belongs to top_ds_label when its outermost parent is
        top_ds_label, or when the label itself is named top_ds_label.

        Args:
            top_ds_label: Name of a label in the first data source.

        Returns:
            True if at least one such label has a non-zero size.
        """
        return any(
            label.size != 0
            and (
                (label.parents and (label.parents[0] == top_ds_label))
                or (label.name == top_ds_label)
            )
            for label in self.labels()
        )
291