• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2022 The Pigweed Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may not
4# use this file except in compliance with the License. You may obtain a copy of
5# the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations under
13# the License.
14"""Generates a useful bloaty config file containing new data sources."""
15
16import argparse
17import logging
18import re
19import sys
20from typing import BinaryIO, Dict, List, Optional, TextIO
21
22import pw_cli.argument_types
23from elftools.elf import elffile  # type: ignore
24
_LOG = logging.getLogger('bloaty_config')

# Memory region symbols are named
# 'pw_bloat_config_memory_region_NAME_{start,end}{_N,}'. The optional _N
# suffix selects the range index within the region.
_MEMORY_REGION_SYMBOL_RE = re.compile(
    r'pw_bloat_config_memory_region_'
    r'(?P<name>\w+)_(?P<limit>(start|end))(_(?P<index>\d+))?')
31
32
def _parse_args() -> argparse.Namespace:
    """
    Parse the command line arguments for this module.

    Returns:
      The argparse.Namespace parsed from the process arguments, with the
      attributes elf_file, output, utilization, memoryregions and loglevel.
    """
    parser = argparse.ArgumentParser(
        description='Generates useful bloaty configuration entries',
        # Keep the explicit line break in the epilog; the default formatter
        # re-wraps it, merging the hint and the example command.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='Hint: try this:\n'
        '   python -m pw_bloat.bloaty_config my_app.elf -o my_app.bloat')
    parser.add_argument('elf_file',
                        type=argparse.FileType('rb'),
                        help='The ELF file to inspect')
    parser.add_argument('--output',
                        '-o',
                        type=argparse.FileType('w'),
                        help='The generated bloaty configuration',
                        default=sys.stdout)
    parser.add_argument(
        '--utilization',
        action=argparse.BooleanOptionalAction,
        default=True,
        help=('Generate the utilization custom_data_source based on sections '
              'with "unused_space" anywhere in their name'))
    parser.add_argument(
        '--memoryregions',
        action=argparse.BooleanOptionalAction,
        default=True,
        # Show the symbol spelling that _MEMORY_REGION_SYMBOL_RE matches.
        help=('Generate the memoryregions custom_data_source based on '
              'symbols defined in the linker script matching the following '
              'pattern: '
              '"pw_bloat_config_memory_region_NAME_{start,end}{_N,}"'))
    parser.add_argument('-l',
                        '--loglevel',
                        type=pw_cli.argument_types.log_level,
                        default=logging.INFO,
                        help='Set the log level '
                        '(debug, info, warning, error, critical)')
    return parser.parse_args()
67
68
def _parse_memory_regions(parsed_elf_file: elffile.ELFFile) -> Optional[Dict]:
    """
    Search for the special pw::bloat::config symbols in the ELF binary.

    Args:
      parsed_elf_file: The parsed ELF file whose '.symtab' section is scanned
        for symbols matching _MEMORY_REGION_SYMBOL_RE.

    Returns:
      None if the ELF file has no '.symtab' section, if no matching symbols
      exist, if any range is missing its start or end symbol, or if any of the
      ranges overlap. Otherwise a dictionary which looks like:
        {
          MEMORY_REGION_NAME_0:{
            0:(VM_START_ADDRESS, VM_END_ADDRESS)
            ...
            N:(VM_START_ADDRESS, VM_END_ADDRESS)
          }
          ...
          MEMORY_REGION_NAME_M:{
            0:(VM_START_ADDRESS, VM_END_ADDRESS)
            ...
            K:(VM_START_ADDRESS, VM_END_ADDRESS)
          }
        }
    """
    symtab_section = parsed_elf_file.get_section_by_name('.symtab')
    # Report a missing symbol table the same way as the other invalid-input
    # cases below; an assert would be stripped when running with -O.
    if symtab_section is None:
        _LOG.error('The ELF file does not contain a .symtab section')
        return None

    # Produces an initial dictionary which looks like:
    #  {
    #    MEMORY_REGION_NAME_0:{
    #      0:{ 'start':vm_start_address, 'end':vm_end_address }
    #      ...
    #      N:{ 'start':vm_start_address, 'end':vm_end_address }
    #    }
    #    ...
    #    MEMORY_REGION_NAME_M:{
    #      0:{ 'start':vm_start_address, 'end':vm_end_address }
    #      ...
    #      K:{ 'start':vm_start_address, 'end':vm_end_address }
    #    }
    #  }
    memory_regions: Dict = {}
    for symbol in symtab_section.iter_symbols():
        match = _MEMORY_REGION_SYMBOL_RE.match(symbol.name)
        if not match:
            continue

        # A symbol without the optional _N suffix describes range 0.
        index = int(match.group('index') or 0)
        memory_region = memory_regions.setdefault(match.group('name'), {})
        memory_region_segment = memory_region.setdefault(index, {})
        memory_region_segment[match.group('limit')] = symbol.entry.st_value

    # If the user did not provide a single pw::bloat::config symbol in the ELF
    # binary then bail out and do nothing.
    if not memory_regions:
        _LOG.info('No valid pw::bloat::config::memory_region::* symbols found')
        return None

    # Ensure all memory regions' ranges have an end and start. Every problem
    # is logged before bailing out so all of them are visible in one run.
    missing_range_limits = False
    for region_name, ranges in memory_regions.items():
        for index, limits in ranges.items():
            if 'start' not in limits:
                missing_range_limits = True
                _LOG.error('%s[%d] is missing the start address', region_name,
                           index)
            if 'end' not in limits:
                missing_range_limits = True
                _LOG.error('%s[%d] is missing the end address', region_name,
                           index)
    if missing_range_limits:
        _LOG.error('Invalid memory regions detected: missing ranges')
        return None

    # Translate the initial memory_regions dictionary to the tupled return
    # format, i.e. (start, end) values in the nested dictionary.
    tupled_memory_regions: Dict = {
        region_name: {
            index: (limits['start'], limits['end'])
            for index, limits in ranges.items()
        }
        for region_name, ranges in memory_regions.items()
    }

    # Ensure the memory regions do not overlap.
    if _memory_regions_overlap(tupled_memory_regions):
        _LOG.error('Invalid memory regions detected: overlaps detected')
        return None

    return tupled_memory_regions
163
164
def _parse_segments(parsed_elf_file: elffile.ELFFile) -> Dict:
    """
    Collect the virtual memory range of each segment in the ELF binary.

    Walks the ELF file's segments by index and skips those whose p_memsz is
    zero. The result maps the index of each remaining segment to its
    (start, end) virtual addresses:
      {
        0:(start_vmaddr,end_vmaddr),
        ...
        N:(start_vmaddr,end_vmaddr),
      }
    where end_vmaddr is p_vaddr + p_memsz.
    """
    vm_ranges = {}
    for index in range(parsed_elf_file.num_segments()):
        header = parsed_elf_file.get_segment(index)
        base_address = header['p_vaddr']
        size_in_memory = header['p_memsz']
        # A zero-sized segment does not occupy any virtual memory.
        if size_in_memory != 0:
            vm_ranges[index] = (base_address, base_address + size_in_memory)
    return vm_ranges
188
189
def _memory_regions_overlap(memory_regions: Dict) -> bool:
    """
    Returns whether any memory region ranges overlap each other.

    Args:
      memory_regions: Dictionary of region name to a dictionary of range
        index to a (start, end) address tuple.

    Returns:
      True if the start or end of any range falls within another range. Each
      detected overlap is reported through _LOG.error.
    """
    # Flatten the nested dictionary so every range is compared with every
    # other range, including the other ranges of its own region.
    flat_ranges = [(name, index, start, end)
                   for name, ranges in memory_regions.items()
                   for index, (start, end) in ranges.items()]

    overlaps_detected = False
    for (current_name, current_index, current_start,
         current_end) in flat_ranges:
        for other_name, other_index, other_start, other_end in flat_ranges:
            if (current_name == other_name
                    and current_index == other_index):
                continue  # Skip yourself.
            # Check if the other range's end or start is within this range.
            # A range fully enclosing this one is caught when the roles of
            # the two ranges are swapped by the surrounding loops.
            other_end_overlaps = current_start < other_end <= current_end
            other_start_overlaps = current_start <= other_start < current_end
            if other_end_overlaps or other_start_overlaps:
                overlaps_detected = True
                _LOG.error('error: %s[%s] [%s,%s] overlaps with %s[%s] [%s,%s]',
                           current_name, current_index, hex(current_start),
                           hex(current_end), other_name, other_index,
                           hex(other_start), hex(other_end))
    return overlaps_detected
216
217
def _get_segments_to_memory_region_map(elf_file: BinaryIO) -> Optional[Dict]:
    """
    Looks up which memory region each segment of an ELF file resides in.

    Parses elf_file, extracts its memory regions and, when any were found,
    returns the result of map_segments_to_memory_regions for the file's
    segments. Returns None when _parse_memory_regions yields nothing.
    """
    elf = elffile.ELFFile(elf_file)

    regions = _parse_memory_regions(elf)
    if regions:
        # Segments are only parsed once valid memory regions are known.
        return map_segments_to_memory_regions(segments=_parse_segments(elf),
                                              memory_regions=regions)
    return None
235
236
def map_segments_to_memory_regions(segments: Dict,
                                   memory_regions: Dict) -> Dict:
    """
    Finds the memory region that contains each segment.

    Args:
      segments: Dictionary of segment index to (start, end) addresses, as
        returned by _parse_segments.
      memory_regions: Dictionary of region name to a dictionary of range
        index to (start, end) addresses, as returned by _parse_memory_regions.

    Returns:
      A dictionary which looks like:
      {
        SEGMENT_INDEX_0:'MEMORY_REGION_NAME_0',
        SEGMENT_INDEX_1:'MEMORY_REGION_NAME_0',
        ...
        SEGMENT_INDEX_N:'MEMORY_REGION_NAME_M',
      }
      Segments which do not fit inside any range are logged as errors and
      left out of the result.
    """
    region_by_segment = {}
    for index, (seg_start, seg_end) in segments.items():
        for region_name, subregions in memory_regions.items():
            for sub_start, sub_end in subregions.values():
                if seg_start < sub_start or seg_end > sub_end:
                    continue  # The segment is not fully inside this range.
                # Every range is checked, so the last match is the one kept.
                region_by_segment[index] = region_name
        if index in region_by_segment:
            continue
        _LOG.error(f'Error: Failed to find memory region for LOAD #{index} ' +
                   f'[{hex(seg_start)},{hex(seg_end)}]')
    return region_by_segment
269
270
def generate_memoryregions_data_source(segment_to_memory_region: Dict) -> str:
    """
    Builds the "memoryregions" custom_data_source configuration text.

    Emits one rewrite rule per entry of segment_to_memory_region, replacing
    the matching 'LOAD #<segment index> [...]' name with the memory region
    name. A final catch-all rule replaces everything else with
    "Not resident in memory".

    Returns:
      The configuration text, terminated by a newline.
    """
    segment_rules: List[str] = []
    for load_index, region_name in segment_to_memory_region.items():
        segment_rules += [
            '  rewrite: {',
            # Raw f-string: each bracket is preceded by two backslashes in
            # the emitted text.
            rf'    pattern:"^LOAD #{load_index} \\[.*\\]$"',
            f'    replacement:"{region_name}"',
            '  }',
        ]

    lines = [
        'custom_data_source: {',
        '  name: "memoryregions"',
        '  base_data_source: "segments"',
        *segment_rules,
        '  rewrite: {',
        '    pattern:".*"',
        '    replacement:"Not resident in memory"',
        '  }',
        '}',
    ]
    return ''.join(line + '\n' for line in lines)
288
289
def generate_utilization_data_source() -> str:
    """
    Builds the "utilization" custom_data_source configuration text.

    The data source is based on "sections": names matching "unused_space" are
    rewritten to "Free space" and everything else to "Used space".

    Returns:
      The configuration text, terminated by a newline.
    """
    lines = (
        'custom_data_source: {',
        '  name:"utilization"',
        '  base_data_source:"sections"',
        '  rewrite: {',
        '    pattern:"unused_space"',
        '    replacement:"Free space"',
        '  }',
        '  rewrite: {',
        '    pattern:".*"',
        '    replacement:"Used space"',
        '  }',
        '}',
    )
    return ''.join(line + '\n' for line in lines)
305
306
def generate_bloaty_config(elf_file: BinaryIO, enable_memoryregions: bool,
                           enable_utilization: bool, out_file: TextIO) -> None:
    """
    Writes the enabled custom data sources to out_file.

    Args:
      elf_file: The ELF binary to inspect; only read when
        enable_memoryregions is set.
      enable_memoryregions: Whether to emit the "memoryregions" data source.
        It is only written when a segment to memory region mapping could be
        produced from the ELF file.
      enable_utilization: Whether to emit the "utilization" data source.
      out_file: The text stream the configuration is written to.
    """
    if enable_memoryregions:
        # The "memoryregions" data_source is only available if the user
        # provided the required pw_bloat specific symbols in their linker
        # script.
        region_map = _get_segments_to_memory_region_map(elf_file)
        if region_map:
            _LOG.info('memoryregions data_source is provided')
            out_file.write(generate_memoryregions_data_source(region_map))
        else:
            _LOG.info('memoryregions data_source is not provided')

    if enable_utilization:
        _LOG.info('utilization data_source is provided')
        out_file.write(generate_utilization_data_source())
323
324
def main() -> int:
    """
    Command line entry point: writes a bloaty config for the given ELF file.

    Returns:
      0, which the caller uses as the process exit status.
    """
    cli_args = _parse_args()

    # Log bare messages at the level selected on the command line.
    logging.basicConfig(level=cli_args.loglevel, format='%(message)s')

    generate_bloaty_config(
        elf_file=cli_args.elf_file,
        enable_memoryregions=cli_args.memoryregions,
        enable_utilization=cli_args.utilization,
        out_file=cli_args.output,
    )
    return 0
336
337
# When run as a script, use main()'s return value as the exit status.
if __name__ == '__main__':
    raise SystemExit(main())
340