# Copyright 2022 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Generates a useful bloaty config file containing new data sources."""

import argparse
import itertools
import logging
import re
import sys
from typing import BinaryIO, Dict, List, Optional, TextIO

import pw_cli.argument_types
from elftools.elf import elffile  # type: ignore

_LOG = logging.getLogger('bloaty_config')

# 'pw_bloat_config_memory_region_NAME_{start,end}{_N,}' where _N defaults to 0.
_MEMORY_REGION_SYMBOL_RE = re.compile(
    r'pw_bloat_config_memory_region_' +
    r'(?P<name>\w+)_(?P<limit>(start|end))(_(?P<index>\d+))?')


def _parse_args() -> argparse.Namespace:
    """Parses the command line and returns the resulting namespace."""
    parser = argparse.ArgumentParser(
        description='Generates useful bloaty configurations entries',
        epilog='Hint: try this:\n'
        '   python -m pw_bloat.bloaty_config my_app.elf -o my_app.bloat',
        # The epilog contains a deliberate line break; the default formatter
        # would re-wrap it into a single paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('elf_file', type=argparse.FileType('rb'))
    parser.add_argument('--output',
                        '-o',
                        type=argparse.FileType('w'),
                        help='The generated bloaty configuration',
                        default=sys.stdout)
    parser.add_argument(
        '--utilization',
        action=argparse.BooleanOptionalAction,
        default=True,
        help=(
            'Generate the utilization custom_data_source based on sections ' +
            'with "unused_space" in anywhere in their name'))
    parser.add_argument(
        '--memoryregions',
        action=argparse.BooleanOptionalAction,
        default=True,
        # This is the pattern _MEMORY_REGION_SYMBOL_RE actually matches.
        help=('Generate the memoryregions custom_data_source based on ' +
              'symbols defined in the linker script matching the following ' +
              'pattern: ' +
              '"pw_bloat_config_memory_region_NAME_{start,end}{_N,}"'))
    parser.add_argument('-l',
                        '--loglevel',
                        type=pw_cli.argument_types.log_level,
                        default=logging.INFO,
                        help='Set the log level '
                        '(debug, info, warning, error, critical)')
    return parser.parse_args()


def _parse_memory_regions(parsed_elf_file: elffile.ELFFile) -> Optional[Dict]:
    """
    Search for the pw_bloat_config_memory_region_* symbols in the ELF binary.

    Returns None when the symbol table is missing, when no matching symbols
    exist, when a range lacks its start or end symbol, or when ranges overlap.
    Otherwise this produces a dictionary which looks like:
      {
        MEMORY_REGION_NAME_0:{
          0:(VM_START_ADDRESS, VM_END_ADDRESS)
          ...
          N:(VM_START_ADDRESS, VM_END_ADDRESS)
        }
        ...
        MEMORY_REGION_NAME_M:{
          0:(VM_START_ADDRESS, VM_END_ADDRESS)
          ...
          K:(VM_START_ADDRESS, VM_END_ADDRESS)
        }
      }
    """
    symtab_section = parsed_elf_file.get_section_by_name('.symtab')
    if not symtab_section:
        # Report this the same way as the other invalid-input cases below
        # rather than relying on an assert, which is stripped under -O.
        _LOG.error('No .symtab section found; cannot parse memory regions')
        return None

    # Collect the raw limits first:
    #   { NAME: { INDEX: { 'start': address, 'end': address } } }
    memory_regions: Dict = {}
    for symbol in symtab_section.iter_symbols():
        match = _MEMORY_REGION_SYMBOL_RE.match(symbol.name)
        if not match:
            continue

        # A symbol without an explicit _N suffix describes range 0.
        index = int(match.group('index') or 0)
        ranges = memory_regions.setdefault(match.group('name'), {})
        limits = ranges.setdefault(index, {})
        limits[match.group('limit')] = symbol.entry.st_value

    # If the user did not provide a single matching symbol in the ELF binary
    # then bail out and do nothing.
    if not memory_regions:
        _LOG.info('No valid pw::bloat::config::memory_region::* symbols found')
        return None

    # Ensure all memory regions' ranges have an end and start. Every problem
    # is reported before giving up so the user can fix them all at once.
    missing_range_limits = False
    for region_name, ranges in memory_regions.items():
        for index, limits in ranges.items():
            for limit in ('start', 'end'):
                if limit not in limits:
                    missing_range_limits = True
                    _LOG.error('%s[%d] is missing the %s address',
                               region_name, index, limit)
    if missing_range_limits:
        _LOG.error('Invalid memory regions detected: missing ranges')
        return None

    # Translate the limit dictionaries into (start, end) tuples.
    tupled_memory_regions: Dict = {
        region_name: {
            index: (limits['start'], limits['end'])
            for index, limits in ranges.items()
        }
        for region_name, ranges in memory_regions.items()
    }

    # Ensure the memory regions do not overlap.
    if _memory_regions_overlap(tupled_memory_regions):
        _LOG.error('Invalid memory regions detected: overlaps detected')
        return None

    return tupled_memory_regions


def _parse_segments(parsed_elf_file: elffile.ELFFile) -> Dict:
    """
    Report all of the segment information from the ELF binary.

    Iterates over all of the segments in the ELF file's program header and
    reports where they reside in virtual memory through a dictionary which
    looks like:
      {
        0:(start_vmaddr,end_vmaddr),
        ...
        N:(start_vmaddr,end_vmaddr),
      }

    Segments whose p_memsz is zero are left out, so the keys (program header
    indices) are not necessarily contiguous.
    """
    segments = {}
    for i in range(parsed_elf_file.num_segments()):
        segment = parsed_elf_file.get_segment(i)
        memory_size = segment['p_memsz']
        if memory_size == 0:
            continue  # Not a loaded segment which resides in virtual memory.
        start_vmaddr = segment['p_vaddr']
        segments[i] = (start_vmaddr, start_vmaddr + memory_size)
    return segments


def _limit_within(start: int, end: int, other_start: int,
                  other_end: int) -> bool:
    """Returns whether a limit of the other range falls in [start, end]."""
    other_end_overlaps = start < other_end <= end
    other_start_overlaps = start <= other_start < end
    return other_end_overlaps or other_start_overlaps


def _memory_regions_overlap(memory_regions: Dict) -> bool:
    """
    Returns whether any memory regions overlap each other.

    Takes the {NAME: {INDEX: (start, end)}} dictionary built by
    _parse_memory_regions. Every overlapping pair of ranges is logged once as
    an error.
    """
    flattened = [(name, index, start, end)
                 for name, ranges in memory_regions.items()
                 for index, (start, end) in ranges.items()]

    overlaps_detected = False
    # Visit each unordered pair once and test both directions, so that a range
    # fully containing another one is still caught.
    for current, other in itertools.combinations(flattened, 2):
        current_name, current_index, current_start, current_end = current
        other_name, other_index, other_start, other_end = other
        if not (_limit_within(current_start, current_end, other_start,
                              other_end)
                or _limit_within(other_start, other_end, current_start,
                                 current_end)):
            continue
        overlaps_detected = True
        _LOG.error('error: %s[%d] [%s,%s] overlaps with %s[%d] [%s,%s]',
                   current_name, current_index, hex(current_start),
                   hex(current_end), other_name, other_index,
                   hex(other_start), hex(other_end))
    return overlaps_detected


def _get_segments_to_memory_region_map(elf_file: BinaryIO) -> Optional[Dict]:
    """
    Processes an ELF file to look up what memory regions segments are in.

    Returns the result from map_segments_to_memory_regions if valid memory
    regions were parsed out of the ELF file, otherwise None.
    """
    parsed_elf_file = elffile.ELFFile(elf_file)

    memory_regions = _parse_memory_regions(parsed_elf_file)
    if not memory_regions:
        return None

    return map_segments_to_memory_regions(
        segments=_parse_segments(parsed_elf_file),
        memory_regions=memory_regions)


def map_segments_to_memory_regions(segments: Dict,
                                   memory_regions: Dict) -> Dict:
    """
    Maps segments to the virtual memory regions they reside in.

    This takes in the results from _parse_memory_regions and _parse_segments and
    produces a dictionary which looks like:
    {
      SEGMENT_INDEX_0:'MEMORY_REGION_NAME_0',
      SEGMENT_INDEX_1:'MEMORY_REGION_NAME_0',
      ...
      SEGMENT_INDEX_N:'MEMORY_REGION_NAME_M',
    }

    A segment which does not fit entirely inside any range is logged as an
    error and left out of the result.
    """
    segment_to_memory_region = {}
    for segment, (segment_start, segment_end) in segments.items():
        for memory_region_name, memory_region_info in memory_regions.items():
            for subregion_start, subregion_end in memory_region_info.values():
                if (segment_start >= subregion_start
                        and segment_end <= subregion_end):
                    # We found the subregion the segment resides in.
                    segment_to_memory_region[segment] = memory_region_name
        if segment not in segment_to_memory_region:
            _LOG.error(
                'Error: Failed to find memory region for LOAD #%s [%s,%s]',
                segment, hex(segment_start), hex(segment_end))
    return segment_to_memory_region


def generate_memoryregions_data_source(segment_to_memory_region: Dict) -> str:
    """
    Returns the "memoryregions" custom_data_source bloaty config entry.

    One rewrite rule is emitted per entry of segment_to_memory_region, which
    replaces the 'LOAD #<index> [...]' label of the "segments" data source
    with the memory region name. A final catch-all rule labels everything
    else as "Not resident in memory".
    """
    output: List[str] = [
        'custom_data_source: {',
        '  name: "memoryregions"',
        '  base_data_source: "segments"',
    ]
    for segment_index, memory_region in segment_to_memory_region.items():
        # The doubled backslashes are written out literally so that the
        # config file's string escaping yields the regex '\[.*\]'.
        segment_filter = r'^LOAD ' + f'#{segment_index}' + r' \\[.*\\]$'
        output += [
            '  rewrite: {',
            f'    pattern:"{segment_filter}"',
            f'    replacement:"{memory_region}"',
            '  }',
        ]
    output += [
        '  rewrite: {',
        '    pattern:".*"',
        '    replacement:"Not resident in memory"',
        '  }',
        '}',
    ]
    return '\n'.join(output) + '\n'


def generate_utilization_data_source() -> str:
    """
    Returns the "utilization" custom_data_source bloaty config entry.

    Entries of the "sections" data source matching "unused_space" are
    relabelled "Free space"; everything else becomes "Used space".
    """
    output: List[str] = [
        'custom_data_source: {',
        '  name:"utilization"',
        '  base_data_source:"sections"',
        '  rewrite: {',
        '    pattern:"unused_space"',
        '    replacement:"Free space"',
        '  }',
        '  rewrite: {',
        '    pattern:".*"',
        '    replacement:"Used space"',
        '  }',
        '}',
    ]
    return '\n'.join(output) + '\n'


def generate_bloaty_config(elf_file: BinaryIO, enable_memoryregions: bool,
                           enable_utilization: bool, out_file: TextIO) -> None:
    """
    Writes the enabled custom_data_source entries to out_file.

    Args:
      elf_file: binary file object of the ELF to inspect; only read when
        enable_memoryregions is set.
      enable_memoryregions: emit the "memoryregions" data source if a segment
        to memory region mapping could be derived from the ELF.
      enable_utilization: emit the "utilization" data source.
      out_file: text file object the configuration is written to.
    """
    if enable_memoryregions:
        # Enable the "memoryregions" data_source if the user provided the
        # required pw_bloat specific symbols in their linker script.
        segment_to_memory_region = _get_segments_to_memory_region_map(elf_file)
        if not segment_to_memory_region:
            _LOG.info('memoryregions data_source is not provided')
        else:
            _LOG.info('memoryregions data_source is provided')
            out_file.write(
                generate_memoryregions_data_source(segment_to_memory_region))

    if enable_utilization:
        _LOG.info('utilization data_source is provided')
        out_file.write(generate_utilization_data_source())


def main() -> int:
    """Generates a useful bloaty config file containing new data sources."""
    args = _parse_args()

    logging.basicConfig(format='%(message)s', level=args.loglevel)

    generate_bloaty_config(elf_file=args.elf_file,
                           enable_memoryregions=args.memoryregions,
                           enable_utilization=args.utilization,
                           out_file=args.output)
    return 0


if __name__ == "__main__":
    sys.exit(main())