# Copyright (C) 2020 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Classes for annotating a CFG file with profiling information.

Attributes:
    END_INSTRUCTION_MARKER (str): The marker used to indicate the end of a HIR
        instruction.
    EOF_MARKER (str): The marker used to indicate that the end-of-file has been
        reached.
"""

import collections
import enum
import logging
import os
import re

from typing import DefaultDict, Iterator, List, TextIO, Tuple

from perf2cfg import analyze
from perf2cfg import events
from perf2cfg import exceptions
from perf2cfg import parse

END_INSTRUCTION_MARKER = '<|@'
EOF_MARKER = '<EOF>'


class State(enum.Enum):
    """State represents the internal state of a CfgEditor object."""
    START = 1
    PARSE_METHOD_NAME = 2
    SKIP_METHOD = 3
    SKIP_TO_CFG = 4
    START_CFG = 5
    IS_DISASSEMBLY_PASS = 6
    SKIP_PASS = 7
    PARSE_FLAGS = 8
    SKIP_TO_HIR = 9
    HIR_INSTRUCTION = 10
    DISASSEMBLY = 11
    END_HIR = 12
    END_BLOCK = 13
    END_CFG = 14
    END = 15


class CfgEditor:
    """CfgEditor annotates a CFG file with profiling information.

    CfgEditor does *not* edit the input CFG file in place. Instead, it reads
    the input file line by line, generates annotations from profiling
    information, and writes an annotated CFG file to a given path.

    CfgEditor includes a CFG file parser based on a finite state machine. This
    parser supports CFG files in the c1visualizer format dumped by the ART
    optimizing compiler:
        - The CFG file must be valid (correctly parsed by c1visualizer).
        - Each line must contain only one directive.
        - Disassembly of an IR instruction must end with the `<|@` marker on a
          newline.

    Attributes:
        analyzer (analyze.RecordAnalyzer): A RecordAnalyzer object.
        input_stream (TextIO): An input CFG text stream.
        output_stream (TextIO): An output CFG text stream. Block annotations
            are written by seeking back into this stream, so it must support
            `tell()` and `seek()`.
        primary_event (str): An event used to color basic blocks.
        basic_block_event_counts (DefaultDict[str, int]): A mapping of event
            names to their total number of events for the current basic block.
        buffer (List[str]): A list of strings to be written to the output CFG
            file instead of the current line from the input CFG file.
        current_method (analyze.Method): A Method object representing the
            current method being annotated.
        event_names (List[str]): A list of sorted event names from the
            analysis.
        flags_offset (int): An output file offset pointing to the last flags
            directive seen.
        isa (str): The instruction set architecture as defined in the input CFG
            file metadata, or the string "unknown" if no metadata was found.
        padding (str): A string used to pad assembly instructions with no
            profiling information.
        saved_flags (List[str]): A list of strings representing the flags of
            the current basic block being parsed.
        state (State): A State value representing the internal state of the
            parser.
    """

    def __init__(self,
                 analyzer: analyze.RecordAnalyzer,
                 input_stream: TextIO,
                 output_stream: TextIO,
                 primary_event: str = 'cpu-cycles') -> None:
        """Instantiates a CfgEditor.

        Args:
            analyzer (analyze.RecordAnalyzer): A RecordAnalyzer object. An
                analysis must have been completed before passing this object to
                CfgEditor.
            input_stream (TextIO): An input CFG text stream.
            output_stream (TextIO): An output CFG text stream.
            primary_event (str): An event used to color basic blocks.
        """
        self.analyzer = analyzer
        self.input_stream = input_stream
        self.output_stream = output_stream
        self.primary_event = primary_event

        self.basic_block_event_counts: DefaultDict[
            str, int] = collections.defaultdict(int)
        self.buffer: List[str] = []
        # Declared only; it is assigned by update_current_method() once a
        # profiled method is encountered in the input.
        self.current_method: analyze.Method
        self.event_names = events.sort_event_names(self.analyzer.event_counts)
        self.flags_offset = 0
        self.isa = ''
        self.padding = ''
        self.saved_flags: List[str] = []
        self.state = State.START

    def edit(self) -> None:
        """Annotates a CFG file with profiling information.

        Parsing and architecture errors are logged and stop the annotation;
        they are not propagated to the caller.
        """
        for lineno, raw_line in self.lines():
            line = raw_line.strip()
            try:
                self.parse_line(line)
            except exceptions.ArchitectureError as ex:
                logging.error(ex)
                return
            except exceptions.ParseError as ex:
                logging.error('Line %d: %s', lineno, ex)
                return

            # A state handler that wants to replace the current line fills
            # the buffer; otherwise the input line is copied through verbatim.
            if self.buffer:
                self.output_stream.write(''.join(self.buffer))
                self.buffer = []
            else:
                self.output_stream.write(raw_line)

        try:
            self.parse_line(EOF_MARKER)
        except exceptions.ArchitectureError as ex:
            logging.error(ex)
            return
        except exceptions.ParseError:
            # Several states accept only one specific directive and raise on
            # anything else, so a truncated file surfaces here as a
            # ParseError rather than as a non-END state.
            logging.error('Unexpected end-of-file while parsing the CFG file')
            return

        if self.state != State.END:
            logging.error('Unexpected end-of-file while parsing the CFG file')

    def lines(self) -> Iterator[Tuple[int, str]]:
        """Iterates over lines from the input CFG stream.

        Yields:
            Tuple[int, str]: A line number and a non-empty line.
        """
        for lineno, line in enumerate(self.input_stream, 1):
            if line:
                yield lineno, line

    def parse_line(self, line: str) -> None:
        """Parses a line from the input CFG file.

        Advances the finite state machine according to the current state and
        the given line, and triggers the matching annotation step.

        Args:
            line (str): A line to parse.

        Raises:
            exceptions.ParseError: An error occurred during parsing.
        """
        if self.state == State.START:
            if line == EOF_MARKER:
                self.state = State.END
            elif line == 'begin_compilation':
                self.state = State.PARSE_METHOD_NAME
            else:
                raise exceptions.ParseError(
                    'Expected a `begin_compilation` directive')

        elif self.state == State.PARSE_METHOD_NAME:
            method_name = parse.parse_name(line)
            if not self.isa:
                self.set_isa(method_name)

            if method_name in self.analyzer.methods:
                self.update_current_method(method_name)
                self.state = State.SKIP_TO_CFG
            else:
                # If no profiling information has been recorded for this
                # method, skip it
                self.state = State.SKIP_METHOD

        elif self.state == State.SKIP_METHOD:
            if line == EOF_MARKER:
                self.state = State.END
            elif line == 'begin_compilation':
                self.state = State.PARSE_METHOD_NAME

        elif self.state == State.SKIP_TO_CFG:
            if line == 'end_compilation':
                self.state = State.START_CFG

        elif self.state == State.START_CFG:
            if line == 'begin_cfg':
                self.state = State.IS_DISASSEMBLY_PASS
            else:
                raise exceptions.ParseError('Expected a `begin_cfg` directive')

        elif self.state == State.IS_DISASSEMBLY_PASS:
            pass_name = parse.parse_name(line)
            if pass_name == 'disassembly (after)':
                self.state = State.PARSE_FLAGS
            else:
                self.state = State.SKIP_PASS

        elif self.state == State.SKIP_PASS:
            if line == 'end_cfg':
                self.state = State.END_CFG

        elif self.state == State.PARSE_FLAGS:
            if line.startswith('flags'):
                self.update_saved_flags(line)
                self.state = State.SKIP_TO_HIR

        elif self.state == State.SKIP_TO_HIR:
            if line == 'begin_HIR':
                self.state = State.HIR_INSTRUCTION

        elif self.state == State.HIR_INSTRUCTION:
            if line.endswith(END_INSTRUCTION_MARKER):
                # If no disassembly is available for this HIR instruction, skip
                # it
                pass
            elif line == 'end_HIR':
                self.state = State.END_HIR
            else:
                self.state = State.DISASSEMBLY

        elif self.state == State.DISASSEMBLY:
            if line == END_INSTRUCTION_MARKER:
                self.state = State.HIR_INSTRUCTION
            else:
                self.annotate_instruction(line)

        elif self.state == State.END_HIR:
            if line == 'end_block':
                self.annotate_block()
                self.state = State.END_BLOCK
            else:
                raise exceptions.ParseError('Expected a `end_block` directive')

        elif self.state == State.END_BLOCK:
            if line == 'begin_block':
                self.state = State.PARSE_FLAGS
            elif line == 'end_cfg':
                logging.info('Annotated %s', self.current_method.name)
                self.state = State.END_CFG
            else:
                raise exceptions.ParseError(
                    'Expected a `begin_block` or `end_cfg` directive')

        elif self.state == State.END_CFG:
            if line == EOF_MARKER:
                self.state = State.END
            elif line == 'begin_cfg':
                self.state = State.IS_DISASSEMBLY_PASS
            elif line == 'begin_compilation':
                self.state = State.PARSE_METHOD_NAME

    def set_isa(self, metadata: str) -> None:
        """Sets the instruction set architecture.

        If the metadata carries no `isa:<name>` field, the ISA is recorded as
        "unknown" and assumed compatible. An ISA that is not listed in the
        compatibility table is treated as incompatible.

        Args:
            metadata (str): The input CFG file metadata.

        Raises:
            exceptions.ArchitectureError: An error occurred when the input CFG
                file ISA is incompatible with the target architecture.
        """
        match = re.search(r'isa:(\w+)', metadata)
        if not match:
            logging.warning(
                'Could not deduce the CFG file ISA, assuming it is compatible '
                'with the target architecture %s', self.analyzer.target_arch)
            self.isa = 'unknown'
            return

        self.isa = match.group(1)

        # Map CFG file ISAs to compatible target architectures
        target_archs = {
            'x86': [r'x86$', r'x86_64$'],
            'x86_64': [r'x86_64$'],
            'arm': [r'armv7', r'armv8'],
            'arm64': [r'aarch64$', r'armv8'],
        }

        # An ISA missing from the table yields no patterns, so it falls
        # through to the ArchitectureError below instead of a KeyError that
        # edit() would not catch.
        compatible_patterns = target_archs.get(self.isa, [])
        if not any(
                re.match(target_arch, self.analyzer.target_arch)
                for target_arch in compatible_patterns):
            raise exceptions.ArchitectureError(
                f'The CFG file ISA {self.isa} is incompatible with the target '
                f'architecture {self.analyzer.target_arch}')

    def update_current_method(self, method_name: str) -> None:
        """Updates the current method and the padding string.

        Also queues an annotated method name directive in the buffer, to be
        written in place of the current input line.

        Args:
            method_name (str): The name of a method being annotated.
        """
        self.current_method = self.analyzer.methods[method_name]

        annotations = []
        for event_name in self.event_names:
            event_count = self.current_method.event_counts[event_name]
            annotation = self.generate_method_annotation(
                event_name, event_count)
            annotations.append(annotation)

        info = ', '.join(annotations)
        # By default, c1visualizer displays short method names which are built
        # by finding the first open parenthesis. To keep that behavior intact,
        # the profiling information is enclosed in square brackets.
        directive = parse.build_name(f'[{info}] {method_name}')
        self.buffer.append(f'{directive}\n')

        # The padding must be as wide as the longest instruction annotation
        # of this method so that all instructions stay aligned.
        max_length = 0
        for event_name in self.event_names:
            # default=0 keeps a method without recorded instructions from
            # raising ValueError on an empty sequence.
            max_event_count = max(
                (instruction.event_counts[event_name]
                 for instruction in self.current_method.instructions.values()),
                default=0)
            annotation = self.generate_instruction_annotation(
                event_name, max_event_count)

            max_length = max(max_length, len(annotation))

        self.padding = '_' + ' ' * max_length

    def update_saved_flags(self, line: str) -> None:
        """Updates the saved flags and saves space for a block annotation.

        The flags directive is replaced in the output by a line of spaces
        which annotate_block() later overwrites, once the event counts of the
        basic block are known.

        Args:
            line (str): A line containing a flags directive.
        """
        self.saved_flags = parse.parse_flags(line)
        self.flags_offset = self.output_stream.tell()

        flags = self.saved_flags.copy()
        for event_name in self.event_names:
            # The current method could have only one basic block, making the
            # maximum block event counts equal to the method ones
            event_count = self.current_method.event_counts[event_name]
            annotation = self.generate_block_annotation(event_name, event_count)
            flags.append(annotation)

        # Save space for a possible performance flag
        flags.append('LO')

        padding = ' ' * len(parse.build_flags(flags))
        self.buffer.append(f'{padding}\n')

    def annotate_block(self) -> None:
        """Annotates a basic block.

        Writes the final flags directive over the space reserved by
        update_saved_flags(), then resets the per-block event counts.
        """
        flags = []
        for event_name in self.event_names:
            event_count = self.basic_block_event_counts[event_name]
            annotation = self.generate_block_annotation(event_name, event_count)
            flags.append(annotation)

        flag = self.generate_performance_flag()
        if flag:
            flags.append(flag)

        flags.extend(self.saved_flags)

        self.basic_block_event_counts.clear()

        # Jump back to the reserved flags line, fill it in, and return to the
        # end of the stream so subsequent writes append as before.
        self.output_stream.seek(self.flags_offset)
        self.output_stream.write(parse.build_flags(flags))
        self.output_stream.seek(0, os.SEEK_END)

    def annotate_instruction(self, line: str) -> None:
        """Annotates an instruction.

        The annotated text is queued in the buffer, and the event counts of
        the instruction are added to the current basic block totals.

        Args:
            line (str): A line containing an instruction to annotate.
        """
        addr = parse.parse_address(line)

        instruction = self.current_method.instructions.get(addr)
        if not instruction:
            # If no profiling information has been recorded for this
            # instruction, skip it
            self.buffer.append(f'{self.padding}{line}\n')
            return

        for eventno, event_name in enumerate(self.event_names):
            event_count = instruction.event_counts[event_name]
            self.basic_block_event_counts[event_name] += event_count
            annotation = self.generate_padded_instruction_annotation(
                event_name, event_count)

            # Only the first event shares its line with the instruction; the
            # remaining events are emitted on lines of their own.
            if eventno:
                self.buffer.append(f'{annotation}\n')
            else:
                self.buffer.append(f'{annotation} {line}\n')

    def generate_performance_flag(self) -> str:
        """Generates a performance flag for the current basic block.

        For example, a `LO` (low) flag indicates the block is responsible for 1
        to 10% of the current method primary event (cpu-cycles by default).

        Returns:
            str: A performance flag, or an empty string if the block
            contribution is not high enough.
        """
        ranges = [
            # Low
            (1, 10, 'LO'),
            # Moderate
            (10, 30, 'MO'),
            # Considerable
            (30, 50, 'CO'),
            # High
            (50, 101, 'HI'),
        ]

        ratio = 0
        method_event_count = self.current_method.event_counts[
            self.primary_event]
        if method_event_count:
            ratio = int(self.basic_block_event_counts[self.primary_event] /
                        method_event_count * 100)

        for start, end, name in ranges:
            if start <= ratio < end:
                return name

        return ''

    def generate_padded_instruction_annotation(self, event_name: str,
                                               event_count: int) -> str:
        """Generates a padded instruction annotation.

        Args:
            event_name (str): An event name.
            event_count (int): An event count.

        Returns:
            str: A padded instruction annotation.
        """
        annotation = self.generate_instruction_annotation(
            event_name, event_count)

        # Remove one from the final length as a space may be added at the end
        # of the annotation. The final length will always be positive as the
        # length of the current padding is one more than the length of the
        # longest annotation for the current method.
        padding = ' ' * (len(self.padding) - len(annotation) - 1)

        # Split on the last colon: the event name may itself contain colons
        # (e.g. `cpu-cycles:u`), whereas the count and ratio never do.
        name_part, _, count_part = annotation.rpartition(':')

        return f'{name_part}:{padding}{count_part}'

    def generate_method_annotation(self, event_name: str,
                                   event_count: int) -> str:
        """Generates a method annotation.

        Method annotations are relative to the whole analysis and exclude the
        event count.

        Args:
            event_name (str): An event name.
            event_count (int): An event count.

        Returns:
            str: A method annotation.
        """
        total_event_count = self.analyzer.event_counts[event_name]
        return self.generate_annotation(event_name,
                                        event_count,
                                        total_event_count,
                                        include_count=False)

    def generate_block_annotation(self, event_name: str,
                                  event_count: int) -> str:
        """Generates a basic block annotation.

        Basic block annotations are relative to the current method and exclude
        the event count.

        Args:
            event_name (str): An event name.
            event_count (int): An event count.

        Returns:
            str: A basic block annotation.
        """
        total_event_count = self.current_method.event_counts[event_name]
        return self.generate_annotation(event_name,
                                        event_count,
                                        total_event_count,
                                        include_count=False)

    def generate_instruction_annotation(self, event_name: str,
                                        event_count: int) -> str:
        """Generates an instruction annotation.

        Instruction annotations are relative to the current method and include
        the event count.

        Args:
            event_name (str): An event name.
            event_count (int): An event count.

        Returns:
            str: An instruction annotation.
        """
        total_event_count = self.current_method.event_counts[event_name]
        return self.generate_annotation(event_name,
                                        event_count,
                                        total_event_count,
                                        include_count=True)

    # pylint: disable=no-self-use
    def generate_annotation(self, event_name: str, event_count: int,
                            total_event_count: int, include_count: bool) -> str:
        """Generates an annotation.

        The ratio is reported as 0% when the total event count is zero.

        Args:
            event_name (str): An event name.
            event_count (int): An event count.
            total_event_count (int): A total event count.
            include_count (bool): If True, includes the event count alongside
                the event name and ratio.

        Returns:
            str: An annotation.
        """
        ratio = 0.0
        if total_event_count:
            ratio = event_count / total_event_count

        if include_count:
            return f'{event_name}: {event_count} ({ratio:.2%})'

        return f'{event_name}: {ratio:06.2%}'