• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (C) 2020 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#   http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""Classes for annotating a CFG file with profiling information.
15
16Attributes:
17    END_INSTRUCTION_MARKER (str): The marker used to indicate the end of a HIR
18        instruction.
19    EOF_MARKER (str): The marker used to indicate that the end-of-file has been
20        reached.
21"""
22
23import collections
24import enum
25import logging
26import os
27import re
28
29from typing import DefaultDict, Iterator, List, TextIO, Tuple
30
31from perf2cfg import analyze
32from perf2cfg import events
33from perf2cfg import exceptions
34from perf2cfg import parse
35
# Line that closes the disassembly of a HIR instruction; the parser compares
# stripped input lines against it to leave the DISASSEMBLY state.
END_INSTRUCTION_MARKER = '<|@'
# Sentinel fed to the parser by CfgEditor.edit() once the input stream is
# exhausted; it never appears in the input file itself.
EOF_MARKER = '<EOF>'
38
39
class State(enum.Enum):
    """Enumerates the states of the CfgEditor line parser.

    Members are numbered consecutively, starting at 1, in declaration order.
    """
    # Compilation header
    START = enum.auto()
    PARSE_METHOD_NAME = enum.auto()
    SKIP_METHOD = enum.auto()
    SKIP_TO_CFG = enum.auto()
    # Pass selection
    START_CFG = enum.auto()
    IS_DISASSEMBLY_PASS = enum.auto()
    SKIP_PASS = enum.auto()
    # Basic block contents
    PARSE_FLAGS = enum.auto()
    SKIP_TO_HIR = enum.auto()
    HIR_INSTRUCTION = enum.auto()
    DISASSEMBLY = enum.auto()
    END_HIR = enum.auto()
    END_BLOCK = enum.auto()
    # Termination
    END_CFG = enum.auto()
    END = enum.auto()
57
58
class CfgEditor:
    """CfgEditor annotates a CFG file with profiling information.

    CfgEditor does *not* edit the input CFG file in place. Instead, it reads
    the input file line by line, generates annotations from profiling
    information, and writes an annotated CFG file to a given path.

    CfgEditor includes a CFG file parser based on a finite state machine. This
    parser supports CFG files in the c1visualizer format dumped by the ART
    optimizing compiler:
        - The CFG file must be valid (correctly parsed by c1visualizer).
        - Each line must contain only one directive.
        - Disassembly of an IR instruction must end with the `<|@` marker on a
          newline.

    Blank (whitespace-only) input lines are ignored: they are neither parsed
    nor copied to the output.

    Attributes:
        analyzer (analyze.RecordAnalyzer): A RecordAnalyzer object.
        input_stream (TextIO): An input CFG text stream.
        output_stream (TextIO): An output CFG text stream. It must support
            `tell` and `seek`, as block annotations are written back over
            space reserved earlier in the stream.
        primary_event (str): An event used to color basic blocks.
        basic_block_event_counts (DefaultDict[str, int]): A mapping of event
            names to their total number of events for the current basic block.
        buffer (List[str]): A list of strings to be written to the output CFG
            file instead of the current line from the input CFG file.
        current_method (analyze.Method): A Method object representing the
            current method being annotated.
        event_names (List[str]): A list of sorted event names from the
            analysis.
        flags_offset (int): An output file offset pointing to the last flags
            directive seen.
        isa (str): The instruction set architecture as defined in the input CFG
            file metadata, or the string "unknown" if no metadata was found.
        padding (str): A string used to pad assembly instructions with no
            profiling information.
        saved_flags (List[str]): A list of strings representing the flags of
            the current basic block being parsed.
        state (State): A State value representing the internal state of the
            parser.
    """

    def __init__(self,
                 analyzer: 'analyze.RecordAnalyzer',
                 input_stream: TextIO,
                 output_stream: TextIO,
                 primary_event: str = 'cpu-cycles') -> None:
        """Instantiates a CfgEditor.

        Args:
            analyzer (analyze.RecordAnalyzer): A RecordAnalyzer object. An
                analysis must have been completed before passing this object to
                CfgEditor.
            input_stream (TextIO): An input CFG text stream.
            output_stream (TextIO): An output CFG text stream.
            primary_event (str): An event used to color basic blocks.
        """
        self.analyzer = analyzer
        self.input_stream = input_stream
        self.output_stream = output_stream
        self.primary_event = primary_event

        self.basic_block_event_counts: DefaultDict[
            str, int] = collections.defaultdict(int)
        self.buffer: List[str] = []
        # Assigned by update_current_method() once a profiled method is found
        self.current_method: analyze.Method
        self.event_names = events.sort_event_names(self.analyzer.event_counts)
        self.flags_offset = 0
        self.isa = ''
        self.padding = ''
        self.saved_flags: List[str] = []
        self.state = State.START

    def edit(self) -> None:
        """Annotates a CFG file with profiling information.

        Parsing and architecture errors are logged and stop the annotation;
        they are not propagated to the caller.
        """
        for lineno, raw_line in self.lines():
            line = raw_line.strip()
            try:
                self.parse_line(line)
            except exceptions.ArchitectureError as ex:
                logging.error(ex)
                return
            except exceptions.ParseError as ex:
                logging.error('Line %d: %s', lineno, ex)
                return

            # The parser fills the buffer whenever the current line has to be
            # replaced; otherwise the input line is copied verbatim
            if self.buffer:
                self.output_stream.write(''.join(self.buffer))
                self.buffer = []
            else:
                self.output_stream.write(raw_line)

        try:
            self.parse_line(EOF_MARKER)
        except exceptions.ParseError:
            # States expecting a specific directive reject the end-of-file
            # marker. The state is left untouched in that case, so the
            # truncated file is reported by the check below.
            pass

        if self.state != State.END:
            logging.error('Unexpected end-of-file while parsing the CFG file')

    def lines(self) -> Iterator[Tuple[int, str]]:
        """Iterates over lines from the input CFG stream.

        Lines are yielded unmodified (including their line terminator). Lines
        containing only whitespace are skipped, but still count towards the
        line numbers.

        Yields:
            Tuple[int, str]: A line number and a non-empty line.
        """
        for lineno, line in enumerate(self.input_stream, 1):
            # Lines read from a stream keep their newline, so emptiness has to
            # be tested on the stripped line
            if line.strip():
                yield lineno, line

    def parse_line(self, line: str) -> None:
        """Parses a line from the input CFG file.

        Advances the state machine according to the current state and the
        given line, and triggers the matching annotation step.

        Args:
            line (str): A stripped line to parse, or `EOF_MARKER`.

        Raises:
            exceptions.ArchitectureError: The input CFG file ISA is
                incompatible with the target architecture.
            exceptions.ParseError: An error occurred during parsing.
        """
        if self.state == State.START:
            if line == EOF_MARKER:
                self.state = State.END
            elif line == 'begin_compilation':
                self.state = State.PARSE_METHOD_NAME
            else:
                raise exceptions.ParseError(
                    'Expected a `begin_compilation` directive')

        elif self.state == State.PARSE_METHOD_NAME:
            method_name = parse.parse_name(line)
            # The ISA is deduced once, from the first name directive seen
            if not self.isa:
                self.set_isa(method_name)

            if method_name in self.analyzer.methods:
                self.update_current_method(method_name)
                self.state = State.SKIP_TO_CFG
            else:
                # If no profiling information has been recorded for this
                # method, skip it
                self.state = State.SKIP_METHOD

        elif self.state == State.SKIP_METHOD:
            if line == EOF_MARKER:
                self.state = State.END
            elif line == 'begin_compilation':
                self.state = State.PARSE_METHOD_NAME

        elif self.state == State.SKIP_TO_CFG:
            if line == 'end_compilation':
                self.state = State.START_CFG

        elif self.state == State.START_CFG:
            if line == 'begin_cfg':
                self.state = State.IS_DISASSEMBLY_PASS
            else:
                raise exceptions.ParseError('Expected a `begin_cfg` directive')

        elif self.state == State.IS_DISASSEMBLY_PASS:
            pass_name = parse.parse_name(line)
            # Only the disassembly pass is annotated
            if pass_name == 'disassembly (after)':
                self.state = State.PARSE_FLAGS
            else:
                self.state = State.SKIP_PASS

        elif self.state == State.SKIP_PASS:
            if line == 'end_cfg':
                self.state = State.END_CFG

        elif self.state == State.PARSE_FLAGS:
            if line.startswith('flags'):
                self.update_saved_flags(line)
                self.state = State.SKIP_TO_HIR

        elif self.state == State.SKIP_TO_HIR:
            if line == 'begin_HIR':
                self.state = State.HIR_INSTRUCTION

        elif self.state == State.HIR_INSTRUCTION:
            if line.endswith(END_INSTRUCTION_MARKER):
                # If no disassembly is available for this HIR instruction, skip
                # it
                pass
            elif line == 'end_HIR':
                self.state = State.END_HIR
            else:
                self.state = State.DISASSEMBLY

        elif self.state == State.DISASSEMBLY:
            if line == END_INSTRUCTION_MARKER:
                self.state = State.HIR_INSTRUCTION
            else:
                self.annotate_instruction(line)

        elif self.state == State.END_HIR:
            if line == 'end_block':
                self.annotate_block()
                self.state = State.END_BLOCK
            else:
                raise exceptions.ParseError('Expected a `end_block` directive')

        elif self.state == State.END_BLOCK:
            if line == 'begin_block':
                self.state = State.PARSE_FLAGS
            elif line == 'end_cfg':
                logging.info('Annotated %s', self.current_method.name)
                self.state = State.END_CFG
            else:
                raise exceptions.ParseError(
                    'Expected a `begin_block` or `end_cfg` directive')

        elif self.state == State.END_CFG:
            if line == EOF_MARKER:
                self.state = State.END
            elif line == 'begin_cfg':
                self.state = State.IS_DISASSEMBLY_PASS
            elif line == 'begin_compilation':
                self.state = State.PARSE_METHOD_NAME

    def set_isa(self, metadata: str) -> None:
        """Sets the instruction set architecture.

        If the metadata holds no `isa:<name>` entry, the ISA is set to
        "unknown" and assumed compatible with the target architecture.

        Args:
            metadata (str): The input CFG file metadata.

        Raises:
            exceptions.ArchitectureError: An error occurred when the input CFG
                file ISA is unsupported or incompatible with the target
                architecture.
        """
        match = re.search(r'isa:(\w+)', metadata)
        if not match:
            logging.warning(
                'Could not deduce the CFG file ISA, assuming it is compatible '
                'with the target architecture %s', self.analyzer.target_arch)
            self.isa = 'unknown'
            return

        self.isa = match.group(1)

        # Map CFG file ISAs to compatible target architectures
        target_archs = {
            'x86': [r'x86$', r'x86_64$'],
            'x86_64': [r'x86_64$'],
            'arm': [r'armv7', r'armv8'],
            'arm64': [r'aarch64$', r'armv8'],
        }

        compatible_archs = target_archs.get(self.isa)
        if compatible_archs is None:
            # An ISA missing from the table cannot be checked against the
            # target architecture; report it instead of failing on the lookup
            raise exceptions.ArchitectureError(
                f'The CFG file ISA {self.isa} is not supported')

        if not any(
                re.match(target_arch, self.analyzer.target_arch)
                for target_arch in compatible_archs):
            raise exceptions.ArchitectureError(
                f'The CFG file ISA {self.isa} is incompatible with the target '
                f'architecture {self.analyzer.target_arch}')

    def update_current_method(self, method_name: str) -> None:
        """Updates the current method and the padding string.

        Also buffers an annotated name directive replacing the current line.

        Args:
            method_name (str): The name of a method being annotated.
        """
        self.current_method = self.analyzer.methods[method_name]

        annotations = [
            self.generate_method_annotation(
                event_name, self.current_method.event_counts[event_name])
            for event_name in self.event_names
        ]

        info = ', '.join(annotations)
        # By default, c1visualizer displays short method names which are built
        # by finding the first open parenthesis. To keep that behavior intact,
        # the profiling information is enclosed in square brackets.
        directive = parse.build_name(f'[{info}] {method_name}')
        self.buffer.append(f'{directive}\n')

        # The padding is as wide as the longest instruction annotation of the
        # method, which is produced by the highest event count of each event
        max_length = 0
        for event_name in self.event_names:
            # A method without any recorded instruction has no maximum
            max_event_count = max(
                (instruction.event_counts[event_name]
                 for instruction in self.current_method.instructions.values()),
                default=0)
            annotation = self.generate_instruction_annotation(
                event_name, max_event_count)
            max_length = max(max_length, len(annotation))

        self.padding = '_' + ' ' * max_length

    def update_saved_flags(self, line: str) -> None:
        """Updates the saved flags and saves space for a block annotation.

        The flags directive is replaced in the output by a blank line wide
        enough to hold the annotated flags, which are only known once the
        whole block has been parsed (see `annotate_block`).

        Args:
            line (str): A line containing a flags directive.
        """
        self.saved_flags = parse.parse_flags(line)
        self.flags_offset = self.output_stream.tell()

        flags = self.saved_flags.copy()
        for event_name in self.event_names:
            # The current method could have only one basic block, making the
            # maximum block event counts equal to the method ones
            event_count = self.current_method.event_counts[event_name]
            annotation = self.generate_block_annotation(event_name, event_count)
            flags.append(annotation)

        # Save space for a possible performance flag
        flags.append('LO')

        padding = ' ' * len(parse.build_flags(flags))
        self.buffer.append(f'{padding}\n')

    def annotate_block(self) -> None:
        """Annotates a basic block.

        Writes the annotated flags directive over the space reserved by
        `update_saved_flags`, then resets the per-block event counts.
        """
        flags = []
        for event_name in self.event_names:
            event_count = self.basic_block_event_counts[event_name]
            annotation = self.generate_block_annotation(event_name, event_count)
            flags.append(annotation)

        flag = self.generate_performance_flag()
        if flag:
            flags.append(flag)

        flags.extend(self.saved_flags)

        self.basic_block_event_counts.clear()

        # Jump back to the reserved space, then return to the end of the
        # stream so that subsequent lines are appended
        self.output_stream.seek(self.flags_offset)
        self.output_stream.write(parse.build_flags(flags))
        self.output_stream.seek(0, os.SEEK_END)

    def annotate_instruction(self, line: str) -> None:
        """Annotates an instruction.

        Buffers one line per event: the first one carries the instruction, the
        following ones only carry an annotation. Event counts are accumulated
        for the current basic block.

        Args:
            line (str): A line containing an instruction to annotate.
        """
        addr = parse.parse_address(line)

        instruction = self.current_method.instructions.get(addr)
        if not instruction:
            # If no profiling information has been recorded for this
            # instruction, skip it
            self.buffer.append(f'{self.padding}{line}\n')
            return

        for eventno, event_name in enumerate(self.event_names):
            event_count = instruction.event_counts[event_name]
            self.basic_block_event_counts[event_name] += event_count
            annotation = self.generate_padded_instruction_annotation(
                event_name, event_count)

            if eventno:
                self.buffer.append(f'{annotation}\n')
            else:
                self.buffer.append(f'{annotation} {line}\n')

    def generate_performance_flag(self) -> str:
        """Generates a performance flag for the current basic block.

        For example, a `LO` (low) flag indicates the block is responsible for 1
        to 10% of the current method primary event (cpu-cycles by default).

        Returns:
            str: A performance flag, or an empty string if the block
                contribution is not high enough.
        """
        # Half-open percentage ranges: start <= ratio < end
        ranges = [
            # Low
            (1, 10, 'LO'),
            # Moderate
            (10, 30, 'MO'),
            # Considerable
            (30, 50, 'CO'),
            # High
            (50, 101, 'HI'),
        ]

        ratio = 0
        method_event_count = self.current_method.event_counts[
            self.primary_event]
        if method_event_count:
            block_event_count = self.basic_block_event_counts[
                self.primary_event]
            # Whole percentage, rounded down
            ratio = 100 * block_event_count // method_event_count

        for start, end, name in ranges:
            if start <= ratio < end:
                return name

        return ''

    def generate_padded_instruction_annotation(self, event_name: str,
                                               event_count: int) -> str:
        """Generates a padded instruction annotation.

        The padding is inserted right after the colon separating the event
        name from the event count, so that annotations of a method all have
        the same length.

        Args:
            event_name (str): An event name.
            event_count (int): An event count.

        Returns:
            str: A padded instruction annotation.
        """
        annotation = self.generate_instruction_annotation(
            event_name, event_count)

        # Remove one from the final length as a space may be added at the end
        # of the annotation. The final length will always be positive as the
        # length of the current padding is one more than the length of the
        # longest annotation for the current method.
        padding = ' ' * (len(self.padding) - len(annotation) - 1)

        # Split on the last colon: the event name itself may contain colons
        # (e.g. `cpu-cycles:u`), whereas the count and ratio never do
        name, _, count = annotation.rpartition(':')

        return f'{name}:{padding}{count}'

    def generate_method_annotation(self, event_name: str,
                                   event_count: int) -> str:
        """Generates a method annotation.

        Method annotations are relative to the whole analysis and exclude the
        event count.

        Args:
            event_name (str): An event name.
            event_count (int): An event count.

        Returns:
            str: A method annotation.
        """
        total_event_count = self.analyzer.event_counts[event_name]
        return self.generate_annotation(event_name,
                                        event_count,
                                        total_event_count,
                                        include_count=False)

    def generate_block_annotation(self, event_name: str,
                                  event_count: int) -> str:
        """Generates a basic block annotation.

        Basic block annotations are relative to the current method and exclude
        the event count.

        Args:
            event_name (str): An event name.
            event_count (int): An event count.

        Returns:
            str: A basic block annotation.
        """
        total_event_count = self.current_method.event_counts[event_name]
        return self.generate_annotation(event_name,
                                        event_count,
                                        total_event_count,
                                        include_count=False)

    def generate_instruction_annotation(self, event_name: str,
                                        event_count: int) -> str:
        """Generates an instruction annotation.

        Instruction annotations are relative to the current method and include
        the event count.

        Args:
            event_name (str): An event name.
            event_count (int): An event count.

        Returns:
            str: An instruction annotation.
        """
        total_event_count = self.current_method.event_counts[event_name]
        return self.generate_annotation(event_name,
                                        event_count,
                                        total_event_count,
                                        include_count=True)

    # pylint: disable=no-self-use
    def generate_annotation(self, event_name: str, event_count: int,
                            total_event_count: int, include_count: bool) -> str:
        """Generates an annotation.

        The ratio of `event_count` to `total_event_count` is formatted as a
        percentage with two decimals; it is zero when the total is zero.

        Args:
            event_name (str): An event name.
            event_count (int): An event count.
            total_event_count (int): A total event count.
            include_count (bool): If True, includes the event count alongside
                the event name and ratio.

        Returns:
            str: An annotation.
        """
        ratio = 0.0
        if total_event_count:
            ratio = event_count / total_event_count

        if include_count:
            return f'{event_name}: {event_count} ({ratio:.2%})'

        # Zero-padded to at least six characters (e.g. `05.00%`)
        return f'{event_name}: {ratio:06.2%}'
550