1"""Print a summary of specialization stats for all files in the 2default stats folders. 3""" 4 5from __future__ import annotations 6 7# NOTE: Bytecode introspection modules (opcode, dis, etc.) should only 8# be imported when loading a single dataset. When comparing datasets, it 9# could get it wrong, leading to subtle errors. 10 11import argparse 12import collections 13from collections.abc import KeysView 14from dataclasses import dataclass 15from datetime import date 16import enum 17import functools 18import itertools 19import json 20from operator import itemgetter 21import os 22from pathlib import Path 23import re 24import sys 25import textwrap 26from typing import Any, Callable, TextIO, TypeAlias 27 28 29RawData: TypeAlias = dict[str, Any] 30Rows: TypeAlias = list[tuple] 31Columns: TypeAlias = tuple[str, ...] 32RowCalculator: TypeAlias = Callable[["Stats"], Rows] 33 34 35# TODO: Check for parity 36 37 38if os.name == "nt": 39 DEFAULT_DIR = "c:\\temp\\py_stats\\" 40else: 41 DEFAULT_DIR = "/tmp/py_stats/" 42 43 44SOURCE_DIR = Path(__file__).parents[2] 45 46 47TOTAL = "specialization.hit", "specialization.miss", "execution_count" 48 49 50def pretty(name: str) -> str: 51 return name.replace("_", " ").lower() 52 53 54def _load_metadata_from_source(): 55 def get_defines(filepath: Path, prefix: str = "SPEC_FAIL"): 56 with open(SOURCE_DIR / filepath) as spec_src: 57 defines = collections.defaultdict(list) 58 start = "#define " + prefix + "_" 59 for line in spec_src: 60 line = line.strip() 61 if not line.startswith(start): 62 continue 63 line = line[len(start) :] 64 name, val = line.split() 65 defines[int(val.strip())].append(name.strip()) 66 return defines 67 68 import opcode 69 70 return { 71 "_specialized_instructions": [ 72 op for op in opcode._specialized_opmap.keys() if "__" not in op # type: ignore 73 ], 74 "_stats_defines": get_defines( 75 Path("Include") / "cpython" / "pystats.h", "EVAL_CALL" 76 ), 77 "_defines": get_defines(Path("Python") / "specialize.c"), 78 } 79 80 81def load_raw_data(input: Path) -> RawData: 82 if input.is_file(): 83 with open(input, "r") as fd: 84 data = json.load(fd) 85 86 data["_stats_defines"] = {int(k): v for k, v in data["_stats_defines"].items()} 87 data["_defines"] = {int(k): v for k, v in data["_defines"].items()} 88 89 return data 90 91 elif input.is_dir(): 92 stats = collections.Counter[str]() 93 94 for filename in input.iterdir(): 95 with open(filename) as fd: 96 for line in fd: 97 try: 98 key, value = line.split(":") 99 except ValueError: 100 print( 101 f"Unparsable line: '{line.strip()}' in {filename}", 102 file=sys.stderr, 103 ) 104 continue 105 # Hack to handle older data files where some uops 106 # are missing an underscore prefix in their name 107 if key.startswith("uops[") and key[5:6] != "_": 108 key = "uops[_" + key[5:] 109 stats[key.strip()] += int(value) 110 stats["__nfiles__"] += 1 111 112 data = dict(stats) 113 data.update(_load_metadata_from_source()) 114 return data 115 116 else: 117 raise ValueError(f"{input} is not a file or directory path") 118 119 120def save_raw_data(data: RawData, json_output: TextIO): 121 json.dump(data, json_output) 122 123 124@dataclass(frozen=True) 125class Doc: 126 text: str 127 doc: str 128 129 def markdown(self) -> str: 130 return textwrap.dedent( 131 f""" 132 {self.text} 133 <details> 134 <summary>ⓘ</summary> 135 136 {self.doc} 137 </details> 138 """ 139 ) 140 141 142class Count(int): 143 def markdown(self) -> str: 144 return format(self, ",d") 145 146 147@dataclass(frozen=True) 148class Ratio: 149 num: int 150 den: int | None = None 151 percentage: bool = True 152 153 def __float__(self): 154 if self.den == 0: 155 return 0.0 156 elif self.den is None: 157 return self.num 158 else: 159 return self.num / self.den 160 161 def markdown(self) -> str: 162 if self.den is None: 163 return "" 164 elif self.den == 0: 165 if self.num != 0: 166 return f"{self.num:,} / 0 !!" 167 return "" 168 elif self.percentage: 169 return f"{self.num / self.den:,.01%}" 170 else: 171 return f"{self.num / self.den:,.02f}" 172 173 174class DiffRatio(Ratio): 175 def __init__(self, base: int | str, head: int | str): 176 if isinstance(base, str) or isinstance(head, str): 177 super().__init__(0, 0) 178 else: 179 super().__init__(head - base, base) 180 181 182class OpcodeStats: 183 """ 184 Manages the data related to specific set of opcodes, e.g. tier1 (with prefix 185 "opcode") or tier2 (with prefix "uops"). 186 """ 187 188 def __init__(self, data: dict[str, Any], defines, specialized_instructions): 189 self._data = data 190 self._defines = defines 191 self._specialized_instructions = specialized_instructions 192 193 def get_opcode_names(self) -> KeysView[str]: 194 return self._data.keys() 195 196 def get_pair_counts(self) -> dict[tuple[str, str], int]: 197 pair_counts = {} 198 for name_i, opcode_stat in self._data.items(): 199 for key, value in opcode_stat.items(): 200 if value and key.startswith("pair_count"): 201 name_j, _, _ = key[len("pair_count") + 1 :].partition("]") 202 pair_counts[(name_i, name_j)] = value 203 return pair_counts 204 205 def get_total_execution_count(self) -> int: 206 return sum(x.get("execution_count", 0) for x in self._data.values()) 207 208 def get_execution_counts(self) -> dict[str, tuple[int, int]]: 209 counts = {} 210 for name, opcode_stat in self._data.items(): 211 if "execution_count" in opcode_stat: 212 count = opcode_stat["execution_count"] 213 miss = 0 214 if "specializable" not in opcode_stat: 215 miss = opcode_stat.get("specialization.miss", 0) 216 counts[name] = (count, miss) 217 return counts 218 219 @functools.cache 220 def _get_pred_succ( 221 self, 222 ) -> tuple[dict[str, collections.Counter], dict[str, collections.Counter]]: 223 pair_counts = self.get_pair_counts() 224 225 predecessors: dict[str, collections.Counter] = collections.defaultdict( 226 collections.Counter 227 ) 228 successors: dict[str, collections.Counter] = collections.defaultdict( 229 collections.Counter 230 ) 231 for (first, second), count in pair_counts.items(): 232 if count: 233 predecessors[second][first] = count 234 successors[first][second] = count 235 236 return predecessors, successors 237 238 def get_predecessors(self, opcode: str) -> collections.Counter[str]: 239 return self._get_pred_succ()[0][opcode] 240 241 def get_successors(self, opcode: str) -> collections.Counter[str]: 242 return self._get_pred_succ()[1][opcode] 243 244 def _get_stats_for_opcode(self, opcode: str) -> dict[str, int]: 245 return self._data[opcode] 246 247 def get_specialization_total(self, opcode: str) -> int: 248 family_stats = self._get_stats_for_opcode(opcode) 249 return sum(family_stats.get(kind, 0) for kind in TOTAL) 250 251 def get_specialization_counts(self, opcode: str) -> dict[str, int]: 252 family_stats = self._get_stats_for_opcode(opcode) 253 254 result = {} 255 for key, value in sorted(family_stats.items()): 256 if key.startswith("specialization."): 257 label = key[len("specialization.") :] 258 if label in ("success", "failure") or label.startswith("failure_kinds"): 259 continue 260 elif key in ( 261 "execution_count", 262 "specializable", 263 ) or key.startswith("pair"): 264 continue 265 else: 266 label = key 267 result[label] = value 268 269 return result 270 271 def get_specialization_success_failure(self, opcode: str) -> dict[str, int]: 272 family_stats = self._get_stats_for_opcode(opcode) 273 result = {} 274 for key in ("specialization.success", "specialization.failure"): 275 label = key[len("specialization.") :] 276 val = family_stats.get(key, 0) 277 result[label] = val 278 return result 279 280 def get_specialization_failure_total(self, opcode: str) -> int: 281 return self._get_stats_for_opcode(opcode).get("specialization.failure", 0) 282 283 def get_specialization_failure_kinds(self, opcode: str) -> dict[str, int]: 284 def kind_to_text(kind: int, opcode: str): 285 if kind <= 8: 286 return pretty(self._defines[kind][0]) 287 if opcode == "LOAD_SUPER_ATTR": 288 opcode = "SUPER" 289 elif opcode.endswith("ATTR"): 290 opcode = "ATTR" 291 elif opcode in ("FOR_ITER", "SEND"): 292 opcode = "ITER" 293 elif opcode.endswith("SUBSCR"): 294 opcode = "SUBSCR" 295 for name in self._defines[kind]: 296 if name.startswith(opcode): 297 return pretty(name[len(opcode) + 1 :]) 298 return "kind " + str(kind) 299 300 family_stats = self._get_stats_for_opcode(opcode) 301 failure_kinds = [0] * 40 302 for key in family_stats: 303 if not key.startswith("specialization.failure_kind"): 304 continue 305 index = int(key[:-1].split("[")[1]) 306 failure_kinds[index] = family_stats[key] 307 return { 308 kind_to_text(index, opcode): value 309 for (index, value) in enumerate(failure_kinds) 310 if value 311 } 312 313 def is_specializable(self, opcode: str) -> bool: 314 return "specializable" in self._get_stats_for_opcode(opcode) 315 316 def get_specialized_total_counts(self) -> tuple[int, int, int]: 317 basic = 0 318 specialized_hits = 0 319 specialized_misses = 0 320 not_specialized = 0 321 for opcode, opcode_stat in self._data.items(): 322 if "execution_count" not in opcode_stat: 323 continue 324 count = opcode_stat["execution_count"] 325 if "specializable" in opcode_stat: 326 not_specialized += count 327 elif opcode in self._specialized_instructions: 328 miss = opcode_stat.get("specialization.miss", 0) 329 specialized_hits += count - miss 330 specialized_misses += miss 331 else: 332 basic += count 333 return basic, specialized_hits, specialized_misses, not_specialized 334 335 def get_deferred_counts(self) -> dict[str, int]: 336 return { 337 opcode: opcode_stat.get("specialization.deferred", 0) 338 for opcode, opcode_stat in self._data.items() 339 if opcode != "RESUME" 340 } 341 342 def get_misses_counts(self) -> dict[str, int]: 343 return { 344 opcode: opcode_stat.get("specialization.miss", 0) 345 for opcode, opcode_stat in self._data.items() 346 if not self.is_specializable(opcode) 347 } 348 349 def get_opcode_counts(self) -> dict[str, int]: 350 counts = {} 351 for opcode, entry in self._data.items(): 352 count = entry.get("count", 0) 353 if count: 354 counts[opcode] = count 355 return counts 356 357 358class Stats: 359 def __init__(self, data: RawData): 360 self._data = data 361 362 def get(self, key: str) -> int: 363 return self._data.get(key, 0) 364 365 @functools.cache 366 def get_opcode_stats(self, prefix: str) -> OpcodeStats: 367 opcode_stats = collections.defaultdict[str, dict](dict) 368 for key, value in self._data.items(): 369 if not key.startswith(prefix): 370 continue 371 name, _, rest = key[len(prefix) + 1 :].partition("]") 372 opcode_stats[name][rest.strip(".")] = value 373 return OpcodeStats( 374 opcode_stats, 375 self._data["_defines"], 376 self._data["_specialized_instructions"], 377 ) 378 379 def get_call_stats(self) -> dict[str, int]: 380 defines = self._data["_stats_defines"] 381 result = {} 382 for key, value in sorted(self._data.items()): 383 if "Calls to" in key: 384 result[key] = value 385 elif key.startswith("Calls "): 386 name, index = key[:-1].split("[") 387 label = f"{name} ({pretty(defines[int(index)][0])})" 388 result[label] = value 389 390 for key, value in sorted(self._data.items()): 391 if key.startswith("Frame"): 392 result[key] = value 393 394 return result 395 396 def get_object_stats(self) -> dict[str, tuple[int, int]]: 397 total_materializations = self._data.get("Object inline values", 0) 398 total_allocations = self._data.get("Object allocations", 0) + self._data.get( 399 "Object allocations from freelist", 0 400 ) 401 total_increfs = self._data.get( 402 "Object interpreter increfs", 0 403 ) + self._data.get("Object increfs", 0) 404 total_decrefs = self._data.get( 405 "Object interpreter decrefs", 0 406 ) + self._data.get("Object decrefs", 0) 407 408 result = {} 409 for key, value in self._data.items(): 410 if key.startswith("Object"): 411 if "materialize" in key: 412 den = total_materializations 413 elif "allocations" in key: 414 den = total_allocations 415 elif "increfs" in key: 416 den = total_increfs 417 elif "decrefs" in key: 418 den = total_decrefs 419 else: 420 den = None 421 label = key[6:].strip() 422 label = label[0].upper() + label[1:] 423 result[label] = (value, den) 424 return result 425 426 def get_gc_stats(self) -> list[dict[str, int]]: 427 gc_stats: list[dict[str, int]] = [] 428 for key, value in self._data.items(): 429 if not key.startswith("GC"): 430 continue 431 n, _, rest = key[3:].partition("]") 432 name = rest.strip() 433 gen_n = int(n) 434 while len(gc_stats) <= gen_n: 435 gc_stats.append({}) 436 gc_stats[gen_n][name] = value 437 return gc_stats 438 439 def get_optimization_stats(self) -> dict[str, tuple[int, int | None]]: 440 if "Optimization attempts" not in self._data: 441 return {} 442 443 attempts = self._data["Optimization attempts"] 444 created = self._data["Optimization traces created"] 445 executed = self._data["Optimization traces executed"] 446 uops = self._data["Optimization uops executed"] 447 trace_stack_overflow = self._data["Optimization trace stack overflow"] 448 trace_stack_underflow = self._data["Optimization trace stack underflow"] 449 trace_too_long = self._data["Optimization trace too long"] 450 trace_too_short = self._data["Optimization trace too short"] 451 inner_loop = self._data["Optimization inner loop"] 452 recursive_call = self._data["Optimization recursive call"] 453 low_confidence = self._data["Optimization low confidence"] 454 executors_invalidated = self._data["Executors invalidated"] 455 456 return { 457 Doc( 458 "Optimization attempts", 459 "The number of times a potential trace is identified. Specifically, this " 460 "occurs in the JUMP BACKWARD instruction when the counter reaches a " 461 "threshold.", 462 ): (attempts, None), 463 Doc( 464 "Traces created", "The number of traces that were successfully created." 465 ): (created, attempts), 466 Doc( 467 "Trace stack overflow", 468 "A trace is truncated because it would require more than 5 stack frames.", 469 ): (trace_stack_overflow, attempts), 470 Doc( 471 "Trace stack underflow", 472 "A potential trace is abandoned because it pops more frames than it pushes.", 473 ): (trace_stack_underflow, attempts), 474 Doc( 475 "Trace too long", 476 "A trace is truncated because it is longer than the instruction buffer.", 477 ): (trace_too_long, attempts), 478 Doc( 479 "Trace too short", 480 "A potential trace is abandoned because it it too short.", 481 ): (trace_too_short, attempts), 482 Doc( 483 "Inner loop found", "A trace is truncated because it has an inner loop" 484 ): (inner_loop, attempts), 485 Doc( 486 "Recursive call", 487 "A trace is truncated because it has a recursive call.", 488 ): (recursive_call, attempts), 489 Doc( 490 "Low confidence", 491 "A trace is abandoned because the likelihood of the jump to top being taken " 492 "is too low.", 493 ): (low_confidence, attempts), 494 Doc( 495 "Executors invalidated", 496 "The number of executors that were invalidated due to watched " 497 "dictionary changes.", 498 ): (executors_invalidated, created), 499 Doc("Traces executed", "The number of traces that were executed"): ( 500 executed, 501 None, 502 ), 503 Doc( 504 "Uops executed", 505 "The total number of uops (micro-operations) that were executed", 506 ): ( 507 uops, 508 executed, 509 ), 510 } 511 512 def get_optimizer_stats(self) -> dict[str, tuple[int, int | None]]: 513 attempts = self._data["Optimization optimizer attempts"] 514 successes = self._data["Optimization optimizer successes"] 515 no_memory = self._data["Optimization optimizer failure no memory"] 516 builtins_changed = self._data["Optimizer remove globals builtins changed"] 517 incorrect_keys = self._data["Optimizer remove globals incorrect keys"] 518 519 return { 520 Doc( 521 "Optimizer attempts", 522 "The number of times the trace optimizer (_Py_uop_analyze_and_optimize) was run.", 523 ): (attempts, None), 524 Doc( 525 "Optimizer successes", 526 "The number of traces that were successfully optimized.", 527 ): (successes, attempts), 528 Doc( 529 "Optimizer no memory", 530 "The number of optimizations that failed due to no memory.", 531 ): (no_memory, attempts), 532 Doc( 533 "Remove globals builtins changed", 534 "The builtins changed during optimization", 535 ): (builtins_changed, attempts), 536 Doc( 537 "Remove globals incorrect keys", 538 "The keys in the globals dictionary aren't what was expected", 539 ): (incorrect_keys, attempts), 540 } 541 542 def get_histogram(self, prefix: str) -> list[tuple[int, int]]: 543 rows = [] 544 for k, v in self._data.items(): 545 match = re.match(f"{prefix}\\[([0-9]+)\\]", k) 546 if match is not None: 547 entry = int(match.groups()[0]) 548 rows.append((entry, v)) 549 rows.sort() 550 return rows 551 552 def get_rare_events(self) -> list[tuple[str, int]]: 553 prefix = "Rare event " 554 return [ 555 (key[len(prefix) + 1 : -1].replace("_", " "), val) 556 for key, val in self._data.items() 557 if key.startswith(prefix) 558 ] 559 560 561class JoinMode(enum.Enum): 562 # Join using the first column as a key 563 SIMPLE = 0 564 # Join using the first column as a key, and indicate the change in the 565 # second column of each input table as a new column 566 CHANGE = 1 567 # Join using the first column as a key, indicating the change in the second 568 # column of each input table as a new column, and omit all other columns 569 CHANGE_ONE_COLUMN = 2 570 # Join using the first column as a key, and indicate the change as a new 571 # column, but don't sort by the amount of change. 572 CHANGE_NO_SORT = 3 573 574 575class Table: 576 """ 577 A Table defines how to convert a set of Stats into a specific set of rows 578 displaying some aspect of the data. 579 """ 580 581 def __init__( 582 self, 583 column_names: Columns, 584 calc_rows: RowCalculator, 585 join_mode: JoinMode = JoinMode.SIMPLE, 586 ): 587 self.columns = column_names 588 self.calc_rows = calc_rows 589 self.join_mode = join_mode 590 591 def join_row(self, key: str, row_a: tuple, row_b: tuple) -> tuple: 592 match self.join_mode: 593 case JoinMode.SIMPLE: 594 return (key, *row_a, *row_b) 595 case JoinMode.CHANGE | JoinMode.CHANGE_NO_SORT: 596 return (key, *row_a, *row_b, DiffRatio(row_a[0], row_b[0])) 597 case JoinMode.CHANGE_ONE_COLUMN: 598 return (key, row_a[0], row_b[0], DiffRatio(row_a[0], row_b[0])) 599 600 def join_columns(self, columns: Columns) -> Columns: 601 match self.join_mode: 602 case JoinMode.SIMPLE: 603 return ( 604 columns[0], 605 *("Base " + x for x in columns[1:]), 606 *("Head " + x for x in columns[1:]), 607 ) 608 case JoinMode.CHANGE | JoinMode.CHANGE_NO_SORT: 609 return ( 610 columns[0], 611 *("Base " + x for x in columns[1:]), 612 *("Head " + x for x in columns[1:]), 613 ) + ("Change:",) 614 case JoinMode.CHANGE_ONE_COLUMN: 615 return ( 616 columns[0], 617 "Base " + columns[1], 618 "Head " + columns[1], 619 "Change:", 620 ) 621 622 def join_tables(self, rows_a: Rows, rows_b: Rows) -> tuple[Columns, Rows]: 623 ncols = len(self.columns) 624 625 default = ("",) * (ncols - 1) 626 data_a = {x[0]: x[1:] for x in rows_a} 627 data_b = {x[0]: x[1:] for x in rows_b} 628 629 if len(data_a) != len(rows_a) or len(data_b) != len(rows_b): 630 raise ValueError("Duplicate keys") 631 632 # To preserve ordering, use A's keys as is and then add any in B that 633 # aren't in A 634 keys = list(data_a.keys()) + [k for k in data_b.keys() if k not in data_a] 635 rows = [ 636 self.join_row(k, data_a.get(k, default), data_b.get(k, default)) 637 for k in keys 638 ] 639 if self.join_mode in (JoinMode.CHANGE, JoinMode.CHANGE_ONE_COLUMN): 640 rows.sort(key=lambda row: abs(float(row[-1])), reverse=True) 641 642 columns = self.join_columns(self.columns) 643 return columns, rows 644 645 def get_table( 646 self, base_stats: Stats, head_stats: Stats | None = None 647 ) -> tuple[Columns, Rows]: 648 if head_stats is None: 649 rows = self.calc_rows(base_stats) 650 return self.columns, rows 651 else: 652 rows_a = self.calc_rows(base_stats) 653 rows_b = self.calc_rows(head_stats) 654 cols, rows = self.join_tables(rows_a, rows_b) 655 return cols, rows 656 657 658class Section: 659 """ 660 A Section defines a section of the output document. 661 """ 662 663 def __init__( 664 self, 665 title: str = "", 666 summary: str = "", 667 part_iter=None, 668 *, 669 comparative: bool = True, 670 doc: str = "", 671 ): 672 self.title = title 673 if not summary: 674 self.summary = title.lower() 675 else: 676 self.summary = summary 677 self.doc = textwrap.dedent(doc) 678 if part_iter is None: 679 part_iter = [] 680 if isinstance(part_iter, list): 681 682 def iter_parts(base_stats: Stats, head_stats: Stats | None): 683 yield from part_iter 684 685 self.part_iter = iter_parts 686 else: 687 self.part_iter = part_iter 688 self.comparative = comparative 689 690 691def calc_execution_count_table(prefix: str) -> RowCalculator: 692 def calc(stats: Stats) -> Rows: 693 opcode_stats = stats.get_opcode_stats(prefix) 694 counts = opcode_stats.get_execution_counts() 695 total = opcode_stats.get_total_execution_count() 696 cumulative = 0 697 rows: Rows = [] 698 for opcode, (count, miss) in sorted( 699 counts.items(), key=itemgetter(1), reverse=True 700 ): 701 cumulative += count 702 if miss: 703 miss_val = Ratio(miss, count) 704 else: 705 miss_val = None 706 rows.append( 707 ( 708 opcode, 709 Count(count), 710 Ratio(count, total), 711 Ratio(cumulative, total), 712 miss_val, 713 ) 714 ) 715 return rows 716 717 return calc 718 719 720def execution_count_section() -> Section: 721 return Section( 722 "Execution counts", 723 "Execution counts for Tier 1 instructions.", 724 [ 725 Table( 726 ("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"), 727 calc_execution_count_table("opcode"), 728 join_mode=JoinMode.CHANGE_ONE_COLUMN, 729 ) 730 ], 731 doc=""" 732 The "miss ratio" column shows the percentage of times the instruction 733 executed that it deoptimized. When this happens, the base unspecialized 734 instruction is not counted. 735 """, 736 ) 737 738 739def pair_count_section(prefix: str, title=None) -> Section: 740 def calc_pair_count_table(stats: Stats) -> Rows: 741 opcode_stats = stats.get_opcode_stats(prefix) 742 pair_counts = opcode_stats.get_pair_counts() 743 total = opcode_stats.get_total_execution_count() 744 745 cumulative = 0 746 rows: Rows = [] 747 for (opcode_i, opcode_j), count in itertools.islice( 748 sorted(pair_counts.items(), key=itemgetter(1), reverse=True), 100 749 ): 750 cumulative += count 751 rows.append( 752 ( 753 f"{opcode_i} {opcode_j}", 754 Count(count), 755 Ratio(count, total), 756 Ratio(cumulative, total), 757 ) 758 ) 759 return rows 760 761 return Section( 762 "Pair counts", 763 f"Pair counts for top 100 {title if title else prefix} pairs", 764 [ 765 Table( 766 ("Pair", "Count:", "Self:", "Cumulative:"), 767 calc_pair_count_table, 768 ) 769 ], 770 comparative=False, 771 doc=""" 772 Pairs of specialized operations that deoptimize and are then followed by 773 the corresponding unspecialized instruction are not counted as pairs. 774 """, 775 ) 776 777 778def pre_succ_pairs_section() -> Section: 779 def iter_pre_succ_pairs_tables(base_stats: Stats, head_stats: Stats | None = None): 780 assert head_stats is None 781 782 opcode_stats = base_stats.get_opcode_stats("opcode") 783 784 for opcode in opcode_stats.get_opcode_names(): 785 predecessors = opcode_stats.get_predecessors(opcode) 786 successors = opcode_stats.get_successors(opcode) 787 predecessors_total = predecessors.total() 788 successors_total = successors.total() 789 if predecessors_total == 0 and successors_total == 0: 790 continue 791 pred_rows = [ 792 (pred, Count(count), Ratio(count, predecessors_total)) 793 for (pred, count) in predecessors.most_common(5) 794 ] 795 succ_rows = [ 796 (succ, Count(count), Ratio(count, successors_total)) 797 for (succ, count) in successors.most_common(5) 798 ] 799 800 yield Section( 801 opcode, 802 f"Successors and predecessors for {opcode}", 803 [ 804 Table( 805 ("Predecessors", "Count:", "Percentage:"), 806 lambda *_: pred_rows, # type: ignore 807 ), 808 Table( 809 ("Successors", "Count:", "Percentage:"), 810 lambda *_: succ_rows, # type: ignore 811 ), 812 ], 813 ) 814 815 return Section( 816 "Predecessor/Successor Pairs", 817 "Top 5 predecessors and successors of each Tier 1 opcode.", 818 iter_pre_succ_pairs_tables, 819 comparative=False, 820 doc=""" 821 This does not include the unspecialized instructions that occur after a 822 specialized instruction deoptimizes. 823 """, 824 ) 825 826 827def specialization_section() -> Section: 828 def calc_specialization_table(opcode: str) -> RowCalculator: 829 def calc(stats: Stats) -> Rows: 830 DOCS = { 831 "deferred": 'Lists the number of "deferred" (i.e. not specialized) instructions executed.', 832 "hit": "Specialized instructions that complete.", 833 "miss": "Specialized instructions that deopt.", 834 "deopt": "Specialized instructions that deopt.", 835 } 836 837 opcode_stats = stats.get_opcode_stats("opcode") 838 total = opcode_stats.get_specialization_total(opcode) 839 specialization_counts = opcode_stats.get_specialization_counts(opcode) 840 841 return [ 842 ( 843 Doc(label, DOCS[label]), 844 Count(count), 845 Ratio(count, total), 846 ) 847 for label, count in specialization_counts.items() 848 ] 849 850 return calc 851 852 def calc_specialization_success_failure_table(name: str) -> RowCalculator: 853 def calc(stats: Stats) -> Rows: 854 values = stats.get_opcode_stats( 855 "opcode" 856 ).get_specialization_success_failure(name) 857 total = sum(values.values()) 858 if total: 859 return [ 860 (label.capitalize(), Count(val), Ratio(val, total)) 861 for label, val in values.items() 862 ] 863 else: 864 return [] 865 866 return calc 867 868 def calc_specialization_failure_kind_table(name: str) -> RowCalculator: 869 def calc(stats: Stats) -> Rows: 870 opcode_stats = stats.get_opcode_stats("opcode") 871 failures = opcode_stats.get_specialization_failure_kinds(name) 872 total = opcode_stats.get_specialization_failure_total(name) 873 874 return sorted( 875 [ 876 (label, Count(value), Ratio(value, total)) 877 for label, value in failures.items() 878 if value 879 ], 880 key=itemgetter(1), 881 reverse=True, 882 ) 883 884 return calc 885 886 def iter_specialization_tables(base_stats: Stats, head_stats: Stats | None = None): 887 opcode_base_stats = base_stats.get_opcode_stats("opcode") 888 names = opcode_base_stats.get_opcode_names() 889 if head_stats is not None: 890 opcode_head_stats = head_stats.get_opcode_stats("opcode") 891 names &= opcode_head_stats.get_opcode_names() # type: ignore 892 else: 893 opcode_head_stats = None 894 895 for opcode in sorted(names): 896 if not opcode_base_stats.is_specializable(opcode): 897 continue 898 if opcode_base_stats.get_specialization_total(opcode) == 0 and ( 899 opcode_head_stats is None 900 or opcode_head_stats.get_specialization_total(opcode) == 0 901 ): 902 continue 903 yield Section( 904 opcode, 905 f"specialization stats for {opcode} family", 906 [ 907 Table( 908 ("Kind", "Count:", "Ratio:"), 909 calc_specialization_table(opcode), 910 JoinMode.CHANGE, 911 ), 912 Table( 913 ("Success", "Count:", "Ratio:"), 914 calc_specialization_success_failure_table(opcode), 915 JoinMode.CHANGE, 916 ), 917 Table( 918 ("Failure kind", "Count:", "Ratio:"), 919 calc_specialization_failure_kind_table(opcode), 920 JoinMode.CHANGE, 921 ), 922 ], 923 ) 924 925 return Section( 926 "Specialization stats", 927 "Specialization stats by family", 928 iter_specialization_tables, 929 ) 930 931 932def specialization_effectiveness_section() -> Section: 933 def calc_specialization_effectiveness_table(stats: Stats) -> Rows: 934 opcode_stats = stats.get_opcode_stats("opcode") 935 total = opcode_stats.get_total_execution_count() 936 937 ( 938 basic, 939 specialized_hits, 940 specialized_misses, 941 not_specialized, 942 ) = opcode_stats.get_specialized_total_counts() 943 944 return [ 945 ( 946 Doc( 947 "Basic", 948 "Instructions that are not and cannot be specialized, e.g. `LOAD_FAST`.", 949 ), 950 Count(basic), 951 Ratio(basic, total), 952 ), 953 ( 954 Doc( 955 "Not specialized", 956 "Instructions that could be specialized but aren't, e.g. `LOAD_ATTR`, `BINARY_SLICE`.", 957 ), 958 Count(not_specialized), 959 Ratio(not_specialized, total), 960 ), 961 ( 962 Doc( 963 "Specialized hits", 964 "Specialized instructions, e.g. `LOAD_ATTR_MODULE` that complete.", 965 ), 966 Count(specialized_hits), 967 Ratio(specialized_hits, total), 968 ), 969 ( 970 Doc( 971 "Specialized misses", 972 "Specialized instructions, e.g. `LOAD_ATTR_MODULE` that deopt.", 973 ), 974 Count(specialized_misses), 975 Ratio(specialized_misses, total), 976 ), 977 ] 978 979 def calc_deferred_by_table(stats: Stats) -> Rows: 980 opcode_stats = stats.get_opcode_stats("opcode") 981 deferred_counts = opcode_stats.get_deferred_counts() 982 total = sum(deferred_counts.values()) 983 if total == 0: 984 return [] 985 986 return [ 987 (name, Count(value), Ratio(value, total)) 988 for name, value in sorted( 989 deferred_counts.items(), key=itemgetter(1), reverse=True 990 )[:10] 991 ] 992 993 def calc_misses_by_table(stats: Stats) -> Rows: 994 opcode_stats = stats.get_opcode_stats("opcode") 995 misses_counts = opcode_stats.get_misses_counts() 996 total = sum(misses_counts.values()) 997 if total == 0: 998 return [] 999 1000 return [ 1001 (name, Count(value), Ratio(value, total)) 1002 for name, value in sorted( 1003 misses_counts.items(), key=itemgetter(1), reverse=True 1004 )[:10] 1005 ] 1006 1007 return Section( 1008 "Specialization effectiveness", 1009 "", 1010 [ 1011 Table( 1012 ("Instructions", "Count:", "Ratio:"), 1013 calc_specialization_effectiveness_table, 1014 JoinMode.CHANGE, 1015 ), 1016 Section( 1017 "Deferred by instruction", 1018 "Breakdown of deferred (not specialized) instruction counts by family", 1019 [ 1020 Table( 1021 ("Name", "Count:", "Ratio:"), 1022 calc_deferred_by_table, 1023 JoinMode.CHANGE, 1024 ) 1025 ], 1026 ), 1027 Section( 1028 "Misses by instruction", 1029 "Breakdown of misses (specialized deopts) instruction counts by family", 1030 [ 1031 Table( 1032 ("Name", "Count:", "Ratio:"), 1033 calc_misses_by_table, 1034 JoinMode.CHANGE, 1035 ) 1036 ], 1037 ), 1038 ], 1039 doc=""" 1040 All entries are execution counts. Should add up to the total number of 1041 Tier 1 instructions executed. 1042 """, 1043 ) 1044 1045 1046def call_stats_section() -> Section: 1047 def calc_call_stats_table(stats: Stats) -> Rows: 1048 call_stats = stats.get_call_stats() 1049 total = sum(v for k, v in call_stats.items() if "Calls to" in k) 1050 return [ 1051 (key, Count(value), Ratio(value, total)) 1052 for key, value in call_stats.items() 1053 ] 1054 1055 return Section( 1056 "Call stats", 1057 "Inlined calls and frame stats", 1058 [ 1059 Table( 1060 ("", "Count:", "Ratio:"), 1061 calc_call_stats_table, 1062 JoinMode.CHANGE, 1063 ) 1064 ], 1065 doc=""" 1066 This shows what fraction of calls to Python functions are inlined (i.e. 1067 not having a call at the C level) and for those that are not, where the 1068 call comes from. The various categories overlap. 1069 1070 Also includes the count of frame objects created. 1071 """, 1072 ) 1073 1074 1075def object_stats_section() -> Section: 1076 def calc_object_stats_table(stats: Stats) -> Rows: 1077 object_stats = stats.get_object_stats() 1078 return [ 1079 (label, Count(value), Ratio(value, den)) 1080 for label, (value, den) in object_stats.items() 1081 ] 1082 1083 return Section( 1084 "Object stats", 1085 "Allocations, frees and dict materializatons", 1086 [ 1087 Table( 1088 ("", "Count:", "Ratio:"), 1089 calc_object_stats_table, 1090 JoinMode.CHANGE, 1091 ) 1092 ], 1093 doc=""" 1094 Below, "allocations" means "allocations that are not from a freelist". 1095 Total allocations = "Allocations from freelist" + "Allocations". 1096 1097 "Inline values" is the number of values arrays inlined into objects. 1098 1099 The cache hit/miss numbers are for the MRO cache, split into dunder and 1100 other names. 1101 """, 1102 ) 1103 1104 1105def gc_stats_section() -> Section: 1106 def calc_gc_stats(stats: Stats) -> Rows: 1107 gc_stats = stats.get_gc_stats() 1108 1109 return [ 1110 ( 1111 Count(i), 1112 Count(gen["collections"]), 1113 Count(gen["objects collected"]), 1114 Count(gen["object visits"]), 1115 ) 1116 for (i, gen) in enumerate(gc_stats) 1117 ] 1118 1119 return Section( 1120 "GC stats", 1121 "GC collections and effectiveness", 1122 [ 1123 Table( 1124 ("Generation:", "Collections:", "Objects collected:", "Object visits:"), 1125 calc_gc_stats, 1126 ) 1127 ], 1128 doc=""" 1129 Collected/visits gives some measure of efficiency. 1130 """, 1131 ) 1132 1133 1134def optimization_section() -> Section: 1135 def calc_optimization_table(stats: Stats) -> Rows: 1136 optimization_stats = stats.get_optimization_stats() 1137 1138 return [ 1139 ( 1140 label, 1141 Count(value), 1142 Ratio(value, den, percentage=label != "Uops executed"), 1143 ) 1144 for label, (value, den) in optimization_stats.items() 1145 ] 1146 1147 def calc_optimizer_table(stats: Stats) -> Rows: 1148 optimizer_stats = stats.get_optimizer_stats() 1149 1150 return [ 1151 (label, Count(value), Ratio(value, den)) 1152 for label, (value, den) in optimizer_stats.items() 1153 ] 1154 1155 def calc_histogram_table(key: str, den: str) -> RowCalculator: 1156 def calc(stats: Stats) -> Rows: 1157 histogram = stats.get_histogram(key) 1158 denominator = stats.get(den) 1159 1160 rows: Rows = [] 1161 last_non_zero = 0 1162 for k, v in histogram: 1163 if v != 0: 1164 last_non_zero = len(rows) 1165 rows.append( 1166 ( 1167 f"<= {k:,d}", 1168 Count(v), 1169 Ratio(v, denominator), 1170 ) 1171 ) 1172 # Don't include any zero entries at the end 1173 rows = rows[: last_non_zero + 1] 1174 return rows 1175 1176 return calc 1177 1178 def calc_unsupported_opcodes_table(stats: Stats) -> Rows: 1179 unsupported_opcodes = stats.get_opcode_stats("unsupported_opcode") 1180 return sorted( 1181 [ 1182 (opcode, Count(count)) 1183 for opcode, count in unsupported_opcodes.get_opcode_counts().items() 1184 ], 1185 key=itemgetter(1), 1186 reverse=True, 1187 ) 1188 1189 def calc_error_in_opcodes_table(stats: Stats) -> Rows: 1190 error_in_opcodes = stats.get_opcode_stats("error_in_opcode") 1191 return sorted( 1192 [ 1193 (opcode, Count(count)) 1194 for opcode, count in error_in_opcodes.get_opcode_counts().items() 1195 ], 1196 key=itemgetter(1), 1197 reverse=True, 1198 ) 1199 1200 def iter_optimization_tables(base_stats: Stats, head_stats: Stats | None = None): 1201 if not base_stats.get_optimization_stats() or ( 1202 head_stats is not None and not head_stats.get_optimization_stats() 1203 ): 1204 return 1205 1206 yield Table(("", "Count:", "Ratio:"), calc_optimization_table, JoinMode.CHANGE) 1207 yield Table(("", "Count:", "Ratio:"), calc_optimizer_table, JoinMode.CHANGE) 1208 for name, den in [ 1209 ("Trace length", "Optimization traces created"), 1210 ("Optimized trace length", "Optimization traces created"), 1211 ("Trace run length", "Optimization traces executed"), 1212 ]: 1213 yield Section( 1214 f"{name} histogram", 1215 "", 1216 [ 1217 Table( 1218 ("Range", "Count:", "Ratio:"), 1219 calc_histogram_table(name, den), 1220 JoinMode.CHANGE_NO_SORT, 1221 ) 1222 ], 1223 ) 1224 yield Section( 1225 "Uop execution stats", 1226 "", 1227 [ 1228 Table( 1229 ("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"), 1230 calc_execution_count_table("uops"), 1231 JoinMode.CHANGE_ONE_COLUMN, 1232 ) 1233 ], 1234 ) 1235 yield pair_count_section(prefix="uop", title="Non-JIT uop") 1236 yield Section( 1237 "Unsupported opcodes", 1238 "", 1239 [ 1240 Table( 1241 ("Opcode", "Count:"), 1242 calc_unsupported_opcodes_table, 1243 JoinMode.CHANGE, 1244 ) 1245 ], 1246 ) 1247 yield Section( 1248 "Optimizer errored out with opcode", 1249 "Optimization stopped after encountering this opcode", 1250 [Table(("Opcode", "Count:"), calc_error_in_opcodes_table, JoinMode.CHANGE)], 1251 ) 1252 1253 return Section( 1254 "Optimization (Tier 2) stats", 1255 "statistics about the Tier 2 optimizer", 1256 iter_optimization_tables, 1257 ) 1258 1259 1260def rare_event_section() -> Section: 1261 def calc_rare_event_table(stats: Stats) -> Table: 1262 DOCS = { 1263 "set class": "Setting an object's class, `obj.__class__ = ...`", 1264 "set bases": "Setting the bases of a class, `cls.__bases__ = ...`", 1265 "set eval frame func": ( 1266 "Setting the PEP 523 frame eval function " 1267 "`_PyInterpreterState_SetFrameEvalFunc()`" 1268 ), 1269 "builtin dict": "Modifying the builtins, `__builtins__.__dict__[var] = ...`", 1270 "func modification": "Modifying a function, e.g. `func.__defaults__ = ...`, etc.", 1271 "watched dict modification": "A watched dict has been modified", 1272 "watched globals modification": "A watched `globals()` dict has been modified", 1273 } 1274 return [(Doc(x, DOCS[x]), Count(y)) for x, y in stats.get_rare_events()] 1275 1276 return Section( 1277 "Rare events", 1278 "Counts of rare/unlikely events", 1279 [Table(("Event", "Count:"), calc_rare_event_table, JoinMode.CHANGE)], 1280 ) 1281 1282 1283def meta_stats_section() -> Section: 1284 def calc_rows(stats: Stats) -> Rows: 1285 return [("Number of data files", Count(stats.get("__nfiles__")))] 1286 1287 return Section( 1288 "Meta stats", 1289 "Meta statistics", 1290 [Table(("", "Count:"), calc_rows, JoinMode.CHANGE)], 1291 ) 1292 1293 1294LAYOUT = [ 1295 execution_count_section(), 1296 pair_count_section("opcode"), 1297 pre_succ_pairs_section(), 1298 specialization_section(), 1299 specialization_effectiveness_section(), 1300 call_stats_section(), 1301 object_stats_section(), 1302 gc_stats_section(), 1303 optimization_section(), 1304 rare_event_section(), 1305 meta_stats_section(), 1306] 1307 1308 1309def output_markdown( 1310 out: TextIO, 1311 obj: Section | Table | list, 1312 base_stats: Stats, 1313 head_stats: Stats | None = None, 1314 level: int = 2, 1315) -> None: 1316 def to_markdown(x): 1317 if hasattr(x, "markdown"): 1318 return x.markdown() 1319 elif isinstance(x, str): 1320 return x 1321 elif x is None: 1322 return "" 1323 else: 1324 raise TypeError(f"Can't convert {x} to markdown") 1325 1326 match obj: 1327 case Section(): 1328 if obj.title: 1329 print("#" * level, obj.title, file=out) 1330 print(file=out) 1331 print("<details>", file=out) 1332 print("<summary>", obj.summary, "</summary>", file=out) 1333 print(file=out) 1334 if obj.doc: 1335 print(obj.doc, file=out) 1336 1337 if head_stats is not None and obj.comparative is False: 1338 print("Not included in comparative output.\n") 1339 else: 1340 for part in obj.part_iter(base_stats, head_stats): 1341 output_markdown(out, part, base_stats, head_stats, level=level + 1) 1342 print(file=out) 1343 if obj.title: 1344 print("</details>", file=out) 1345 print(file=out) 1346 1347 case Table(): 1348 header, rows = obj.get_table(base_stats, head_stats) 1349 if len(rows) == 0: 1350 return 1351 1352 alignments = [] 1353 for item in header: 1354 if item.endswith(":"): 1355 alignments.append("right") 1356 else: 1357 alignments.append("left") 1358 1359 print("<table>", file=out) 1360 print("<thead>", file=out) 1361 print("<tr>", file=out) 1362 for item, align in zip(header, alignments): 1363 if item.endswith(":"): 1364 item = item[:-1] 1365 print(f'<th align="{align}">{item}</th>', file=out) 1366 print("</tr>", file=out) 1367 print("</thead>", file=out) 1368 1369 print("<tbody>", file=out) 1370 for row in rows: 1371 if len(row) != len(header): 1372 raise ValueError( 1373 "Wrong number of elements in row '" + str(row) + "'" 1374 ) 1375 print("<tr>", file=out) 1376 for col, align in zip(row, alignments): 1377 print(f'<td align="{align}">{to_markdown(col)}</td>', file=out) 1378 print("</tr>", file=out) 1379 print("</tbody>", file=out) 1380 1381 print("</table>", file=out) 1382 print(file=out) 1383 1384 case list(): 1385 for part in obj: 1386 output_markdown(out, part, base_stats, head_stats, level=level) 1387 1388 print("---", file=out) 1389 print("Stats gathered on:", date.today(), file=out) 1390 1391 1392def output_stats(inputs: list[Path], json_output=str | None): 1393 match len(inputs): 1394 case 1: 1395 data = load_raw_data(Path(inputs[0])) 1396 if json_output is not None: 1397 with open(json_output, "w", encoding="utf-8") as f: 1398 save_raw_data(data, f) # type: ignore 1399 stats = Stats(data) 1400 output_markdown(sys.stdout, LAYOUT, stats) 1401 case 2: 1402 if json_output is not None: 1403 raise ValueError( 1404 "Can not output to JSON when there are multiple inputs" 1405 ) 1406 base_data = load_raw_data(Path(inputs[0])) 1407 head_data = load_raw_data(Path(inputs[1])) 1408 base_stats = Stats(base_data) 1409 head_stats = Stats(head_data) 1410 output_markdown(sys.stdout, LAYOUT, base_stats, head_stats) 1411 1412 1413def main(): 1414 parser = argparse.ArgumentParser(description="Summarize pystats results") 1415 1416 parser.add_argument( 1417 "inputs", 1418 nargs="*", 1419 type=str, 1420 default=[DEFAULT_DIR], 1421 help=f""" 1422 Input source(s). 1423 For each entry, if a .json file, the output provided by --json-output from a previous run; 1424 if a directory, a directory containing raw pystats .txt files. 1425 If one source is provided, its stats are printed. 1426 If two sources are provided, comparative stats are printed. 1427 Default is {DEFAULT_DIR}. 1428 """, 1429 ) 1430 1431 parser.add_argument( 1432 "--json-output", 1433 nargs="?", 1434 help="Output complete raw results to the given JSON file.", 1435 ) 1436 1437 args = parser.parse_args() 1438 1439 if len(args.inputs) > 2: 1440 raise ValueError("0-2 arguments may be provided.") 1441 1442 output_stats(args.inputs, json_output=args.json_output) 1443 1444 1445if __name__ == "__main__": 1446 main() 1447