• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#! /usr/bin/env python3
2
3"""
4Compare private dirty page counts between imgdiag test runs.
5
6This script compares private dirty page counts in the boot-image object section
7between specified imgdiag runs.
8
9Usage:
10    # Compare multiple imgdiag runs:
11    ./compare_imgdiag_runs.py ./I1234 ./I1235 ./I1236
12
13    # Check all command line flags:
14    ./compare_imgdiag_runs.py --help
15"""
16
17import argparse
18import glob
19import gzip
20import json
21import os
22import pprint
23import statistics
24
# Thresholds used to verify that the process sets of different runs are
# similar enough for a meaningful comparison. The values are based on the
# imgdiag run data of 2025-01. Update if necessary.

# The imgdiag_top_100 ATP config has about 100 apps, and there are more than
# 100 system processes on top of that.
MIN_COMMON_PROC_COUNT = 200
# Tolerate a relatively small (<20%) mismatch in process sets between runs.
MAX_MISMATCH_PROC_COUNT = 40
35
def _read_dirty_page_counts(invoc_dir):
  """Load the single *dirty-page-counts* JSON file found in `invoc_dir`.

  Args:
    invoc_dir: Directory with the imgdiag output files of one invocation.

  Returns:
    The decoded JSON document (main() expects a dict mapping keys to counts).

  Raises:
    ValueError: If `invoc_dir` does not contain exactly one matching file.
  """
  matches = glob.glob(os.path.join(invoc_dir, '*dirty-page-counts*'))
  if len(matches) != 1:
    raise ValueError(f"Expected to find exactly one *dirty-page-counts* file in {invoc_dir}, but found: {matches}")

  path = matches[0]
  # Read both the gzipped and the plain variant as bytes so that json detects
  # the encoding itself instead of relying on the locale's text encoding.
  opener = gzip.open if path.endswith('.gz') else open
  try:
    with opener(path, 'rb') as f:
      return json.load(f)
  except Exception:
    # Name the offending file, then let the original error propagate.
    print('Failed to read: ', path)
    raise


def _strip_key_suffix(key):
  """Return `key` without its last '_'-separated component.

  A key without any underscore is returned unchanged (slicing at the result of
  rfind() would silently drop its last character).
  """
  head, sep, _ = key.rpartition('_')
  return head if sep else key


def main():
  """Compare dirty page counts of the processes common to all given runs.

  Parses the command line, loads one *dirty-page-counts* file per invocation
  directory and prints the total, mean and median counts per run, computed
  over the process names present in every run. With --verbose, also prints the
  mismatching process names and the ten largest per-process differences for
  every pair of runs.

  Raises:
    ValueError: If a directory does not contain exactly one
      *dirty-page-counts* file, or if no process is present in all runs.
  """
  pp = pprint.PrettyPrinter(indent=2)
  parser = argparse.ArgumentParser(
    description='Compare private dirty page counts between imgdiag ATP runs',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
  )

  parser.add_argument(
    'invocation_dirs',
    nargs='+',
    help='Directories with imgdiag output files',
  )

  parser.add_argument(
    '--verbose',
    action=argparse.BooleanOptionalAction,
    default=False,
    help='Print out all mismatched processes and more info about dirty page diffs between individual processes',
  )
  args = parser.parse_args()

  runs = [_read_dirty_page_counts(invoc_dir) for invoc_dir in args.invocation_dirs]

  # normpath() drops a trailing separator so that basename() is never empty.
  invoc_ids = [os.path.basename(os.path.normpath(path)) for path in args.invocation_dirs]
  print('Comparing: ', invoc_ids)

  # NOTE(review): keys of one run that differ only in the stripped suffix
  # collapse into a single entry (the last one wins) - confirm this is intended.
  items = [{_strip_key_suffix(k): v for k, v in r.items()} for r in runs]

  proc_names = [set(i) for i in items]
  common_proc_names = set.intersection(*proc_names)
  mismatch_proc_names = set.union(*proc_names) - common_proc_names
  print('Common proc count (used in the comparison): ', len(common_proc_names))
  if len(common_proc_names) < MIN_COMMON_PROC_COUNT:
    print('WARNING: common processes count is too low.')
    print(f'Should be at least {MIN_COMMON_PROC_COUNT}.')

  print('Mismatching proc count (not present in all runs): ', len(mismatch_proc_names))
  if len(mismatch_proc_names) > MAX_MISMATCH_PROC_COUNT:
    print('WARNING: too many mismatching process names.')
    print(f'Should be lower than {MAX_MISMATCH_PROC_COUNT}.')

  if args.verbose:
    print("Mismatching process names:")
    pp.pprint(mismatch_proc_names)

  # Without a single shared process the mean and the median are undefined;
  # fail with a clear message instead of a ZeroDivisionError further down.
  if not common_proc_names:
    raise ValueError('No process is present in all runs; nothing to compare.')

  dirty_page_sums = [sum(r[k] for k in common_proc_names) for r in items]
  print(f'Total dirty pages:\n{dirty_page_sums}\n')

  mean_dirty_pages = [s / len(common_proc_names) for s in dirty_page_sums]
  print(f'Mean dirty pages:\n{mean_dirty_pages}\n')

  median_dirty_pages = [statistics.median(r[name] for name in common_proc_names) for r in items]
  print(f'Median dirty pages:\n{median_dirty_pages}\n')

  if args.verbose:
    print('Largest dirty page diffs:')
    # Visit every unordered pair of runs exactly once.
    for i in range(len(invoc_ids)):
      for j in range(i + 1, len(invoc_ids)):
        page_count_diffs = [
          (proc_name, items[i][proc_name], items[j][proc_name], items[j][proc_name] - items[i][proc_name])
          for proc_name in common_proc_names
        ]
        # Largest absolute diff first; the name breaks ties so that the output
        # does not depend on set iteration order.
        page_count_diffs.sort(key=lambda d: (-abs(d[3]), d[0]))
        print(f'Between {invoc_ids[i]} and {invoc_ids[j]}: ')
        pp.pprint(page_count_diffs[:10])
124
# Run the comparison only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
  main()
127