#! /usr/bin/env python3

"""
Compare private dirty page counts between imgdiag test runs.

This script compares private dirty page counts in the boot-image object section
between specified imgdiag runs.

Usage:
    # Compare multiple imgdiag runs:
    ./compare_imgdiag_runs.py ./I1234 ./I1235 ./I1236

    # Check all command line flags:
    ./compare_imgdiag_runs.py --help
"""

import argparse
import glob
import gzip
import itertools
import json
import os
import pprint
import statistics

# These thresholds are used to verify that process sets from different runs
# are similar enough for meaningful comparison. Constants are based on
# the imgdiag run data of 2025-01. Update if necessary.

# There are about 100 apps in imgdiag_top_100 ATP config + more than 100 system processes.
MIN_COMMON_PROC_COUNT = 200
# Allow for a relatively small (<20%) mismatch in process sets between runs.
MAX_MISMATCH_PROC_COUNT = 40


def _load_run(invoc_dir: str):
    """Load the single ``*dirty-page-counts*`` JSON file found in ``invoc_dir``.

    Files whose name ends with ``.gz`` are decompressed on the fly.

    Raises:
        ValueError: if the directory does not contain exactly one matching file.
        Exception: any read/parse error is re-raised after the offending path
            has been printed.
    """
    res = glob.glob(os.path.join(invoc_dir, '*dirty-page-counts*'))
    if len(res) != 1:
        raise ValueError(f"Expected to find exactly one *dirty-page-counts* file in {invoc_dir}, but found: {res}")

    path = res[0]
    # Read bytes in both cases: json.load() detects the UTF encoding itself, so
    # the result does not depend on the locale's default text encoding.
    opener = gzip.open if path.endswith('.gz') else open
    try:
        with opener(path, 'rb') as f:
            return json.load(f)
    except Exception:
        # Not a bare ``except``: Ctrl-C / SystemExit should not be reported as
        # a read failure.
        print('Failed to read: ', path)
        raise


def _strip_key_suffix(key: str) -> str:
    """Return ``key`` without its last ``_``-separated component.

    The suffix is presumably a per-run identifier such as a PID -- TODO confirm
    against the imgdiag output format. A key that contains no underscore is
    returned unchanged (slicing with ``rfind``'s -1 would silently drop its
    last character).
    """
    head, sep, _ = key.rpartition('_')
    return head if sep else key


def main(argv=None) -> None:
    """Compare dirty page counts of the runs given on the command line.

    Args:
        argv: Optional list of command line arguments. When ``None`` (the
            default), ``sys.argv[1:]`` is parsed.

    Raises:
        ValueError: if an invocation directory does not contain exactly one
            ``*dirty-page-counts*`` file, or if no process is common to all
            runs (there is nothing to compare in that case).
    """
    pp = pprint.PrettyPrinter(indent=2)
    parser = argparse.ArgumentParser(
        description='Compare private dirty page counts between imgdiag ATP runs',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    parser.add_argument(
        'invocation_dirs',
        nargs='+',
        help='Directories with imgdiag output files',
    )

    parser.add_argument(
        '--verbose',
        action=argparse.BooleanOptionalAction,
        default=False,
        help=(
            'Print out all mismatched processes and more info about '
            'dirty page diffs between individual processes'
        ),
    )
    args = parser.parse_args(argv)

    runs = [_load_run(invoc_dir) for invoc_dir in args.invocation_dirs]

    # normpath() drops a trailing slash so that basename() is never empty.
    invoc_ids = [os.path.basename(os.path.normpath(path)) for path in args.invocation_dirs]
    print('Comparing: ', invoc_ids)

    # NOTE(review): if two keys of one run differ only in the stripped suffix,
    # the later entry overwrites the earlier one -- confirm this is intended.
    items = [{_strip_key_suffix(k): v for k, v in run.items()} for run in runs]

    proc_names = [set(run) for run in items]
    common_proc_names = set.intersection(*proc_names)
    mismatch_proc_names = set.union(*proc_names) - common_proc_names
    print('Common proc count (used in the comparison): ', len(common_proc_names))
    if len(common_proc_names) < MIN_COMMON_PROC_COUNT:
        print('WARNING: common processes count is too low.')
        print(f'Should be at least {MIN_COMMON_PROC_COUNT}.')

    print('Mismatching proc count (not present in all runs): ', len(mismatch_proc_names))
    if len(mismatch_proc_names) > MAX_MISMATCH_PROC_COUNT:
        print('WARNING: too many mismatching process names.')
        print(f'Should be lower than {MAX_MISMATCH_PROC_COUNT}.')

    if args.verbose:
        print("Mismatching process names:")
        pp.pprint(mismatch_proc_names)

    if not common_proc_names:
        # Without this guard the mean below fails with an opaque
        # ZeroDivisionError.
        raise ValueError('No processes are common to all runs; nothing to compare.')

    # A sorted list makes the output independent of set iteration order
    # (string hashing is randomized per interpreter run).
    common = sorted(common_proc_names)

    dirty_page_sums = [sum(run[name] for name in common) for run in items]
    print(f'Total dirty pages:\n{dirty_page_sums}\n')

    mean_dirty_pages = [s / len(common) for s in dirty_page_sums]
    print(f'Mean dirty pages:\n{mean_dirty_pages}\n')

    median_dirty_pages = [statistics.median(run[name] for name in common) for run in items]
    print(f'Median dirty pages:\n{median_dirty_pages}\n')

    if args.verbose:
        print('Largest dirty page diffs:')
        # Every unordered pair of runs (i < j), in the same order as a nested
        # index loop would visit them.
        for i, j in itertools.combinations(range(len(invoc_ids)), 2):
            page_count_diffs = [
                (name, items[i][name], items[j][name], items[j][name] - items[i][name])
                for name in common
            ]
            # Stable sort: equal absolute diffs stay in alphabetical name order.
            page_count_diffs.sort(key=lambda d: abs(d[3]), reverse=True)
            print(f'Between {invoc_ids[i]} and {invoc_ids[j]}: ')
            pp.pprint(page_count_diffs[:10])


if __name__ == '__main__':
    main()