1#! /usr/bin/env python3 2# 3# Copyright 2023, The Android Open Source Project 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16 17import argparse 18from collections import defaultdict 19from enum import Enum 20import os 21import re 22 23 24class SortType(Enum): 25 NONE = 'none' 26 SIMPLE = 'simple' 27 OPT_NEIGHBOURS = 'opt_neighbours' 28 29 30def merge_same_procnames(entries): 31 path_regex = r'(.+)_(\d+).txt' 32 prog = re.compile(path_regex) 33 34 merged_entries = defaultdict(set) 35 for path, objs in entries: 36 basename = os.path.basename(path) 37 m = prog.match(basename) 38 if m: 39 merged_entries[m.group(1)].update(objs) 40 41 return sorted(merged_entries.items(), key=lambda x: len(x[1])) 42 43 44def opt_neighbours(sort_keys): 45 sort_keys = dict(sort_keys) 46 res = list() 47 48 # Start with a bin with the lowest process and objects count. 49 cur_key = min( 50 sort_keys.items(), key=lambda item: (item[0].bit_count(), len(item[1])) 51 )[0] 52 res.append((cur_key, sort_keys[cur_key])) 53 del sort_keys[cur_key] 54 55 # Find next most similar sort key and update the result. 56 while sort_keys: 57 58 def jaccard_index(x): 59 return (x & cur_key).bit_count() / (x | cur_key).bit_count() 60 61 next_key = max(sort_keys.keys(), key=jaccard_index) 62 res.append((next_key, sort_keys[next_key])) 63 del sort_keys[next_key] 64 cur_key = next_key 65 return res 66 67 68def process_dirty_entries(entries, sort_type): 69 dirty_image_objects = [] 70 71 union = set() 72 for k, v in entries: 73 union = union.union(v) 74 75 if sort_type == SortType.NONE: 76 dirty_obj_lines = [obj + '\n' for obj in sorted(union)] 77 return (dirty_obj_lines, dict()) 78 79 # sort_key -> [objs] 80 sort_keys = defaultdict(list) 81 for obj in union: 82 sort_key = 0 83 # Nth bit of sort_key is set if this object is dirty in Nth process. 84 for idx, (k, v) in enumerate(entries): 85 if obj in v: 86 sort_key = (sort_key << 1) | 1 87 else: 88 sort_key = sort_key << 1 89 90 sort_keys[sort_key].append(obj) 91 92 sort_keys = sorted(sort_keys.items()) 93 94 if sort_type == SortType.OPT_NEIGHBOURS: 95 sort_keys = opt_neighbours(sort_keys) 96 97 dirty_obj_lines = list() 98 for idx, (_, objs) in enumerate(sort_keys): 99 for obj in objs: 100 dirty_obj_lines.append(obj + ' ' + str(idx) + '\n') 101 102 return (dirty_obj_lines, sort_keys) 103 104 105def main(): 106 parser = argparse.ArgumentParser( 107 description=( 108 'Create dirty-image-objects file from specified imgdiag output files.' 109 ), 110 formatter_class=argparse.ArgumentDefaultsHelpFormatter, 111 ) 112 parser.add_argument( 113 'imgdiag_files', 114 nargs='+', 115 help='imgdiag files to use.', 116 ) 117 parser.add_argument( 118 '--sort-type', 119 choices=[e.value for e in SortType], 120 default=SortType.OPT_NEIGHBOURS.value, 121 help=( 122 'Object sorting type. "simple" puts objects with the same usage' 123 ' pattern in the same bins. "opt_neighbours" also tries to put bins' 124 ' with similar usage patterns close to each other.' 125 ), 126 ) 127 parser.add_argument( 128 '--merge-same-procnames', 129 action=argparse.BooleanOptionalAction, 130 default=False, 131 help=( 132 'Merge dirty objects from files with the same process name (different' 133 ' pid). Files are expected to end with "_{pid}.txt"' 134 ), 135 ) 136 parser.add_argument( 137 '--output-filename', 138 default='dirty-image-objects.txt', 139 help='Output file for dirty image objects.', 140 ) 141 parser.add_argument( 142 '--print-stats', 143 action=argparse.BooleanOptionalAction, 144 default=False, 145 help='Print dirty object stats.', 146 ) 147 148 args = parser.parse_args() 149 150 entries = list() 151 for path in args.imgdiag_files: 152 with open(path) as f: 153 lines = f.readlines() 154 prefix = 'dirty_obj: ' 155 lines = [l.strip().removeprefix(prefix) for l in lines if prefix in l] 156 entries.append((path, set(lines))) 157 158 entries = sorted(entries, key=lambda x: len(x[1])) 159 160 if args.merge_same_procnames: 161 entries = merge_same_procnames(entries) 162 163 print('Using processes:') 164 for k, v in entries: 165 print(f'{k}: {len(v)}') 166 print() 167 168 dirty_image_objects, sort_keys = process_dirty_entries( 169 entries=entries, sort_type=SortType(args.sort_type) 170 ) 171 172 with open(args.output_filename, 'w') as f: 173 f.writelines(dirty_image_objects) 174 175 if args.print_stats: 176 print(','.join(k for k, v in entries), ',obj_count') 177 total_count = 0 178 for sort_key, objs in sort_keys: 179 bits_csv = ','.join( 180 '{sort_key:0{width}b}'.format(sort_key=sort_key, width=len(entries)) 181 ) 182 print(bits_csv, ',', len(objs)) 183 total_count += len(objs) 184 print('total: ', total_count) 185 186 187if __name__ == '__main__': 188 main() 189