1#! /usr/bin/env python3 2# 3# Copyright 2023, The Android Open Source Project 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16 17import argparse 18from collections import defaultdict 19from enum import Enum 20import os 21import re 22 23 24class SortType(Enum): 25 NONE = 'none' 26 SIMPLE = 'simple' 27 OPT_NEIGHBOURS = 'opt_neighbours' 28 29 30def merge_same_procnames(entries): 31 path_regex = r'(.+)_(\d+).txt' 32 prog = re.compile(path_regex) 33 34 merged_entries = defaultdict(set) 35 for path, objs in entries: 36 basename = os.path.basename(path) 37 m = prog.match(basename) 38 if m: 39 merged_entries[m.group(1)].update(objs) 40 41 return sorted(merged_entries.items(), key=lambda x: len(x[1])) 42 43 44def opt_neighbours(sort_keys): 45 sort_keys = dict(sort_keys) 46 res = list() 47 48 # Start with a bin with the lowest process and objects count. 49 cur_key = min( 50 sort_keys.items(), key=lambda item: (item[0].bit_count(), len(item[1])) 51 )[0] 52 res.append((cur_key, sort_keys[cur_key])) 53 del sort_keys[cur_key] 54 55 # Find next most similar sort key and update the result. 56 while sort_keys: 57 58 def jaccard_index(x): 59 return (x & cur_key).bit_count() / (x | cur_key).bit_count() 60 61 next_key = max(sort_keys.keys(), key=jaccard_index) 62 res.append((next_key, sort_keys[next_key])) 63 del sort_keys[next_key] 64 cur_key = next_key 65 return res 66 67 68def process_dirty_entries(entries, sort_type): 69 dirty_image_objects = [] 70 71 union = set() 72 for k, v in entries: 73 union = union.union(v) 74 75 if sort_type == SortType.NONE: 76 dirty_obj_lines = [obj + '\n' for obj in sorted(union)] 77 return (dirty_obj_lines, dict()) 78 79 # sort_key -> [objs] 80 sort_keys = defaultdict(list) 81 for obj in union: 82 sort_key = 0 83 # Nth bit of sort_key is set if this object is dirty in Nth process. 84 for idx, (k, v) in enumerate(entries): 85 if obj in v: 86 sort_key = (sort_key << 1) | 1 87 else: 88 sort_key = sort_key << 1 89 90 sort_keys[sort_key].append(obj) 91 92 sort_keys = sorted(sort_keys.items()) 93 94 if sort_type == SortType.OPT_NEIGHBOURS: 95 sort_keys = opt_neighbours(sort_keys) 96 97 dirty_obj_lines = list() 98 for idx, (_, objs) in enumerate(sort_keys): 99 for obj in objs: 100 dirty_obj_lines.append(obj + ' ' + str(idx) + '\n') 101 102 return (dirty_obj_lines, sort_keys) 103 104def split_dirty_objects(dirty_objects): 105 art_objects = list() 106 framework_objects = list() 107 for obj in dirty_objects: 108 obj_without_location = obj.split(' ', 1)[1] 109 is_art_module_object = obj.startswith('/apex/com.android.art/') 110 is_primitive_array = obj.startswith('primitive') 111 if is_art_module_object or is_primitive_array: 112 art_objects.append(obj_without_location) 113 else: 114 framework_objects.append(obj_without_location) 115 return art_objects, framework_objects 116 117def main(): 118 parser = argparse.ArgumentParser( 119 description=( 120 'Create dirty-image-objects file from specified imgdiag output files.' 121 ), 122 formatter_class=argparse.ArgumentDefaultsHelpFormatter, 123 ) 124 parser.add_argument( 125 'imgdiag_files', 126 nargs='+', 127 help='imgdiag files to use.', 128 ) 129 parser.add_argument( 130 '--sort-type', 131 choices=[e.value for e in SortType], 132 default=SortType.OPT_NEIGHBOURS.value, 133 help=( 134 'Object sorting type. "simple" puts objects with the same usage' 135 ' pattern in the same bins. "opt_neighbours" also tries to put bins' 136 ' with similar usage patterns close to each other.' 137 ), 138 ) 139 parser.add_argument( 140 '--merge-same-procnames', 141 action=argparse.BooleanOptionalAction, 142 default=False, 143 help=( 144 'Merge dirty objects from files with the same process name (different' 145 ' pid). Files are expected to end with "_{pid}.txt"' 146 ), 147 ) 148 parser.add_argument( 149 '--output-filename', 150 default='dirty-image-objects.txt', 151 help='Output file for dirty image objects.', 152 ) 153 parser.add_argument( 154 '--print-stats', 155 action=argparse.BooleanOptionalAction, 156 default=False, 157 help='Print dirty object stats.', 158 ) 159 160 args = parser.parse_args() 161 162 entries = list() 163 for path in args.imgdiag_files: 164 with open(path) as f: 165 lines = f.readlines() 166 prefix = 'dirty_obj: ' 167 lines = [l.strip().removeprefix(prefix) for l in lines if prefix in l] 168 entries.append((path, set(lines))) 169 170 entries = sorted(entries, key=lambda x: len(x[1])) 171 172 if args.merge_same_procnames: 173 entries = merge_same_procnames(entries) 174 175 print('Using processes:') 176 for k, v in entries: 177 print(f'{k}: {len(v)}') 178 print() 179 180 dirty_image_objects, sort_keys = process_dirty_entries( 181 entries=entries, sort_type=SortType(args.sort_type) 182 ) 183 184 with open(args.output_filename, 'w') as f: 185 f.writelines(dirty_image_objects) 186 187 art_objs, framework_objs = split_dirty_objects(dirty_image_objects) 188 with open('art_' + args.output_filename, 'w') as f: 189 f.writelines(art_objs) 190 with open('framework_' + args.output_filename, 'w') as f: 191 f.writelines(framework_objs) 192 193 if args.print_stats: 194 print(','.join(k for k, v in entries), ',obj_count') 195 total_count = 0 196 for sort_key, objs in sort_keys: 197 bits_csv = ','.join( 198 '{sort_key:0{width}b}'.format(sort_key=sort_key, width=len(entries)) 199 ) 200 print(bits_csv, ',', len(objs)) 201 total_count += len(objs) 202 print('total: ', total_count) 203 204 205if __name__ == '__main__': 206 main() 207