• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#! /usr/bin/env python3
2#
3# Copyright 2023, The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#     http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17import argparse
18from collections import defaultdict
19from enum import Enum
20import os
21import re
22
23
class SortType(Enum):
  """Strategies for ordering dirty objects in the generated output.

  The member values are the strings accepted by the --sort-type flag.
  """

  # Objects are emitted in sorted order, without a bin index.
  NONE = 'none'
  # Objects with the same usage pattern across processes share a bin.
  SIMPLE = 'simple'
  # Like SIMPLE, and bins with similar usage patterns are placed close together.
  OPT_NEIGHBOURS = 'opt_neighbours'
28
29
def merge_same_procnames(entries):
  """Merges dirty-object sets of files that share a process name.

  A file's process name is its basename with the trailing "_{pid}.txt"
  removed. Files whose basename does not have that exact form are skipped.

  Args:
    entries: Iterable of (path, objs) pairs, where objs is an iterable of
      dirty-object strings read from the file at path.

  Returns:
    A list of (process_name, set_of_objs) pairs sorted by ascending object
    count.
  """
  # The dot is escaped and the whole basename must match, so names such as
  # "proc_1xtxt" or "proc_1.txt.bak" are not mistaken for "_{pid}.txt" files.
  procname_pattern = re.compile(r'(.+)_(\d+)\.txt')

  merged_entries = defaultdict(set)
  for path, objs in entries:
    m = procname_pattern.fullmatch(os.path.basename(path))
    if m:
      merged_entries[m.group(1)].update(objs)

  return sorted(merged_entries.items(), key=lambda item: len(item[1]))
42
43
44def opt_neighbours(sort_keys):
45  sort_keys = dict(sort_keys)
46  res = list()
47
48  # Start with a bin with the lowest process and objects count.
49  cur_key = min(
50      sort_keys.items(), key=lambda item: (item[0].bit_count(), len(item[1]))
51  )[0]
52  res.append((cur_key, sort_keys[cur_key]))
53  del sort_keys[cur_key]
54
55  # Find next most similar sort key and update the result.
56  while sort_keys:
57
58    def jaccard_index(x):
59      return (x & cur_key).bit_count() / (x | cur_key).bit_count()
60
61    next_key = max(sort_keys.keys(), key=jaccard_index)
62    res.append((next_key, sort_keys[next_key]))
63    del sort_keys[next_key]
64    cur_key = next_key
65  return res
66
67
def process_dirty_entries(entries, sort_type):
  """Builds the output lines for all dirty objects found in entries.

  Args:
    entries: Sequence of (name, objs) pairs, where objs is a collection of
      dirty-object strings for one process. It is iterated more than once.
    sort_type: SortType selecting how objects are grouped and ordered.

  Returns:
    A (lines, sort_keys) tuple. For SortType.NONE, lines holds every object
    once in sorted order, each followed by a newline, and sort_keys is an
    empty dict. Otherwise each line is "<obj> <bin index>\n" and sort_keys is
    the list of (sort_key, objs) bins in output order.
  """
  union = set()
  for _, objs in entries:
    union.update(objs)

  if sort_type == SortType.NONE:
    dirty_obj_lines = [obj + '\n' for obj in sorted(union)]
    return (dirty_obj_lines, dict())

  # sort_key -> [objs]
  sort_keys = defaultdict(list)
  # Iterate in sorted order so that the order of objects inside each bin does
  # not depend on set iteration order, which varies between runs for strings.
  for obj in sorted(union):
    sort_key = 0
    # One bit per entry: the first entry ends up in the most significant bit,
    # and a bit is set if this object is dirty in that process.
    for _, objs in entries:
      sort_key = (sort_key << 1) | int(obj in objs)

    sort_keys[sort_key].append(obj)

  sort_keys = sorted(sort_keys.items())

  # With no dirty objects there are no bins to reorder.
  if sort_type == SortType.OPT_NEIGHBOURS and sort_keys:
    sort_keys = opt_neighbours(sort_keys)

  dirty_obj_lines = []
  for idx, (_, objs) in enumerate(sort_keys):
    for obj in objs:
      dirty_obj_lines.append(obj + ' ' + str(idx) + '\n')

  return (dirty_obj_lines, sort_keys)
103
def split_dirty_objects(dirty_objects):
  """Splits dirty-object lines into ART-module and framework groups.

  Each line has everything up to and including its first space removed
  (presumably the object's location) before it is stored. Lines that start
  with '/apex/com.android.art/' or 'primitive' go to the ART group; all other
  lines go to the framework group.

  Args:
    dirty_objects: Iterable of dirty-object line strings.

  Returns:
    A (art_objects, framework_objects) tuple of lists.

  Raises:
    IndexError: If a line contains no space.
  """
  art_prefixes = ('/apex/com.android.art/', 'primitive')
  art_objects = []
  framework_objects = []
  for line in dirty_objects:
    remainder = line.split(' ', 1)[1]
    bucket = art_objects if line.startswith(art_prefixes) else framework_objects
    bucket.append(remainder)
  return art_objects, framework_objects
116
def _read_dirty_objects(path):
  """Returns the set of dirty objects listed in one imgdiag output file."""
  prefix = 'dirty_obj: '
  with open(path) as f:
    return {line.strip().removeprefix(prefix) for line in f if prefix in line}


def _with_filename_prefix(prefix, path):
  """Returns path with prefix prepended to its final component."""
  directory, filename = os.path.split(path)
  return os.path.join(directory, prefix + filename)


def main():
  """Command-line entry point.

  Reads the given imgdiag files, optionally merges files of the same process
  name, and writes three files: the full dirty-object list to
  --output-filename, and the ART-module and framework subsets to files of the
  same name prefixed with "art_" and "framework_" in the same directory.
  With --print-stats, per-bin statistics are printed as well.
  """
  parser = argparse.ArgumentParser(
      description=(
          'Create dirty-image-objects file from specified imgdiag output files.'
      ),
      formatter_class=argparse.ArgumentDefaultsHelpFormatter,
  )
  parser.add_argument(
      'imgdiag_files',
      nargs='+',
      help='imgdiag files to use.',
  )
  parser.add_argument(
      '--sort-type',
      choices=[e.value for e in SortType],
      default=SortType.OPT_NEIGHBOURS.value,
      help=(
          'Object sorting type. "simple" puts objects with the same usage'
          ' pattern in the same bins. "opt_neighbours" also tries to put bins'
          ' with similar usage patterns close to each other.'
      ),
  )
  parser.add_argument(
      '--merge-same-procnames',
      action=argparse.BooleanOptionalAction,
      default=False,
      help=(
          'Merge dirty objects from files with the same process name (different'
          ' pid). Files are expected to end with "_{pid}.txt"'
      ),
  )
  parser.add_argument(
      '--output-filename',
      default='dirty-image-objects.txt',
      help='Output file for dirty image objects.',
  )
  parser.add_argument(
      '--print-stats',
      action=argparse.BooleanOptionalAction,
      default=False,
      help='Print dirty object stats.',
  )

  args = parser.parse_args()

  entries = [(path, _read_dirty_objects(path)) for path in args.imgdiag_files]
  entries = sorted(entries, key=lambda x: len(x[1]))

  if args.merge_same_procnames:
    entries = merge_same_procnames(entries)

  print('Using processes:')
  for k, v in entries:
    print(f'{k}: {len(v)}')
  print()

  dirty_image_objects, sort_keys = process_dirty_entries(
      entries=entries, sort_type=SortType(args.sort_type)
  )

  with open(args.output_filename, 'w') as f:
    f.writelines(dirty_image_objects)

  art_objs, framework_objs = split_dirty_objects(dirty_image_objects)
  # Prefix only the file name: prepending to the whole path would turn
  # "out/x.txt" into "art_out/x.txt", which points at a different directory.
  with open(_with_filename_prefix('art_', args.output_filename), 'w') as f:
    f.writelines(art_objs)
  with open(
      _with_filename_prefix('framework_', args.output_filename), 'w'
  ) as f:
    f.writelines(framework_objs)

  if args.print_stats:
    print(','.join(k for k, v in entries), ',obj_count')
    total_count = 0
    for sort_key, objs in sort_keys:
      # Joining the characters of the binary string yields one 0/1 column per
      # process, in the same order as the header row.
      bits_csv = ','.join(
          '{sort_key:0{width}b}'.format(sort_key=sort_key, width=len(entries))
      )
      print(bits_csv, ',', len(objs))
      total_count += len(objs)
    print('total: ', total_count)
203
204
# Run the command-line tool only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == '__main__':
  main()
207