• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3# Copyright 2019 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""AFDO Profile analysis tool.
8
9This script takes a good AFDO profile, a bad AFDO profile, and an external
10script which deems particular AFDO profiles as GOOD/BAD/SKIP, and an output
11file as arguments. Given these pieces of information, it analyzes the profiles
12to try and determine what exactly is bad about the bad profile. It does this
13with three main techniques: bisecting search, range search, and rough diff-ing.
14
15The external script communicates the 'goodness' of an AFDO profile through its
16exit code. The codes known to this script are:
17  - 0: the AFDO profile produced a good binary
18  - 1: the AFDO profile produced a bad binary
19  - 125: no result could be determined; just try another profile
20  - >127: quit immediately
21"""
22
23from __future__ import division, print_function
24
25import argparse
26import json
27import logging
28import os
29import random
30import subprocess
31import time
32from datetime import date
33from enum import IntEnum
34from tempfile import mkstemp
35
36
37class StatusEnum(IntEnum):
38  """Enum of valid statuses returned by profile decider."""
39  GOOD_STATUS = 0
40  BAD_STATUS = 1
41  SKIP_STATUS = 125
42  PROBLEM_STATUS = 127
43
44
45statuses = StatusEnum.__members__.values()
46
47_NUM_RUNS_RANGE_SEARCH = 20  # how many times range search should run its algo
48
49
50def json_to_text(json_prof):
51  text_profile = []
52  for func in json_prof:
53    text_profile.append(func)
54    text_profile.append(json_prof[func])
55  return ''.join(text_profile)
56
57
58def text_to_json(f):
59  """Performs basic parsing of an AFDO text-based profile.
60
61  This parsing expects an input file object with contents of the form generated
62  by bin/llvm-profdata (within an LLVM build).
63  """
64  results = {}
65  curr_func = None
66  curr_data = []
67  for line in f:
68    if not line.startswith(' '):
69      if curr_func:
70        results[curr_func] = ''.join(curr_data)
71        curr_data = []
72      curr_func, rest = line.split(':', 1)
73      curr_func = curr_func.strip()
74      curr_data.append(':' + rest)
75    else:
76      curr_data.append(line)
77
78  if curr_func:
79    results[curr_func] = ''.join(curr_data)
80  return results
81
82
83def prof_to_tmp(prof):
84  """Creates (and returns) temp filename for given JSON-based AFDO profile."""
85  fd, temp_path = mkstemp()
86  text_profile = json_to_text(prof)
87  with open(temp_path, 'w') as f:
88    f.write(text_profile)
89  os.close(fd)
90  return temp_path
91
92
93class DeciderState(object):
94  """Class for the external decider."""
95
96  def __init__(self, state_file, external_decider, seed):
97    self.accumulated_results = []  # over this run of the script
98    self.external_decider = external_decider
99    self.saved_results = []  # imported from a previous run of this script
100    self.state_file = state_file
101    self.seed = seed if seed is not None else time.time()
102
103  def load_state(self):
104    if not os.path.exists(self.state_file):
105      logging.info('State file %s is empty, starting from beginning',
106                   self.state_file)
107      return
108
109    with open(self.state_file, encoding='utf-8') as f:
110      try:
111        data = json.load(f)
112      except:
113        raise ValueError('Provided state file %s to resume from does not'
114                         ' contain a valid JSON.' % self.state_file)
115
116    if 'seed' not in data or 'accumulated_results' not in data:
117      raise ValueError('Provided state file %s to resume from does not contain'
118                       ' the correct information' % self.state_file)
119
120    self.seed = data['seed']
121    self.saved_results = data['accumulated_results']
122    logging.info('Restored state from %s...', self.state_file)
123
124  def save_state(self):
125    state = {'seed': self.seed, 'accumulated_results': self.accumulated_results}
126    tmp_file = self.state_file + '.new'
127    with open(tmp_file, 'w', encoding='utf-8') as f:
128      json.dump(state, f, indent=2)
129    os.rename(tmp_file, self.state_file)
130    logging.info('Logged state to %s...', self.state_file)
131
132  def run(self, prof, save_run=True):
133    """Run the external deciding script on the given profile."""
134    if self.saved_results and save_run:
135      result = self.saved_results.pop(0)
136      self.accumulated_results.append(result)
137      self.save_state()
138      return StatusEnum(result)
139
140    filename = prof_to_tmp(prof)
141
142    try:
143      return_code = subprocess.call([self.external_decider, filename])
144    finally:
145      os.remove(filename)
146
147    if return_code in statuses:
148      status = StatusEnum(return_code)
149      if status == StatusEnum.PROBLEM_STATUS:
150        prof_file = prof_to_tmp(prof)
151        raise RuntimeError('Provided decider script returned PROBLEM_STATUS '
152                           'when run on profile stored at %s. AFDO Profile '
153                           'analysis aborting' % prof_file)
154      if save_run:
155        self.accumulated_results.append(status.value)
156      logging.info('Run %d of external script %s returned %s',
157                   len(self.accumulated_results), self.external_decider,
158                   status.name)
159      self.save_state()
160      return status
161    raise ValueError(
162        'Provided external script had unexpected return code %d' % return_code)
163
164
165def bisect_profiles(decider, good, bad, common_funcs, lo, hi):
166  """Recursive function which bisects good and bad profiles.
167
168  Args:
169    decider: function which, given a JSON-based AFDO profile, returns an
170      element of 'statuses' based on the status of the profile
171    good: JSON-based good AFDO profile
172    bad: JSON-based bad AFDO profile
173    common_funcs: the list of functions which have top-level profiles in both
174      'good' and 'bad'
175    lo: lower bound of range being bisected on
176    hi: upper bound of range being bisected on
177
178  Returns a dictionary with two keys: 'individuals' and 'ranges'.
179  'individuals': a list of individual functions found to make the profile BAD
180  'ranges': a list of lists of function names. Each list of functions is a list
181    such that including all of those from the bad profile makes the good
182    profile BAD. It may not be the smallest problematic combination, but
183    definitely contains a problematic combination of profiles.
184  """
185
186  results = {'individuals': [], 'ranges': []}
187  if hi - lo <= 1:
188    logging.info('Found %s as a problematic function profile', common_funcs[lo])
189    results['individuals'].append(common_funcs[lo])
190    return results
191
192  mid = (lo + hi) // 2
193  lo_mid_prof = good.copy()  # covers bad from lo:mid
194  mid_hi_prof = good.copy()  # covers bad from mid:hi
195  for func in common_funcs[lo:mid]:
196    lo_mid_prof[func] = bad[func]
197  for func in common_funcs[mid:hi]:
198    mid_hi_prof[func] = bad[func]
199
200  lo_mid_verdict = decider.run(lo_mid_prof)
201  mid_hi_verdict = decider.run(mid_hi_prof)
202
203  if lo_mid_verdict == StatusEnum.BAD_STATUS:
204    result = bisect_profiles(decider, good, bad, common_funcs, lo, mid)
205    results['individuals'].extend(result['individuals'])
206    results['ranges'].extend(result['ranges'])
207  if mid_hi_verdict == StatusEnum.BAD_STATUS:
208    result = bisect_profiles(decider, good, bad, common_funcs, mid, hi)
209    results['individuals'].extend(result['individuals'])
210    results['ranges'].extend(result['ranges'])
211
212  # neither half is bad -> the issue is caused by several things occuring
213  # in conjunction, and this combination crosses 'mid'
214  if lo_mid_verdict == mid_hi_verdict == StatusEnum.GOOD_STATUS:
215    problem_range = range_search(decider, good, bad, common_funcs, lo, hi)
216    if problem_range:
217      logging.info('Found %s as a problematic combination of profiles',
218                   str(problem_range))
219      results['ranges'].append(problem_range)
220
221  return results
222
223
224def bisect_profiles_wrapper(decider, good, bad, perform_check=True):
225  """Wrapper for recursive profile bisection."""
226
227  # Validate good and bad profiles are such, otherwise bisection reports noise
228  # Note that while decider is a random mock, these assertions may fail.
229  if perform_check:
230    if decider.run(good, save_run=False) != StatusEnum.GOOD_STATUS:
231      raise ValueError('Supplied good profile is not actually GOOD')
232    if decider.run(bad, save_run=False) != StatusEnum.BAD_STATUS:
233      raise ValueError('Supplied bad profile is not actually BAD')
234
235  common_funcs = sorted(func for func in good if func in bad)
236  if not common_funcs:
237    return {'ranges': [], 'individuals': []}
238
239  # shuffle because the results of our analysis can be quite order-dependent
240  # but this list has no inherent ordering. By shuffling each time, the chances
241  # of finding new, potentially interesting results are increased each time
242  # the program is run
243  random.shuffle(common_funcs)
244  results = bisect_profiles(decider, good, bad, common_funcs, 0,
245                            len(common_funcs))
246  results['ranges'].sort()
247  results['individuals'].sort()
248  return results
249
250
251def range_search(decider, good, bad, common_funcs, lo, hi):
252  """Searches for problematic range crossing mid border.
253
254  The main inner algorithm is the following, which looks for the smallest
255  possible ranges with problematic combinations. It starts the upper bound at
256  the midpoint, and increments in halves until it gets a BAD profile.
257  Then, it increments the lower bound (in halves) until the resultant profile
258  is GOOD, and then we have a range that causes 'BAD'ness.
259
260  It does this _NUM_RUNS_RANGE_SEARCH times, and shuffles the functions being
261  looked at uniquely each time to try and get the smallest possible range
262  of functions in a reasonable timeframe.
263  """
264
265  average = lambda x, y: int(round((x + y) // 2.0))
266
267  def find_upper_border(good_copy, funcs, lo, hi, last_bad_val=None):
268    """Finds the upper border of problematic range."""
269    mid = average(lo, hi)
270    if mid in (lo, hi):
271      return last_bad_val or hi
272
273    for func in funcs[lo:mid]:
274      good_copy[func] = bad[func]
275    verdict = decider.run(good_copy)
276
277    # reset for next iteration
278    for func in funcs:
279      good_copy[func] = good[func]
280
281    if verdict == StatusEnum.BAD_STATUS:
282      return find_upper_border(good_copy, funcs, lo, mid, mid)
283    return find_upper_border(good_copy, funcs, mid, hi, last_bad_val)
284
285  def find_lower_border(good_copy, funcs, lo, hi, last_bad_val=None):
286    """Finds the lower border of problematic range."""
287    mid = average(lo, hi)
288    if mid in (lo, hi):
289      return last_bad_val or lo
290
291    for func in funcs[lo:mid]:
292      good_copy[func] = good[func]
293    verdict = decider.run(good_copy)
294
295    # reset for next iteration
296    for func in funcs:
297      good_copy[func] = bad[func]
298
299    if verdict == StatusEnum.BAD_STATUS:
300      return find_lower_border(good_copy, funcs, mid, hi, lo)
301    return find_lower_border(good_copy, funcs, lo, mid, last_bad_val)
302
303  lo_mid_funcs = []
304  mid_hi_funcs = []
305  min_range_funcs = []
306  for _ in range(_NUM_RUNS_RANGE_SEARCH):
307
308    if min_range_funcs:  # only examine range we've already narrowed to
309      random.shuffle(lo_mid_funcs)
310      random.shuffle(mid_hi_funcs)
311    else:  # consider lo-mid and mid-hi separately bc must cross border
312      mid = (lo + hi) // 2
313      lo_mid_funcs = common_funcs[lo:mid]
314      mid_hi_funcs = common_funcs[mid:hi]
315
316    funcs = lo_mid_funcs + mid_hi_funcs
317    hi = len(funcs)
318    mid = len(lo_mid_funcs)
319    lo = 0
320
321    # because we need the problematic pair to pop up before we can narrow it
322    prof = good.copy()
323    for func in lo_mid_funcs:
324      prof[func] = bad[func]
325
326    upper_border = find_upper_border(prof, funcs, mid, hi)
327    for func in lo_mid_funcs + funcs[mid:upper_border]:
328      prof[func] = bad[func]
329
330    lower_border = find_lower_border(prof, funcs, lo, mid)
331    curr_range_funcs = funcs[lower_border:upper_border]
332
333    if not min_range_funcs or len(curr_range_funcs) < len(min_range_funcs):
334      min_range_funcs = curr_range_funcs
335      lo_mid_funcs = lo_mid_funcs[lo_mid_funcs.index(min_range_funcs[0]):]
336      mid_hi_funcs = mid_hi_funcs[:mid_hi_funcs.index(min_range_funcs[-1]) + 1]
337      if len(min_range_funcs) == 2:
338        min_range_funcs.sort()
339        return min_range_funcs  # can't get any smaller
340
341  min_range_funcs.sort()
342  return min_range_funcs
343
344
345def check_good_not_bad(decider, good, bad):
346  """Check if bad prof becomes GOOD by adding funcs it lacks from good prof"""
347  bad_copy = bad.copy()
348  for func in good:
349    if func not in bad:
350      bad_copy[func] = good[func]
351  return decider.run(bad_copy) == StatusEnum.GOOD_STATUS
352
353
354def check_bad_not_good(decider, good, bad):
355  """Check if good prof BAD after adding funcs bad prof has that good doesnt"""
356  good_copy = good.copy()
357  for func in bad:
358    if func not in good:
359      good_copy[func] = bad[func]
360  return decider.run(good_copy) == StatusEnum.BAD_STATUS
361
362
363def parse_args():
364  parser = argparse.ArgumentParser(
365      description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
366  parser.add_argument(
367      '--good_prof',
368      required=True,
369      help='Text-based "Good" profile for analysis')
370  parser.add_argument(
371      '--bad_prof', required=True, help='Text-based "Bad" profile for analysis')
372  parser.add_argument(
373      '--external_decider',
374      required=True,
375      help='External script that, given an AFDO profile, returns '
376      'GOOD/BAD/SKIP')
377  parser.add_argument(
378      '--analysis_output_file',
379      required=True,
380      help='File to output JSON results to')
381  parser.add_argument(
382      '--state_file',
383      default='%s/afdo_analysis_state.json' % os.getcwd(),
384      help='File path containing state to load from initially, and will be '
385      'overwritten with new state on each iteration')
386  parser.add_argument(
387      '--no_resume',
388      action='store_true',
389      help='If enabled, no initial state will be loaded and the program will '
390      'run from the beginning')
391  parser.add_argument(
392      '--remove_state_on_completion',
393      action='store_true',
394      help='If enabled, state file will be removed once profile analysis is '
395      'completed')
396  parser.add_argument(
397      '--seed', type=float, help='Float specifying seed for randomness')
398  return parser.parse_args()
399
400
401def main(flags):
402  logging.getLogger().setLevel(logging.INFO)
403  if not flags.no_resume and flags.seed:  # conflicting seeds
404    raise RuntimeError('Ambiguous seed value; do not resume from existing '
405                       'state and also specify seed by command line flag')
406
407  decider = DeciderState(
408      flags.state_file, flags.external_decider, seed=flags.seed)
409  if not flags.no_resume:
410    decider.load_state()
411  random.seed(decider.seed)
412
413  with open(flags.good_prof) as good_f:
414    good_items = text_to_json(good_f)
415  with open(flags.bad_prof) as bad_f:
416    bad_items = text_to_json(bad_f)
417
418  bisect_results = bisect_profiles_wrapper(decider, good_items, bad_items)
419  gnb_result = check_good_not_bad(decider, good_items, bad_items)
420  bng_result = check_bad_not_good(decider, good_items, bad_items)
421
422  results = {
423      'seed': decider.seed,
424      'bisect_results': bisect_results,
425      'good_only_functions': gnb_result,
426      'bad_only_functions': bng_result
427  }
428  with open(flags.analysis_output_file, 'w', encoding='utf-8') as f:
429    json.dump(results, f, indent=2)
430  if flags.remove_state_on_completion:
431    os.remove(flags.state_file)
432    logging.info('Removed state file %s following completion of script...',
433                 flags.state_file)
434  else:
435    completed_state_file = '%s.completed.%s' % (flags.state_file,
436                                                str(date.today()))
437    os.rename(flags.state_file, completed_state_file)
438    logging.info('Stored completed state file as %s...', completed_state_file)
439  return results
440
441
442if __name__ == '__main__':
443  main(parse_args())
444