• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3# Copyright 2019 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""AFDO Profile analysis tool.
8
9This script takes a good AFDO profile, a bad AFDO profile, and an external
10script which deems particular AFDO profiles as GOOD/BAD/SKIP, and an output
11file as arguments. Given these pieces of information, it analyzes the profiles
12to try and determine what exactly is bad about the bad profile. It does this
13with three main techniques: bisecting search, range search, and rough diff-ing.
14
15The external script communicates the 'goodness' of an AFDO profile through its
16exit code. The codes known to this script are:
17  - 0: the AFDO profile produced a good binary
18  - 1: the AFDO profile produced a bad binary
19  - 125: no result could be determined; just try another profile
20  - >127: quit immediately
21"""
22
23from __future__ import division, print_function
24
25import argparse
26import json
27# Pylint recommends we use "from chromite.lib import cros_logging as logging".
28# Chromite specific policy message, we want to keep using the standard logging
29# pylint: disable=cros-logging-import
30import logging
31import os
32import random
33import subprocess
34import time
35from datetime import date
36from enum import IntEnum
37from tempfile import mkstemp
38
39
40class StatusEnum(IntEnum):
41  """Enum of valid statuses returned by profile decider."""
42  GOOD_STATUS = 0
43  BAD_STATUS = 1
44  SKIP_STATUS = 125
45  PROBLEM_STATUS = 127
46
47
48statuses = StatusEnum.__members__.values()
49
50_NUM_RUNS_RANGE_SEARCH = 20  # how many times range search should run its algo
51
52
53def json_to_text(json_prof):
54  text_profile = []
55  for func in json_prof:
56    text_profile.append(func)
57    text_profile.append(json_prof[func])
58  return ''.join(text_profile)
59
60
61def text_to_json(f):
62  """Performs basic parsing of an AFDO text-based profile.
63
64  This parsing expects an input file object with contents of the form generated
65  by bin/llvm-profdata (within an LLVM build).
66  """
67  results = {}
68  curr_func = None
69  curr_data = []
70  for line in f:
71    if not line.startswith(' '):
72      if curr_func:
73        results[curr_func] = ''.join(curr_data)
74        curr_data = []
75      curr_func, rest = line.split(':', 1)
76      curr_func = curr_func.strip()
77      curr_data.append(':' + rest)
78    else:
79      curr_data.append(line)
80
81  if curr_func:
82    results[curr_func] = ''.join(curr_data)
83  return results
84
85
86def prof_to_tmp(prof):
87  """Creates (and returns) temp filename for given JSON-based AFDO profile."""
88  fd, temp_path = mkstemp()
89  text_profile = json_to_text(prof)
90  with open(temp_path, 'w') as f:
91    f.write(text_profile)
92  os.close(fd)
93  return temp_path
94
95
96class DeciderState(object):
97  """Class for the external decider."""
98
99  def __init__(self, state_file, external_decider, seed):
100    self.accumulated_results = []  # over this run of the script
101    self.external_decider = external_decider
102    self.saved_results = []  # imported from a previous run of this script
103    self.state_file = state_file
104    self.seed = seed if seed is not None else time.time()
105
106  def load_state(self):
107    if not os.path.exists(self.state_file):
108      logging.info('State file %s is empty, starting from beginning',
109                   self.state_file)
110      return
111
112    with open(self.state_file, encoding='utf-8') as f:
113      try:
114        data = json.load(f)
115      except:
116        raise ValueError('Provided state file %s to resume from does not'
117                         ' contain a valid JSON.' % self.state_file)
118
119    if 'seed' not in data or 'accumulated_results' not in data:
120      raise ValueError('Provided state file %s to resume from does not contain'
121                       ' the correct information' % self.state_file)
122
123    self.seed = data['seed']
124    self.saved_results = data['accumulated_results']
125    logging.info('Restored state from %s...', self.state_file)
126
127  def save_state(self):
128    state = {'seed': self.seed, 'accumulated_results': self.accumulated_results}
129    tmp_file = self.state_file + '.new'
130    with open(tmp_file, 'w', encoding='utf-8') as f:
131      json.dump(state, f, indent=2)
132    os.rename(tmp_file, self.state_file)
133    logging.info('Logged state to %s...', self.state_file)
134
135  def run(self, prof, save_run=True):
136    """Run the external deciding script on the given profile."""
137    if self.saved_results and save_run:
138      result = self.saved_results.pop(0)
139      self.accumulated_results.append(result)
140      self.save_state()
141      return StatusEnum(result)
142
143    filename = prof_to_tmp(prof)
144
145    try:
146      return_code = subprocess.call([self.external_decider, filename])
147    finally:
148      os.remove(filename)
149
150    if return_code in statuses:
151      status = StatusEnum(return_code)
152      if status == StatusEnum.PROBLEM_STATUS:
153        prof_file = prof_to_tmp(prof)
154        raise RuntimeError('Provided decider script returned PROBLEM_STATUS '
155                           'when run on profile stored at %s. AFDO Profile '
156                           'analysis aborting' % prof_file)
157      if save_run:
158        self.accumulated_results.append(status.value)
159      logging.info('Run %d of external script %s returned %s',
160                   len(self.accumulated_results), self.external_decider,
161                   status.name)
162      self.save_state()
163      return status
164    raise ValueError(
165        'Provided external script had unexpected return code %d' % return_code)
166
167
168def bisect_profiles(decider, good, bad, common_funcs, lo, hi):
169  """Recursive function which bisects good and bad profiles.
170
171  Args:
172    decider: function which, given a JSON-based AFDO profile, returns an
173      element of 'statuses' based on the status of the profile
174    good: JSON-based good AFDO profile
175    bad: JSON-based bad AFDO profile
176    common_funcs: the list of functions which have top-level profiles in both
177      'good' and 'bad'
178    lo: lower bound of range being bisected on
179    hi: upper bound of range being bisected on
180
181  Returns a dictionary with two keys: 'individuals' and 'ranges'.
182  'individuals': a list of individual functions found to make the profile BAD
183  'ranges': a list of lists of function names. Each list of functions is a list
184    such that including all of those from the bad profile makes the good
185    profile BAD. It may not be the smallest problematic combination, but
186    definitely contains a problematic combination of profiles.
187  """
188
189  results = {'individuals': [], 'ranges': []}
190  if hi - lo <= 1:
191    logging.info('Found %s as a problematic function profile', common_funcs[lo])
192    results['individuals'].append(common_funcs[lo])
193    return results
194
195  mid = (lo + hi) // 2
196  lo_mid_prof = good.copy()  # covers bad from lo:mid
197  mid_hi_prof = good.copy()  # covers bad from mid:hi
198  for func in common_funcs[lo:mid]:
199    lo_mid_prof[func] = bad[func]
200  for func in common_funcs[mid:hi]:
201    mid_hi_prof[func] = bad[func]
202
203  lo_mid_verdict = decider.run(lo_mid_prof)
204  mid_hi_verdict = decider.run(mid_hi_prof)
205
206  if lo_mid_verdict == StatusEnum.BAD_STATUS:
207    result = bisect_profiles(decider, good, bad, common_funcs, lo, mid)
208    results['individuals'].extend(result['individuals'])
209    results['ranges'].extend(result['ranges'])
210  if mid_hi_verdict == StatusEnum.BAD_STATUS:
211    result = bisect_profiles(decider, good, bad, common_funcs, mid, hi)
212    results['individuals'].extend(result['individuals'])
213    results['ranges'].extend(result['ranges'])
214
215  # neither half is bad -> the issue is caused by several things occuring
216  # in conjunction, and this combination crosses 'mid'
217  if lo_mid_verdict == mid_hi_verdict == StatusEnum.GOOD_STATUS:
218    problem_range = range_search(decider, good, bad, common_funcs, lo, hi)
219    if problem_range:
220      logging.info('Found %s as a problematic combination of profiles',
221                   str(problem_range))
222      results['ranges'].append(problem_range)
223
224  return results
225
226
227def bisect_profiles_wrapper(decider, good, bad, perform_check=True):
228  """Wrapper for recursive profile bisection."""
229
230  # Validate good and bad profiles are such, otherwise bisection reports noise
231  # Note that while decider is a random mock, these assertions may fail.
232  if perform_check:
233    if decider.run(good, save_run=False) != StatusEnum.GOOD_STATUS:
234      raise ValueError('Supplied good profile is not actually GOOD')
235    if decider.run(bad, save_run=False) != StatusEnum.BAD_STATUS:
236      raise ValueError('Supplied bad profile is not actually BAD')
237
238  common_funcs = sorted(func for func in good if func in bad)
239  if not common_funcs:
240    return {'ranges': [], 'individuals': []}
241
242  # shuffle because the results of our analysis can be quite order-dependent
243  # but this list has no inherent ordering. By shuffling each time, the chances
244  # of finding new, potentially interesting results are increased each time
245  # the program is run
246  random.shuffle(common_funcs)
247  results = bisect_profiles(decider, good, bad, common_funcs, 0,
248                            len(common_funcs))
249  results['ranges'].sort()
250  results['individuals'].sort()
251  return results
252
253
254def range_search(decider, good, bad, common_funcs, lo, hi):
255  """Searches for problematic range crossing mid border.
256
257  The main inner algorithm is the following, which looks for the smallest
258  possible ranges with problematic combinations. It starts the upper bound at
259  the midpoint, and increments in halves until it gets a BAD profile.
260  Then, it increments the lower bound (in halves) until the resultant profile
261  is GOOD, and then we have a range that causes 'BAD'ness.
262
263  It does this _NUM_RUNS_RANGE_SEARCH times, and shuffles the functions being
264  looked at uniquely each time to try and get the smallest possible range
265  of functions in a reasonable timeframe.
266  """
267
268  average = lambda x, y: int(round((x + y) // 2.0))
269
270  def find_upper_border(good_copy, funcs, lo, hi, last_bad_val=None):
271    """Finds the upper border of problematic range."""
272    mid = average(lo, hi)
273    if mid in (lo, hi):
274      return last_bad_val or hi
275
276    for func in funcs[lo:mid]:
277      good_copy[func] = bad[func]
278    verdict = decider.run(good_copy)
279
280    # reset for next iteration
281    for func in funcs:
282      good_copy[func] = good[func]
283
284    if verdict == StatusEnum.BAD_STATUS:
285      return find_upper_border(good_copy, funcs, lo, mid, mid)
286    return find_upper_border(good_copy, funcs, mid, hi, last_bad_val)
287
288  def find_lower_border(good_copy, funcs, lo, hi, last_bad_val=None):
289    """Finds the lower border of problematic range."""
290    mid = average(lo, hi)
291    if mid in (lo, hi):
292      return last_bad_val or lo
293
294    for func in funcs[lo:mid]:
295      good_copy[func] = good[func]
296    verdict = decider.run(good_copy)
297
298    # reset for next iteration
299    for func in funcs:
300      good_copy[func] = bad[func]
301
302    if verdict == StatusEnum.BAD_STATUS:
303      return find_lower_border(good_copy, funcs, mid, hi, lo)
304    return find_lower_border(good_copy, funcs, lo, mid, last_bad_val)
305
306  lo_mid_funcs = []
307  mid_hi_funcs = []
308  min_range_funcs = []
309  for _ in range(_NUM_RUNS_RANGE_SEARCH):
310
311    if min_range_funcs:  # only examine range we've already narrowed to
312      random.shuffle(lo_mid_funcs)
313      random.shuffle(mid_hi_funcs)
314    else:  # consider lo-mid and mid-hi separately bc must cross border
315      mid = (lo + hi) // 2
316      lo_mid_funcs = common_funcs[lo:mid]
317      mid_hi_funcs = common_funcs[mid:hi]
318
319    funcs = lo_mid_funcs + mid_hi_funcs
320    hi = len(funcs)
321    mid = len(lo_mid_funcs)
322    lo = 0
323
324    # because we need the problematic pair to pop up before we can narrow it
325    prof = good.copy()
326    for func in lo_mid_funcs:
327      prof[func] = bad[func]
328
329    upper_border = find_upper_border(prof, funcs, mid, hi)
330    for func in lo_mid_funcs + funcs[mid:upper_border]:
331      prof[func] = bad[func]
332
333    lower_border = find_lower_border(prof, funcs, lo, mid)
334    curr_range_funcs = funcs[lower_border:upper_border]
335
336    if not min_range_funcs or len(curr_range_funcs) < len(min_range_funcs):
337      min_range_funcs = curr_range_funcs
338      lo_mid_funcs = lo_mid_funcs[lo_mid_funcs.index(min_range_funcs[0]):]
339      mid_hi_funcs = mid_hi_funcs[:mid_hi_funcs.index(min_range_funcs[-1]) + 1]
340      if len(min_range_funcs) == 2:
341        min_range_funcs.sort()
342        return min_range_funcs  # can't get any smaller
343
344  min_range_funcs.sort()
345  return min_range_funcs
346
347
348def check_good_not_bad(decider, good, bad):
349  """Check if bad prof becomes GOOD by adding funcs it lacks from good prof"""
350  bad_copy = bad.copy()
351  for func in good:
352    if func not in bad:
353      bad_copy[func] = good[func]
354  return decider.run(bad_copy) == StatusEnum.GOOD_STATUS
355
356
357def check_bad_not_good(decider, good, bad):
358  """Check if good prof BAD after adding funcs bad prof has that good doesnt"""
359  good_copy = good.copy()
360  for func in bad:
361    if func not in good:
362      good_copy[func] = bad[func]
363  return decider.run(good_copy) == StatusEnum.BAD_STATUS
364
365
366def parse_args():
367  parser = argparse.ArgumentParser(
368      description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
369  parser.add_argument(
370      '--good_prof',
371      required=True,
372      help='Text-based "Good" profile for analysis')
373  parser.add_argument(
374      '--bad_prof', required=True, help='Text-based "Bad" profile for analysis')
375  parser.add_argument(
376      '--external_decider',
377      required=True,
378      help='External script that, given an AFDO profile, returns '
379      'GOOD/BAD/SKIP')
380  parser.add_argument(
381      '--analysis_output_file',
382      required=True,
383      help='File to output JSON results to')
384  parser.add_argument(
385      '--state_file',
386      default='%s/afdo_analysis_state.json' % os.getcwd(),
387      help='File path containing state to load from initially, and will be '
388      'overwritten with new state on each iteration')
389  parser.add_argument(
390      '--no_resume',
391      action='store_true',
392      help='If enabled, no initial state will be loaded and the program will '
393      'run from the beginning')
394  parser.add_argument(
395      '--remove_state_on_completion',
396      action='store_true',
397      help='If enabled, state file will be removed once profile analysis is '
398      'completed')
399  parser.add_argument(
400      '--seed', type=float, help='Float specifying seed for randomness')
401  return parser.parse_args()
402
403
404def main(flags):
405  logging.getLogger().setLevel(logging.INFO)
406  if not flags.no_resume and flags.seed:  # conflicting seeds
407    raise RuntimeError('Ambiguous seed value; do not resume from existing '
408                       'state and also specify seed by command line flag')
409
410  decider = DeciderState(
411      flags.state_file, flags.external_decider, seed=flags.seed)
412  if not flags.no_resume:
413    decider.load_state()
414  random.seed(decider.seed)
415
416  with open(flags.good_prof) as good_f:
417    good_items = text_to_json(good_f)
418  with open(flags.bad_prof) as bad_f:
419    bad_items = text_to_json(bad_f)
420
421  bisect_results = bisect_profiles_wrapper(decider, good_items, bad_items)
422  gnb_result = check_good_not_bad(decider, good_items, bad_items)
423  bng_result = check_bad_not_good(decider, good_items, bad_items)
424
425  results = {
426      'seed': decider.seed,
427      'bisect_results': bisect_results,
428      'good_only_functions': gnb_result,
429      'bad_only_functions': bng_result
430  }
431  with open(flags.analysis_output_file, 'w', encoding='utf-8') as f:
432    json.dump(results, f, indent=2)
433  if flags.remove_state_on_completion:
434    os.remove(flags.state_file)
435    logging.info('Removed state file %s following completion of script...',
436                 flags.state_file)
437  else:
438    completed_state_file = '%s.completed.%s' % (flags.state_file,
439                                                str(date.today()))
440    os.rename(flags.state_file, completed_state_file)
441    logging.info('Stored completed state file as %s...', completed_state_file)
442  return results
443
444
445if __name__ == '__main__':
446  main(parse_args())
447