#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2019 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""AFDO Profile analysis tool.

This script takes a good AFDO profile, a bad AFDO profile, and an external
script which deems particular AFDO profiles as GOOD/BAD/SKIP, and an output
file as arguments. Given these pieces of information, it analyzes the profiles
to try and determine what exactly is bad about the bad profile. It does this
with three main techniques: bisecting search, range search, and rough diff-ing.

The external script communicates the 'goodness' of an AFDO profile through its
exit code. The codes known to this script are:
  - 0: the AFDO profile produced a good binary
  - 1: the AFDO profile produced a bad binary
  - 125: no result could be determined; just try another profile
  - 127 or greater: quit immediately
"""

from __future__ import division, print_function

import argparse
import json
import logging
import os
import random
import subprocess
import time
from datetime import date
from enum import IntEnum
from tempfile import mkstemp


class StatusEnum(IntEnum):
  """Enum of valid statuses returned by profile decider."""
  GOOD_STATUS = 0
  BAD_STATUS = 1
  SKIP_STATUS = 125
  PROBLEM_STATUS = 127


statuses = StatusEnum.__members__.values()

_NUM_RUNS_RANGE_SEARCH = 20  # how many times range search should run its algo


def json_to_text(json_prof):
  """Converts a JSON-based profile (dict of func -> text) back to text."""
  text_profile = []
  for func in json_prof:
    text_profile.append(func)
    text_profile.append(json_prof[func])
  return ''.join(text_profile)


def text_to_json(f):
  """Performs basic parsing of an AFDO text-based profile.

  This parsing expects an input file object with contents of the form generated
  by bin/llvm-profdata (within an LLVM build).
  """
  results = {}
  curr_func = None
  curr_data = []
  for line in f:
    if not line.startswith(' '):
      if curr_func:
        results[curr_func] = ''.join(curr_data)
        curr_data = []
      curr_func, rest = line.split(':', 1)
      curr_func = curr_func.strip()
      curr_data.append(':' + rest)
    else:
      curr_data.append(line)

  if curr_func:
    results[curr_func] = ''.join(curr_data)
  return results


def prof_to_tmp(prof):
  """Creates (and returns) temp filename for given JSON-based AFDO profile."""
  fd, temp_path = mkstemp()
  text_profile = json_to_text(prof)
  with open(temp_path, 'w') as f:
    f.write(text_profile)
  os.close(fd)
  return temp_path


class DeciderState(object):
  """Class for the external decider."""

  def __init__(self, state_file, external_decider, seed):
    self.accumulated_results = []  # over this run of the script
    self.external_decider = external_decider
    self.saved_results = []  # imported from a previous run of this script
    self.state_file = state_file
    self.seed = seed if seed is not None else time.time()

  def load_state(self):
    if not os.path.exists(self.state_file):
      logging.info('State file %s is empty, starting from beginning',
                   self.state_file)
      return

    with open(self.state_file, encoding='utf-8') as f:
      try:
        data = json.load(f)
      except ValueError:
        raise ValueError('Provided state file %s to resume from does not'
                         ' contain valid JSON.' % self.state_file)

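    # A valid state file is the JSON written by save_state(), roughly of the
    # form (values illustrative): {"seed": 1234.5, "accumulated_results": [0]}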
    if 'seed' not in data or 'accumulated_results' not in data:
      raise ValueError('Provided state file %s to resume from does not contain'
                       ' the correct information' % self.state_file)

    self.seed = data['seed']
    self.saved_results = data['accumulated_results']
    logging.info('Restored state from %s...', self.state_file)

  def save_state(self):
    state = {'seed': self.seed, 'accumulated_results': self.accumulated_results}
    tmp_file = self.state_file + '.new'
    with open(tmp_file, 'w', encoding='utf-8') as f:
      json.dump(state, f, indent=2)
    os.rename(tmp_file, self.state_file)
    logging.info('Logged state to %s...', self.state_file)

  def run(self, prof, save_run=True):
    """Run the external deciding script on the given profile."""
    if self.saved_results and save_run:
      result = self.saved_results.pop(0)
      self.accumulated_results.append(result)
      self.save_state()
      return StatusEnum(result)

    filename = prof_to_tmp(prof)

    try:
      return_code = subprocess.call([self.external_decider, filename])
    finally:
      os.remove(filename)

    if return_code in statuses:
      status = StatusEnum(return_code)
      if status == StatusEnum.PROBLEM_STATUS:
        prof_file = prof_to_tmp(prof)
        raise RuntimeError('Provided decider script returned PROBLEM_STATUS '
                           'when run on profile stored at %s. AFDO Profile '
                           'analysis aborting' % prof_file)
      if save_run:
        self.accumulated_results.append(status.value)
      logging.info('Run %d of external script %s returned %s',
                   len(self.accumulated_results), self.external_decider,
                   status.name)
      self.save_state()
      return status
    raise ValueError(
        'Provided external script had unexpected return code %d' % return_code)


def bisect_profiles(decider, good, bad, common_funcs, lo, hi):
  """Recursive function which bisects good and bad profiles.

  Args:
    decider: function which, given a JSON-based AFDO profile, returns an
      element of 'statuses' based on the status of the profile
    good: JSON-based good AFDO profile
    bad: JSON-based bad AFDO profile
    common_funcs: the list of functions which have top-level profiles in both
      'good' and 'bad'
    lo: lower bound of range being bisected on
    hi: upper bound of range being bisected on

  Returns a dictionary with two keys: 'individuals' and 'ranges'.
  'individuals': a list of individual functions found to make the profile BAD
  'ranges': a list of lists of function names. Each list of functions is a list
    such that including all of those from the bad profile makes the good
    profile BAD. It may not be the smallest problematic combination, but
    definitely contains a problematic combination of profiles.
  """

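  # Illustrative shape of the result (function names hypothetical):
  #   {'individuals': ['some_func'], 'ranges': [['func_a', 'func_b']]}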
  results = {'individuals': [], 'ranges': []}
  if hi - lo <= 1:
    logging.info('Found %s as a problematic function profile', common_funcs[lo])
    results['individuals'].append(common_funcs[lo])
    return results

  mid = (lo + hi) // 2
  lo_mid_prof = good.copy()  # covers bad from lo:mid
  mid_hi_prof = good.copy()  # covers bad from mid:hi
  for func in common_funcs[lo:mid]:
    lo_mid_prof[func] = bad[func]
  for func in common_funcs[mid:hi]:
    mid_hi_prof[func] = bad[func]

  lo_mid_verdict = decider.run(lo_mid_prof)
  mid_hi_verdict = decider.run(mid_hi_prof)

  if lo_mid_verdict == StatusEnum.BAD_STATUS:
    result = bisect_profiles(decider, good, bad, common_funcs, lo, mid)
    results['individuals'].extend(result['individuals'])
    results['ranges'].extend(result['ranges'])
  if mid_hi_verdict == StatusEnum.BAD_STATUS:
    result = bisect_profiles(decider, good, bad, common_funcs, mid, hi)
    results['individuals'].extend(result['individuals'])
    results['ranges'].extend(result['ranges'])

  # neither half is bad -> the issue is caused by several things occurring
  # in conjunction, and this combination crosses 'mid'
  if lo_mid_verdict == mid_hi_verdict == StatusEnum.GOOD_STATUS:
    problem_range = range_search(decider, good, bad, common_funcs, lo, hi)
    if problem_range:
      logging.info('Found %s as a problematic combination of profiles',
                   str(problem_range))
      results['ranges'].append(problem_range)

  return results


def bisect_profiles_wrapper(decider, good, bad, perform_check=True):
  """Wrapper for recursive profile bisection."""

  # Validate good and bad profiles are such, otherwise bisection reports noise.
  # Note that when the decider is a random mock (as in testing), these
  # assertions may fail.
  if perform_check:
    if decider.run(good, save_run=False) != StatusEnum.GOOD_STATUS:
      raise ValueError('Supplied good profile is not actually GOOD')
    if decider.run(bad, save_run=False) != StatusEnum.BAD_STATUS:
      raise ValueError('Supplied bad profile is not actually BAD')

  common_funcs = sorted(func for func in good if func in bad)
  if not common_funcs:
    return {'ranges': [], 'individuals': []}

  # Shuffle because the results of our analysis can be quite order-dependent
  # but this list has no inherent ordering. By shuffling each time, the chances
  # of finding new, potentially interesting results are increased each time
  # the program is run.
  random.shuffle(common_funcs)
  results = bisect_profiles(decider, good, bad, common_funcs, 0,
                            len(common_funcs))
  results['ranges'].sort()
  results['individuals'].sort()
  return results


def range_search(decider, good, bad, common_funcs, lo, hi):
  """Searches for problematic range crossing mid border.

  The main inner algorithm is the following, which looks for the smallest
  possible ranges with problematic combinations. It starts the upper bound at
  the midpoint, and increments in halves until it gets a BAD profile.
  Then, it increments the lower bound (in halves) until the resultant profile
  is GOOD, and then we have a range that causes 'BAD'ness.

  It does this _NUM_RUNS_RANGE_SEARCH times, and shuffles the functions being
  looked at uniquely each time to try and get the smallest possible range
  of functions in a reasonable timeframe.
  """

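  # find_upper_border and find_lower_border below are binary searches over
  # indices into 'funcs', probing the decider at each step to locate the
  # edges of a BAD-inducing range.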
  average = lambda x, y: int(round((x + y) // 2.0))

  def find_upper_border(good_copy, funcs, lo, hi, last_bad_val=None):
    """Finds the upper border of problematic range."""
    mid = average(lo, hi)
    if mid in (lo, hi):
      return last_bad_val or hi

    for func in funcs[lo:mid]:
      good_copy[func] = bad[func]
    verdict = decider.run(good_copy)

    # reset for next iteration
    for func in funcs:
      good_copy[func] = good[func]

    if verdict == StatusEnum.BAD_STATUS:
      return find_upper_border(good_copy, funcs, lo, mid, mid)
    return find_upper_border(good_copy, funcs, mid, hi, last_bad_val)

  def find_lower_border(good_copy, funcs, lo, hi, last_bad_val=None):
    """Finds the lower border of problematic range."""
    mid = average(lo, hi)
    if mid in (lo, hi):
      return last_bad_val or lo

    for func in funcs[lo:mid]:
      good_copy[func] = good[func]
    verdict = decider.run(good_copy)

    # reset for next iteration
    for func in funcs:
      good_copy[func] = bad[func]

    if verdict == StatusEnum.BAD_STATUS:
      return find_lower_border(good_copy, funcs, mid, hi, lo)
    return find_lower_border(good_copy, funcs, lo, mid, last_bad_val)

  lo_mid_funcs = []
  mid_hi_funcs = []
  min_range_funcs = []
  for _ in range(_NUM_RUNS_RANGE_SEARCH):

    if min_range_funcs:  # only examine range we've already narrowed to
      random.shuffle(lo_mid_funcs)
      random.shuffle(mid_hi_funcs)
    else:  # consider lo-mid and mid-hi separately; range must cross border
      mid = (lo + hi) // 2
      lo_mid_funcs = common_funcs[lo:mid]
      mid_hi_funcs = common_funcs[mid:hi]

    funcs = lo_mid_funcs + mid_hi_funcs
    hi = len(funcs)
    mid = len(lo_mid_funcs)
    lo = 0

    # because we need the problematic pair to pop up before we can narrow it
    prof = good.copy()
    for func in lo_mid_funcs:
      prof[func] = bad[func]

    upper_border = find_upper_border(prof, funcs, mid, hi)
    for func in lo_mid_funcs + funcs[mid:upper_border]:
      prof[func] = bad[func]

    lower_border = find_lower_border(prof, funcs, lo, mid)
    curr_range_funcs = funcs[lower_border:upper_border]

    if not min_range_funcs or len(curr_range_funcs) < len(min_range_funcs):
      min_range_funcs = curr_range_funcs
      lo_mid_funcs = lo_mid_funcs[lo_mid_funcs.index(min_range_funcs[0]):]
      mid_hi_funcs = mid_hi_funcs[:mid_hi_funcs.index(min_range_funcs[-1]) + 1]
      if len(min_range_funcs) == 2:
        min_range_funcs.sort()
        return min_range_funcs  # can't get any smaller

  min_range_funcs.sort()
  return min_range_funcs


def check_good_not_bad(decider, good, bad):
  """Check if bad prof becomes GOOD by adding funcs it lacks from good prof."""
  bad_copy = bad.copy()
  for func in good:
    if func not in bad:
      bad_copy[func] = good[func]
  return decider.run(bad_copy) == StatusEnum.GOOD_STATUS


def check_bad_not_good(decider, good, bad):
  """Check if good prof turns BAD by adding funcs only present in bad prof."""
  good_copy = good.copy()
  for func in bad:
    if func not in good:
      good_copy[func] = bad[func]
  return decider.run(good_copy) == StatusEnum.BAD_STATUS


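# Note: the external decider can be any executable that takes the path of a
# candidate AFDO profile as its sole argument and reports a verdict via the
# exit codes listed in the module docstring. A hypothetical shell sketch:
#   #!/bin/sh
#   # rebuild and benchmark the target using the profile at "$1", then
#   exit 0  # or: exit 1 (bad), exit 125 (skip)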


def parse_args():
  """Parses command-line flags for the analysis tool."""
  parser = argparse.ArgumentParser(
      description=__doc__,
      formatter_class=argparse.RawDescriptionHelpFormatter)
  parser.add_argument(
      '--good_prof',
      required=True,
      help='Text-based "Good" profile for analysis')
  parser.add_argument(
      '--bad_prof',
      required=True,
      help='Text-based "Bad" profile for analysis')
  parser.add_argument(
      '--external_decider',
      required=True,
      help='External script that, given an AFDO profile, returns '
      'GOOD/BAD/SKIP')
  parser.add_argument(
      '--analysis_output_file',
      required=True,
      help='File to output JSON results to')
  parser.add_argument(
      '--state_file',
      default='%s/afdo_analysis_state.json' % os.getcwd(),
      help='File path containing state to load from initially, and will be '
      'overwritten with new state on each iteration')
  parser.add_argument(
      '--no_resume',
      action='store_true',
      help='If enabled, no initial state will be loaded and the program will '
      'run from the beginning')
  parser.add_argument(
      '--remove_state_on_completion',
      action='store_true',
      help='If enabled, state file will be removed once profile analysis is '
      'completed')
  parser.add_argument(
      '--seed', type=float, help='Float specifying seed for randomness')
  return parser.parse_args()


def main(flags):
  logging.getLogger().setLevel(logging.INFO)
  if not flags.no_resume and flags.seed:  # conflicting seeds
    raise RuntimeError('Ambiguous seed value; do not resume from existing '
                       'state and also specify seed by command line flag')

  decider = DeciderState(
      flags.state_file, flags.external_decider, seed=flags.seed)
  if not flags.no_resume:
    decider.load_state()
  random.seed(decider.seed)

  with open(flags.good_prof) as good_f:
    good_items = text_to_json(good_f)
  with open(flags.bad_prof) as bad_f:
    bad_items = text_to_json(bad_f)

  bisect_results = bisect_profiles_wrapper(decider, good_items, bad_items)
  gnb_result = check_good_not_bad(decider, good_items, bad_items)
  bng_result = check_bad_not_good(decider, good_items, bad_items)

  results = {
      'seed': decider.seed,
      'bisect_results': bisect_results,
      'good_only_functions': gnb_result,
      'bad_only_functions': bng_result
  }
  with open(flags.analysis_output_file, 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=2)

  if flags.remove_state_on_completion:
    os.remove(flags.state_file)
    logging.info('Removed state file %s following completion of script...',
                 flags.state_file)
  else:
    completed_state_file = '%s.completed.%s' % (flags.state_file,
                                                str(date.today()))
    os.rename(flags.state_file, completed_state_file)
    logging.info('Stored completed state file as %s...', completed_state_file)
  return results


if __name__ == '__main__':
  main(parse_args())
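
# Example invocation (all paths illustrative):
#   ./afdo_prof_analysis.py --good_prof good.txt --bad_prof bad.txt \
#     --external_decider ./decider.sh --analysis_output_file results.json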