#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2019 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""AFDO Profile analysis tool.

This script takes a good AFDO profile, a bad AFDO profile, and an external
script which deems particular AFDO profiles as GOOD/BAD/SKIP, and an output
file as arguments. Given these pieces of information, it analyzes the profiles
to try and determine what exactly is bad about the bad profile. It does this
with three main techniques: bisecting search, range search, and rough diff-ing.

The external script communicates the 'goodness' of an AFDO profile through its
exit code. The codes known to this script are:
  - 0: the AFDO profile produced a good binary
  - 1: the AFDO profile produced a bad binary
  - 125: no result could be determined; just try another profile
  - >127: quit immediately
"""

from __future__ import division, print_function

import argparse
import json
# Pylint recommends we use "from chromite.lib import cros_logging as logging".
# Chromite specific policy message, we want to keep using the standard logging
# pylint: disable=cros-logging-import
import logging
import os
import random
import subprocess
import time
from datetime import date
from enum import IntEnum
from tempfile import mkstemp


class StatusEnum(IntEnum):
  """Enum of valid statuses returned by profile decider."""
  GOOD_STATUS = 0
  BAD_STATUS = 1
  SKIP_STATUS = 125
  # The decider hit an internal problem; profile analysis must abort.
  PROBLEM_STATUS = 127


statuses = StatusEnum.__members__.values()

_NUM_RUNS_RANGE_SEARCH = 20  # how many times range search should run its algo


def json_to_text(json_prof):
  """Serializes a JSON-based AFDO profile back into its text form.

  Args:
    json_prof: dict mapping each function name to the text of its profile
      body, as produced by text_to_json.

  Returns:
    The text-based AFDO profile as a single string.
  """
  text_profile = []
  for func in json_prof:
    text_profile.append(func)
    text_profile.append(json_prof[func])
  return ''.join(text_profile)


def text_to_json(f):
  """Performs basic parsing of an AFDO text-based profile.

  This parsing expects an input file object with contents of the form generated
  by bin/llvm-profdata (within an LLVM build).

  Args:
    f: iterable of profile lines (e.g. an open file object).

  Returns:
    dict mapping each top-level function name to the text of its profile,
    including the ':' and the remainder of the function's header line.
  """
  results = {}
  curr_func = None
  curr_data = []
  for line in f:
    if not line.startswith(' '):
      # An unindented line starts a new top-level function profile; flush
      # the profile collected for the previous function, if any.
      if curr_func:
        results[curr_func] = ''.join(curr_data)
        curr_data = []
      curr_func, rest = line.split(':', 1)
      curr_func = curr_func.strip()
      curr_data.append(':' + rest)
    else:
      curr_data.append(line)

  if curr_func:
    results[curr_func] = ''.join(curr_data)
  return results


def prof_to_tmp(prof):
  """Creates (and returns) temp filename for given JSON-based AFDO profile."""
  fd, temp_path = mkstemp()
  text_profile = json_to_text(prof)
  # Profiles are always written/read as UTF-8 regardless of locale.
  with open(temp_path, 'w', encoding='utf-8') as f:
    f.write(text_profile)
  os.close(fd)
  return temp_path


class DeciderState(object):
  """Class for the external decider.

  Wraps invocation of the external decider script and persists the seed and
  the sequence of verdicts to a state file so an interrupted analysis can be
  resumed deterministically.
  """

  def __init__(self, state_file, external_decider, seed):
    self.accumulated_results = []  # over this run of the script
    self.external_decider = external_decider
    self.saved_results = []  # imported from a previous run of this script
    self.state_file = state_file
    self.seed = seed if seed is not None else time.time()

  def load_state(self):
    """Restores seed and saved results from self.state_file, if it exists.

    Raises:
      ValueError: if the state file is not valid JSON or lacks the
        expected keys.
    """
    if not os.path.exists(self.state_file):
      logging.info('State file %s not found, starting from beginning',
                   self.state_file)
      return

    with open(self.state_file, encoding='utf-8') as f:
      try:
        data = json.load(f)
      except ValueError as e:
        # json.JSONDecodeError is a subclass of ValueError; a bare except
        # here would also swallow KeyboardInterrupt/SystemExit.
        raise ValueError('Provided state file %s to resume from does not'
                         ' contain a valid JSON.' % self.state_file) from e

    if 'seed' not in data or 'accumulated_results' not in data:
      raise ValueError('Provided state file %s to resume from does not contain'
                       ' the correct information' % self.state_file)

    self.seed = data['seed']
    self.saved_results = data['accumulated_results']
    logging.info('Restored state from %s...', self.state_file)

  def save_state(self):
    """Writes seed and accumulated results to self.state_file atomically."""
    state = {'seed': self.seed, 'accumulated_results': self.accumulated_results}
    tmp_file = self.state_file + '.new'
    with open(tmp_file, 'w', encoding='utf-8') as f:
      json.dump(state, f, indent=2)
    # rename is atomic on POSIX, so a crash can never leave a partial file.
    os.rename(tmp_file, self.state_file)
    logging.info('Logged state to %s...', self.state_file)

  def run(self, prof, save_run=True):
    """Run the external deciding script on the given profile.

    Args:
      prof: JSON-based AFDO profile to evaluate.
      save_run: whether to record this run's verdict in the persisted state.

    Returns:
      The StatusEnum member corresponding to the decider's verdict.

    Raises:
      RuntimeError: if the decider returned PROBLEM_STATUS.
      ValueError: if the decider returned an unrecognized exit code.
    """
    # Replay verdicts restored from a previous (resumed) run before invoking
    # the decider for real.
    if self.saved_results and save_run:
      result = self.saved_results.pop(0)
      self.accumulated_results.append(result)
      self.save_state()
      return StatusEnum(result)

    filename = prof_to_tmp(prof)

    try:
      return_code = subprocess.call([self.external_decider, filename])
    finally:
      os.remove(filename)

    if return_code in statuses:
      status = StatusEnum(return_code)
      if status == StatusEnum.PROBLEM_STATUS:
        # Keep a copy of the offending profile around for debugging.
        prof_file = prof_to_tmp(prof)
        raise RuntimeError('Provided decider script returned PROBLEM_STATUS '
                           'when run on profile stored at %s. AFDO Profile '
                           'analysis aborting' % prof_file)
      if save_run:
        self.accumulated_results.append(status.value)
      logging.info('Run %d of external script %s returned %s',
                   len(self.accumulated_results), self.external_decider,
                   status.name)
      self.save_state()
      return status
    raise ValueError(
        'Provided external script had unexpected return code %d' % return_code)


def bisect_profiles(decider, good, bad, common_funcs, lo, hi):
  """Recursive function which bisects good and bad profiles.

  Args:
    decider: function which, given a JSON-based AFDO profile, returns an
      element of 'statuses' based on the status of the profile
    good: JSON-based good AFDO profile
    bad: JSON-based bad AFDO profile
    common_funcs: the list of functions which have top-level profiles in both
      'good' and 'bad'
    lo: lower bound of range being bisected on
    hi: upper bound of range being bisected on

  Returns a dictionary with two keys: 'individuals' and 'ranges'.
  'individuals': a list of individual functions found to make the profile BAD
  'ranges': a list of lists of function names. Each list of functions is a list
    such that including all of those from the bad profile makes the good
    profile BAD. It may not be the smallest problematic combination, but
    definitely contains a problematic combination of profiles.
  """

  results = {'individuals': [], 'ranges': []}
  if hi - lo <= 1:
    logging.info('Found %s as a problematic function profile', common_funcs[lo])
    results['individuals'].append(common_funcs[lo])
    return results

  mid = (lo + hi) // 2
  lo_mid_prof = good.copy()  # covers bad from lo:mid
  mid_hi_prof = good.copy()  # covers bad from mid:hi
  for func in common_funcs[lo:mid]:
    lo_mid_prof[func] = bad[func]
  for func in common_funcs[mid:hi]:
    mid_hi_prof[func] = bad[func]

  lo_mid_verdict = decider.run(lo_mid_prof)
  mid_hi_verdict = decider.run(mid_hi_prof)

  if lo_mid_verdict == StatusEnum.BAD_STATUS:
    result = bisect_profiles(decider, good, bad, common_funcs, lo, mid)
    results['individuals'].extend(result['individuals'])
    results['ranges'].extend(result['ranges'])
  if mid_hi_verdict == StatusEnum.BAD_STATUS:
    result = bisect_profiles(decider, good, bad, common_funcs, mid, hi)
    results['individuals'].extend(result['individuals'])
    results['ranges'].extend(result['ranges'])

  # neither half is bad -> the issue is caused by several things occurring
  # in conjunction, and this combination crosses 'mid'
  if lo_mid_verdict == mid_hi_verdict == StatusEnum.GOOD_STATUS:
    problem_range = range_search(decider, good, bad, common_funcs, lo, hi)
    if problem_range:
      logging.info('Found %s as a problematic combination of profiles',
                   str(problem_range))
      results['ranges'].append(problem_range)

  return results


def bisect_profiles_wrapper(decider, good, bad, perform_check=True):
  """Wrapper for recursive profile bisection."""

  # Validate good and bad profiles are such, otherwise bisection reports noise
  # Note that while decider is a random mock, these assertions may fail.
  if perform_check:
    if decider.run(good, save_run=False) != StatusEnum.GOOD_STATUS:
      raise ValueError('Supplied good profile is not actually GOOD')
    if decider.run(bad, save_run=False) != StatusEnum.BAD_STATUS:
      raise ValueError('Supplied bad profile is not actually BAD')

  common_funcs = sorted(func for func in good if func in bad)
  if not common_funcs:
    return {'ranges': [], 'individuals': []}

  # shuffle because the results of our analysis can be quite order-dependent
  # but this list has no inherent ordering. By shuffling each time, the chances
  # of finding new, potentially interesting results are increased each time
  # the program is run
  random.shuffle(common_funcs)
  results = bisect_profiles(decider, good, bad, common_funcs, 0,
                            len(common_funcs))
  results['ranges'].sort()
  results['individuals'].sort()
  return results


def range_search(decider, good, bad, common_funcs, lo, hi):
  """Searches for problematic range crossing mid border.

  The main inner algorithm is the following, which looks for the smallest
  possible ranges with problematic combinations. It starts the upper bound at
  the midpoint, and increments in halves until it gets a BAD profile.
  Then, it increments the lower bound (in halves) until the resultant profile
  is GOOD, and then we have a range that causes 'BAD'ness.

  It does this _NUM_RUNS_RANGE_SEARCH times, and shuffles the functions being
  looked at uniquely each time to try and get the smallest possible range
  of functions in a reasonable timeframe.
  """

  def average(x, y):
    # Integer midpoint of x and y.
    return int(round((x + y) // 2.0))

  def find_upper_border(good_copy, funcs, lo, hi, last_bad_val=None):
    """Finds the upper border of problematic range."""
    mid = average(lo, hi)
    if mid in (lo, hi):
      return last_bad_val or hi

    for func in funcs[lo:mid]:
      good_copy[func] = bad[func]
    verdict = decider.run(good_copy)

    # reset for next iteration
    for func in funcs:
      good_copy[func] = good[func]

    if verdict == StatusEnum.BAD_STATUS:
      return find_upper_border(good_copy, funcs, lo, mid, mid)
    return find_upper_border(good_copy, funcs, mid, hi, last_bad_val)

  def find_lower_border(good_copy, funcs, lo, hi, last_bad_val=None):
    """Finds the lower border of problematic range."""
    mid = average(lo, hi)
    if mid in (lo, hi):
      return last_bad_val or lo

    for func in funcs[lo:mid]:
      good_copy[func] = good[func]
    verdict = decider.run(good_copy)

    # reset for next iteration
    for func in funcs:
      good_copy[func] = bad[func]

    if verdict == StatusEnum.BAD_STATUS:
      return find_lower_border(good_copy, funcs, mid, hi, lo)
    return find_lower_border(good_copy, funcs, lo, mid, last_bad_val)

  lo_mid_funcs = []
  mid_hi_funcs = []
  min_range_funcs = []
  for _ in range(_NUM_RUNS_RANGE_SEARCH):

    if min_range_funcs:  # only examine range we've already narrowed to
      random.shuffle(lo_mid_funcs)
      random.shuffle(mid_hi_funcs)
    else:  # consider lo-mid and mid-hi separately bc must cross border
      mid = (lo + hi) // 2
      lo_mid_funcs = common_funcs[lo:mid]
      mid_hi_funcs = common_funcs[mid:hi]

    funcs = lo_mid_funcs + mid_hi_funcs
    hi = len(funcs)
    mid = len(lo_mid_funcs)
    lo = 0

    # because we need the problematic pair to pop up before we can narrow it
    prof = good.copy()
    for func in lo_mid_funcs:
      prof[func] = bad[func]

    upper_border = find_upper_border(prof, funcs, mid, hi)
    for func in lo_mid_funcs + funcs[mid:upper_border]:
      prof[func] = bad[func]

    lower_border = find_lower_border(prof, funcs, lo, mid)
    curr_range_funcs = funcs[lower_border:upper_border]

    if not min_range_funcs or len(curr_range_funcs) < len(min_range_funcs):
      min_range_funcs = curr_range_funcs
      lo_mid_funcs = lo_mid_funcs[lo_mid_funcs.index(min_range_funcs[0]):]
      mid_hi_funcs = mid_hi_funcs[:mid_hi_funcs.index(min_range_funcs[-1]) + 1]
      if len(min_range_funcs) == 2:
        min_range_funcs.sort()
        return min_range_funcs  # can't get any smaller

  min_range_funcs.sort()
  return min_range_funcs


def check_good_not_bad(decider, good, bad):
  """Check if bad prof becomes GOOD by adding funcs it lacks from good prof"""
  bad_copy = bad.copy()
  for func in good:
    if func not in bad:
      bad_copy[func] = good[func]
  return decider.run(bad_copy) == StatusEnum.GOOD_STATUS


def check_bad_not_good(decider, good, bad):
  """Check if good prof BAD after adding funcs bad prof has that good doesn't"""
  good_copy = good.copy()
  for func in bad:
    if func not in good:
      good_copy[func] = bad[func]
  return decider.run(good_copy) == StatusEnum.BAD_STATUS


def parse_args():
  """Parses and returns command-line flags for this script."""
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
  parser.add_argument(
      '--good_prof',
      required=True,
      help='Text-based "Good" profile for analysis')
  parser.add_argument(
      '--bad_prof', required=True, help='Text-based "Bad" profile for analysis')
  parser.add_argument(
      '--external_decider',
      required=True,
      help='External script that, given an AFDO profile, returns '
      'GOOD/BAD/SKIP')
  parser.add_argument(
      '--analysis_output_file',
      required=True,
      help='File to output JSON results to')
  parser.add_argument(
      '--state_file',
      default='%s/afdo_analysis_state.json' % os.getcwd(),
      help='File path containing state to load from initially, and will be '
      'overwritten with new state on each iteration')
  parser.add_argument(
      '--no_resume',
      action='store_true',
      help='If enabled, no initial state will be loaded and the program will '
      'run from the beginning')
  parser.add_argument(
      '--remove_state_on_completion',
      action='store_true',
      help='If enabled, state file will be removed once profile analysis is '
      'completed')
  parser.add_argument(
      '--seed', type=float, help='Float specifying seed for randomness')
  return parser.parse_args()


def main(flags):
  """Runs the full analysis pipeline and writes JSON results.

  Args:
    flags: parsed argparse.Namespace from parse_args().

  Returns:
    dict with the seed, bisection results, and the two diff-check results.
  """
  logging.getLogger().setLevel(logging.INFO)
  if not flags.no_resume and flags.seed:  # conflicting seeds
    raise RuntimeError('Ambiguous seed value; do not resume from existing '
                       'state and also specify seed by command line flag')

  decider = DeciderState(
      flags.state_file, flags.external_decider, seed=flags.seed)
  if not flags.no_resume:
    decider.load_state()
  random.seed(decider.seed)

  with open(flags.good_prof, encoding='utf-8') as good_f:
    good_items = text_to_json(good_f)
  with open(flags.bad_prof, encoding='utf-8') as bad_f:
    bad_items = text_to_json(bad_f)

  bisect_results = bisect_profiles_wrapper(decider, good_items, bad_items)
  gnb_result = check_good_not_bad(decider, good_items, bad_items)
  bng_result = check_bad_not_good(decider, good_items, bad_items)

  results = {
      'seed': decider.seed,
      'bisect_results': bisect_results,
      'good_only_functions': gnb_result,
      'bad_only_functions': bng_result
  }
  with open(flags.analysis_output_file, 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=2)
  if flags.remove_state_on_completion:
    os.remove(flags.state_file)
    logging.info('Removed state file %s following completion of script...',
                 flags.state_file)
  else:
    # Keep the final state around, tagged with today's date, for reference.
    completed_state_file = '%s.completed.%s' % (flags.state_file,
                                                str(date.today()))
    os.rename(flags.state_file, completed_state_file)
    logging.info('Stored completed state file as %s...', completed_state_file)
  return results


if __name__ == '__main__':
  main(parse_args())