# Copyright 2016 Google Inc. All rights Reserved.
#
# This script is used to help the compiler wrapper in the ChromeOS and
# Android build systems bisect for bad object files.
"""Utilities for bisection of ChromeOS and Android object files.

This module contains a set of utilities to allow bisection between
two sets (good and bad) of object files. Mostly used to find compiler
bugs.

Reference page:
https://sites.google.com/a/google.com/chromeos-toolchain-team-home2/home/team-tools-and-scripts/bisecting-chromeos-compiler-problems/bisection-compiler-wrapper

Design doc:
https://docs.google.com/document/d/1yDgaUIa2O5w6dc3sSTe1ry-1ehKajTGJGQCbyn0fcEM
"""

from __future__ import print_function

import contextlib
import fcntl
import os
import shutil
import subprocess
import sys

VALID_MODES = ('POPULATE_GOOD', 'POPULATE_BAD', 'TRIAGE')
GOOD_CACHE = 'good'
BAD_CACHE = 'bad'
LIST_FILE = os.path.join(GOOD_CACHE, '_LIST')

CONTINUE_ON_MISSING = os.environ.get('BISECT_CONTINUE_ON_MISSING', None) == '1'
WRAPPER_SAFE_MODE = os.environ.get('BISECT_WRAPPER_SAFE_MODE', None) == '1'


class Error(Exception):
    """The general compiler wrapper error class."""


@contextlib.contextmanager
def lock_file(path, mode):
    """Lock file and block if other process has lock on file.

    Acquire exclusive lock for file. Only blocks other processes if they
    attempt to also acquire lock through this method. If only reading (modes
    'r' and 'rb') then the lock is shared (i.e. many reads can happen
    concurrently, but only one process may write at a time).

    This function is a contextmanager, meaning it's meant to be used with the
    "with" statement in Python. This is so cleanup and setup happens
    automatically and cleanly. Execution of the outer "with" statement happens
    at the "yield" statement. Execution resumes after the yield when the outer
    "with" statement ends.

    Args:
      path: path to file being locked
      mode: mode to open file with ('w', 'r', etc.)

    Yields:
      The open, locked file object.
    """
    with open(path, mode) as f:
        # Apply FD_CLOEXEC argument to fd. This ensures that the file
        # descriptor won't be leaked to any child processes.
        current_args = fcntl.fcntl(f.fileno(), fcntl.F_GETFD)
        fcntl.fcntl(f.fileno(), fcntl.F_SETFD, current_args | fcntl.FD_CLOEXEC)

        # Reads can share the lock as no race conditions exist. If write is
        # needed, give writing process exclusive access to the file.
        if f.mode in ('r', 'rb'):
            lock_type = fcntl.LOCK_SH
        else:
            lock_type = fcntl.LOCK_EX

        try:
            fcntl.lockf(f, lock_type)
            yield f
            # Push buffered writes out while the lock is still held.
            f.flush()
        finally:
            fcntl.lockf(f, fcntl.LOCK_UN)


def log_to_file(path, execargs, link_from=None, link_to=None):
    """Common logging function.

    Log current working directory, current execargs, and a from-to
    relationship between files.

    Args:
      path: path of the log file; it is opened in append mode under a lock.
      execargs: compiler execution arguments (list of strings).
      link_from: optional source of the from-to relationship.
      link_to: optional destination of the from-to relationship. The
        relationship line is only written when both ends are given.
    """
    with lock_file(path, 'a') as log:
        log.write('cd: %s; %s\n' % (os.getcwd(), ' '.join(execargs)))
        if link_from and link_to:
            log.write('%s -> %s\n' % (link_from, link_to))


def exec_and_return(execargs):
    """Execute process and return.

    Execute according to execargs and return immediately. Don't inspect
    stderr or stdout.

    Args:
      execargs: command line to run (list of strings).

    Returns:
      The return code reported by subprocess.call.
    """
    return subprocess.call(execargs)


def which_cache(obj_file):
    """Determine which cache an object belongs to.

    The binary search tool creates two files for each search iteration listing
    the full set of bad objects and full set of good objects. We use this to
    determine where an object file should be linked from (good or bad).

    Args:
      obj_file: absolute path of the object file being looked up.

    Returns:
      BAD_CACHE if obj_file is listed in the file named by the BISECT_BAD_SET
      environment variable, GOOD_CACHE otherwise.

    Raises:
      Error: if obj_file is non-empty and BISECT_BAD_SET is not set.
    """
    bad_set_file = os.environ.get('BISECT_BAD_SET')
    # Without this check an unset variable reaches open(None) and fails with
    # an opaque TypeError. An empty obj_file never opens the list, so it is
    # still allowed through.
    if obj_file and not bad_set_file:
        raise Error('BISECT_BAD_SET is not set; cannot determine which cache '
                    '%s belongs to.' % obj_file)
    if in_object_list(obj_file, bad_set_file):
        return BAD_CACHE
    return GOOD_CACHE


def makedirs(path):
    """Try to create directories in path.

    An already existing directory is not an error; any other failure is
    re-raised.
    """
    try:
        os.makedirs(path)
    except OSError:
        if not os.path.isdir(path):
            raise


def _arg_after(execargs, flag):
    """Return the argument following the first `flag`, or None if absent."""
    try:
        value_index = execargs.index(flag) + 1
    except ValueError:
        return None
    if value_index >= len(execargs):
        # The flag is the last argument, so it has no value.
        return None
    return execargs[value_index]


def _replace_suffix(path, suffix):
    """Return path with its file extension (if any) replaced by suffix."""
    return os.path.splitext(path)[0] + suffix


def get_obj_path(execargs):
    """Get the object path for the object file in the list of arguments.

    Args:
      execargs: compiler execution arguments (list of strings).

    Returns:
      Absolute object path from execution args (-o argument). If no object
      being outputted, then return empty string. -o argument is checked only
      if -c is also present.
    """
    if '-c' not in execargs:
        return ''

    obj_path = _arg_after(execargs, '-o')
    # No -o, or -o without a usable value.
    if not obj_path:
        return ''

    # Ignore args that do not create a file.
    if obj_path in ('-', '/dev/null'):
        return ''
    # Ignore files ending in .tmp.
    if obj_path.endswith('.tmp'):
        return ''
    # Ignore configuration files generated by Automake/Autoconf/CMake etc.
    abs_obj_path = os.path.abspath(obj_path)
    if (obj_path.endswith(('conftest.o', 'CMakeFiles/test.o')) or
            'CMakeTmp' in obj_path or
            'CMakeTmp' in abs_obj_path):
        return ''

    return abs_obj_path


def get_dep_path(execargs):
    """Get the dep file path for the dep file in the list of arguments.

    Args:
      execargs: compiler execution arguments (list of strings).

    Returns:
      Absolute path of dependency file path from execution args (-MF argument,
      or derived from the -o argument). If no dependency being outputted then
      return empty string.
    """
    if '-MD' not in execargs and '-MMD' not in execargs:
        return ''

    # If -MF is given this is the path of the dependency file. Otherwise the
    # dependency file is the value of -o but with a .d extension
    if '-MF' in execargs:
        dep_path = _arg_after(execargs, '-MF')
        if not dep_path:
            return ''
        return os.path.abspath(dep_path)

    full_obj_path = get_obj_path(execargs)
    if not full_obj_path:
        return ''

    # Replace whatever suffix the output has rather than assuming ".o".
    return _replace_suffix(full_obj_path, '.d')


def get_dwo_path(execargs):
    """Get the dwo file path for the dwo file in the list of arguments.

    Args:
      execargs: compiler execution arguments (list of strings).

    Returns:
      Absolute dwo file path from execution args (-gsplit-dwarf argument). If
      no dwo file being outputted then return empty string.
    """
    if '-gsplit-dwarf' not in execargs:
        return ''

    full_obj_path = get_obj_path(execargs)
    if not full_obj_path:
        return ''

    # Replace whatever suffix the output has rather than assuming ".o".
    return _replace_suffix(full_obj_path, '.dwo')


def in_object_list(obj_name, list_filename):
    """Check if object file name exist in file with object list.

    Args:
      obj_name: object path to look for; an empty name is never found.
      list_filename: path of a file with one object path per line.

    Returns:
      True if a line of the file, stripped of surrounding whitespace, equals
      obj_name; False otherwise.
    """
    if not obj_name:
        return False

    with lock_file(list_filename, 'r') as list_file:
        for line in list_file:
            if line.strip() == obj_name:
                return True

    return False


def get_side_effects(execargs):
    """Determine side effects generated by compiler

    Args:
      execargs: compiler execution arguments (list of strings).

    Returns:
      List of paths of objects that the compiler generates as side effects.
    """
    # Dependency files first, then dwo files; empty paths are dropped.
    candidates = (get_dep_path(execargs), get_dwo_path(execargs))
    return [path for path in candidates if path]


def cache_file(execargs, bisect_dir, cache, abs_file_path):
    """Cache compiler output file (.o/.d/.dwo).

    Args:
      execargs: compiler execution arguments.
      bisect_dir: The directory where bisection caches live.
      cache: Which cache the file will be cached to (GOOD/BAD).
      abs_file_path: Absolute path to file being cached.

    Returns:
      True if caching was successful, False otherwise.

    Raises:
      Error: if the file is already present in the cache.
    """
    population_dir = os.path.join(bisect_dir, cache)
    # os.path.join fails with absolute paths, use + instead
    bisect_path = population_dir + abs_file_path
    makedirs(os.path.dirname(bisect_path))
    pop_log = os.path.join(population_dir, '_POPULATE_LOG')
    log_to_file(pop_log, execargs, abs_file_path, bisect_path)

    try:
        if not os.path.exists(abs_file_path):
            # File not found (happens when compilation fails but error code is
            # still 0)
            return False

        if os.path.exists(bisect_path):
            # File exists: record the duplicate before failing.
            with lock_file(os.path.join(population_dir, '_DUPS'),
                           'a') as dup_object_list:
                dup_object_list.write('%s\n' % abs_file_path)
            raise Error(
                'Trying to cache file %s multiple times.' % abs_file_path)

        shutil.copy2(abs_file_path, bisect_path)
        # Set cache object to be read-only so later compilations can't
        # accidentally overwrite it.
        os.chmod(bisect_path, 0o444)
        return True
    except Exception:
        print('Could not cache file %s' % abs_file_path, file=sys.stderr)
        raise


def restore_file(bisect_dir, cache, abs_file_path):
    """Restore file from cache (.o/.d/.dwo).

    Args:
      bisect_dir: The directory where bisection caches live.
      cache: Which cache the file will be restored from (GOOD/BAD).
      abs_file_path: Absolute path to file being restored.

    Raises:
      Error: if the file is not present in the requested cache.
    """
    # os.path.join fails with absolute paths, use + instead
    cached_path = os.path.join(bisect_dir, cache) + abs_file_path
    if not os.path.exists(cached_path):
        raise Error(('%s is missing from %s cache! Unsure how to proceed. Make '
                     'will now crash.' % (cached_path, cache)))

    # os.link refuses to overwrite, so drop any existing file first.
    if os.path.exists(abs_file_path):
        os.remove(abs_file_path)
    os.link(cached_path, abs_file_path)


def bisect_populate(execargs, bisect_dir, population_name):
    """Add necessary information to the bisect cache for the given execution.

    Extract the necessary information for bisection from the compiler
    execution arguments and put it into the bisection cache. This
    includes copying the created object file, adding the object
    file path to the cache list and keeping a log of the execution.

    Args:
      execargs: compiler execution arguments.
      bisect_dir: bisection directory.
      population_name: name of the cache being populated (good/bad).

    Returns:
      The return code of the compiler invocation.
    """
    retval = exec_and_return(execargs)
    if retval:
        return retval

    full_obj_path = get_obj_path(execargs)
    # This is not a normal compiler call because it doesn't have a -o argument,
    # or the -o argument has an unusable output file.
    # It's likely that this compiler call was actually made to invoke the
    # linker, or as part of a configuration test. In this case we want to
    # simply call the compiler and return.
    if not full_obj_path:
        return retval

    # Return if not able to cache the object file
    if not cache_file(execargs, bisect_dir, population_name, full_obj_path):
        return retval

    population_dir = os.path.join(bisect_dir, population_name)
    with lock_file(os.path.join(population_dir, '_LIST'), 'a') as object_list:
        object_list.write('%s\n' % full_obj_path)

    # A side effect that could not be cached is not treated as a failure.
    for side_effect in get_side_effects(execargs):
        cache_file(execargs, bisect_dir, population_name, side_effect)

    return retval


def bisect_triage(execargs, bisect_dir):
    """Use object file from appropriate cache (good/bad).

    Given a populated bisection directory, use the object file saved
    into one of the caches (good/bad) according to what is specified
    in the good/bad sets. The good/bad sets are generated by the
    high level binary search tool. Additionally restore any possible
    side effects of compiler.

    Args:
      execargs: compiler execution arguments.
      bisect_dir: populated bisection directory.

    Returns:
      The compiler's return code when the compiler is actually run, 0 when
      the outputs are only restored from the cache.

    Raises:
      Error: if the object is not in the cache list and
        BISECT_CONTINUE_ON_MISSING is not enabled, or a needed file is
        missing from the selected cache.
    """
    full_obj_path = get_obj_path(execargs)
    obj_list = os.path.join(bisect_dir, LIST_FILE)

    # If the output isn't an object file just call compiler
    if not full_obj_path:
        return exec_and_return(execargs)

    # If this isn't a bisected object just call compiler
    # This shouldn't happen!
    if not in_object_list(full_obj_path, obj_list):
        if not CONTINUE_ON_MISSING:
            raise Error(('%s is missing from cache! To ignore export '
                         'BISECT_CONTINUE_ON_MISSING=1. See documentation for '
                         'more details on this option.' % full_obj_path))
        log_file = os.path.join(bisect_dir, '_MISSING_CACHED_OBJ_LOG')
        log_to_file(log_file, execargs, '? compiler', full_obj_path)
        return exec_and_return(execargs)

    cache = which_cache(full_obj_path)

    # If using safe WRAPPER_SAFE_MODE option call compiler and overwrite the
    # result from the good/bad cache. This option is safe and covers all
    # compiler side effects, but is very slow!
    if WRAPPER_SAFE_MODE:
        retval = exec_and_return(execargs)
        if retval:
            return retval
        # The compiler may exit 0 without producing the object; only remove
        # what is actually there.
        if os.path.exists(full_obj_path):
            os.remove(full_obj_path)
        restore_file(bisect_dir, cache, full_obj_path)
        return retval

    # Generate compiler side effects. Trick Make into thinking compiler was
    # actually executed.
    for side_effect in get_side_effects(execargs):
        restore_file(bisect_dir, cache, side_effect)

    # If generated object file happened to be pruned/cleaned by Make then link
    # it over from cache again.
    if not os.path.exists(full_obj_path):
        restore_file(bisect_dir, cache, full_obj_path)

    return 0


def bisect_driver(bisect_stage, bisect_dir, execargs):
    """Call appropriate bisection stage according to value in bisect_stage.

    Args:
      bisect_stage: one of the values in VALID_MODES.
      bisect_dir: bisection directory.
      execargs: compiler execution arguments.

    Returns:
      The return value of the selected stage.

    Raises:
      ValueError: if bisect_stage is not a known stage.
    """
    if bisect_stage == 'POPULATE_GOOD':
        return bisect_populate(execargs, bisect_dir, GOOD_CACHE)
    elif bisect_stage == 'POPULATE_BAD':
        return bisect_populate(execargs, bisect_dir, BAD_CACHE)
    elif bisect_stage == 'TRIAGE':
        return bisect_triage(execargs, bisect_dir)
    else:
        raise ValueError('wrong value for BISECT_STAGE: %s' % bisect_stage)