# -*- coding: utf-8 -*-
# Copyright 2020 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# This script is used to help the compiler wrapper in the ChromeOS and
# Android build systems bisect for bad object files.

"""Utilities for bisection of ChromeOS and Android object files.

This module contains a set of utilities to allow bisection between
two sets (good and bad) of object files. Mostly used to find compiler
bugs.

Reference page:
https://sites.google.com/a/google.com/chromeos-toolchain-team-home2/home/team-tools-and-scripts/bisecting-chromeos-compiler-problems/bisection-compiler-wrapper

Design doc:
https://docs.google.com/document/d/1yDgaUIa2O5w6dc3sSTe1ry-1ehKajTGJGQCbyn0fcEM
"""

from __future__ import print_function

import contextlib
import fcntl
import os
import shutil
import subprocess
import stat
import sys

VALID_MODES = ('POPULATE_GOOD', 'POPULATE_BAD', 'TRIAGE')
GOOD_CACHE = 'good'
BAD_CACHE = 'bad'
LIST_FILE = os.path.join(GOOD_CACHE, '_LIST')

CONTINUE_ON_MISSING = os.environ.get('BISECT_CONTINUE_ON_MISSING', None) == '1'
CONTINUE_ON_REDUNDANCY = os.environ.get('BISECT_CONTINUE_ON_REDUNDANCY',
                                        None) == '1'
WRAPPER_SAFE_MODE = os.environ.get('BISECT_WRAPPER_SAFE_MODE', None) == '1'


class Error(Exception):
    """The general compiler wrapper error class."""


@contextlib.contextmanager
def lock_file(path, mode):
    """Lock file and block if other process has lock on file.

    Acquire a lock for the file. Only blocks other processes if they attempt
    to also acquire the lock through this method. If only reading (modes 'r'
    and 'rb') then the lock is shared (i.e. many reads can happen
    concurrently, but only one process may write at a time).

    This function is a contextmanager, meaning it's meant to be used with the
    "with" statement in Python. This is so cleanup and setup happens
    automatically and cleanly. Execution of the outer "with" statement happens
    at the "yield" statement. Execution resumes after the yield when the outer
    "with" statement ends.

    Args:
      path: path to file being locked
      mode: mode to open file with ('w', 'r', etc.)

    Yields:
      The opened file object, locked for the duration of the "with" body.
    """
    with open(path, mode) as f:
        # Apply FD_CLOEXEC argument to fd. This ensures that the file
        # descriptor won't be leaked to any child processes.
        current_args = fcntl.fcntl(f.fileno(), fcntl.F_GETFD)
        fcntl.fcntl(f.fileno(), fcntl.F_SETFD,
                    current_args | fcntl.FD_CLOEXEC)

        # Reads can share the lock as no race conditions exist. If write is
        # needed, give writing process exclusive access to the file.
        if f.mode in ('r', 'rb'):
            lock_type = fcntl.LOCK_SH
        else:
            lock_type = fcntl.LOCK_EX

        try:
            fcntl.lockf(f, lock_type)
            yield f
        finally:
            # Flush while the lock is still held, even when the "with" body
            # raised; otherwise buffered data would only reach the file when
            # it is closed, i.e. after the lock has already been released.
            try:
                f.flush()
            finally:
                fcntl.lockf(f, fcntl.LOCK_UN)


def log_to_file(path, execargs, link_from=None, link_to=None):
    """Common logging function.

    Log current working directory, current execargs, and a from-to
    relationship between files.

    Args:
      path: path of the log file; entries are appended under a file lock.
      execargs: compiler execution arguments to record.
      link_from: optional source of a from-to relationship.
      link_to: optional destination of a from-to relationship. The
        relationship line is only written when both ends are given.
    """
    with lock_file(path, 'a') as log:
        log.write('cd: %s; %s\n' % (os.getcwd(), ' '.join(execargs)))
        if link_from and link_to:
            log.write('%s -> %s\n' % (link_from, link_to))


def exec_and_return(execargs):
    """Execute process and return.

    Execute according to execargs and return immediately. Don't inspect
    stderr or stdout.

    Returns:
      The return code of the executed process.
    """
    return subprocess.call(execargs)


def which_cache(obj_file):
    """Determine which cache an object belongs to.

    The binary search tool creates two files for each search iteration listing
    the full set of bad objects and full set of good objects. We use this to
    determine where an object file should be linked from (good or bad).

    Args:
      obj_file: absolute path of the object file to look up.

    Returns:
      BAD_CACHE if obj_file is listed in the file named by the BISECT_BAD_SET
      environment variable, GOOD_CACHE otherwise.

    Raises:
      Error: if an object has to be looked up but BISECT_BAD_SET is not set.
    """
    bad_set_file = os.environ.get('BISECT_BAD_SET')
    # An empty object name never needs the list file, so only insist on the
    # variable when a lookup will actually open it.
    if obj_file and not bad_set_file:
        raise Error('BISECT_BAD_SET is not set; cannot determine which cache '
                    '%s belongs to.' % obj_file)

    if in_object_list(obj_file, bad_set_file):
        return BAD_CACHE
    return GOOD_CACHE


def makedirs(path):
    """Try to create directories in path.

    An already existing directory is not an error; any other failure is
    re-raised.
    """
    try:
        os.makedirs(path)
    except os.error:
        if not os.path.isdir(path):
            raise


def get_obj_path(execargs):
    """Get the object path for the object file in the list of arguments.

    Returns:
      Absolute object path from execution args (-o argument). If no object
      being outputted, then return empty string. -o argument is checked only
      if -c is also present.
    """
    if '-o' not in execargs or '-c' not in execargs:
        return ''

    output_index = execargs.index('-o') + 1
    # A dangling -o with no operand names no output file.
    if output_index >= len(execargs):
        return ''

    obj_path = execargs[output_index]
    # Ignore args that do not create a file.
    if obj_path in ('-', '/dev/null'):
        return ''
    # Ignore files ending in .tmp.
    if obj_path.endswith('.tmp'):
        return ''

    abs_obj_path = os.path.abspath(obj_path)
    # Ignore configuration files generated by Automake/Autoconf/CMake etc.
    if (obj_path.endswith(('conftest.o', 'CMakeFiles/test.o')) or
            'CMakeTmp' in obj_path or 'CMakeTmp' in abs_obj_path):
        return ''

    return abs_obj_path


def _replace_suffix(path, new_suffix):
    """Return path with its extension (if any) replaced by new_suffix."""
    return os.path.splitext(path)[0] + new_suffix


def get_dep_path(execargs):
    """Get the dep file path for the dep file in the list of arguments.

    Returns:
      Absolute path of dependency file path from execution args (-MF argument
      if given, otherwise derived from the -o argument). If no dependency
      being outputted then return empty string.
    """
    if '-MD' not in execargs and '-MMD' not in execargs:
        return ''

    # If -MF is given this is the path of the dependency file. Otherwise the
    # dependency file is the value of -o but with a .d extension
    if '-MF' in execargs:
        dep_index = execargs.index('-MF') + 1
        # A dangling -MF with no operand names no dependency file.
        if dep_index >= len(execargs):
            return ''
        return os.path.abspath(execargs[dep_index])

    full_obj_path = get_obj_path(execargs)
    if not full_obj_path:
        return ''

    # Swap the real extension rather than assuming a two-character '.o'.
    return _replace_suffix(full_obj_path, '.d')


def get_dwo_path(execargs):
    """Get the dwo file path for the dwo file in the list of arguments.

    Returns:
      Absolute dwo file path from execution args (-gsplit-dwarf argument) If
      no dwo file being outputted then return empty string.
    """
    if '-gsplit-dwarf' not in execargs:
        return ''

    full_obj_path = get_obj_path(execargs)
    if not full_obj_path:
        return ''

    # Swap the real extension rather than assuming a two-character '.o'.
    return _replace_suffix(full_obj_path, '.dwo')


def in_object_list(obj_name, list_filename):
    """Check if object file name exist in file with object list.

    Args:
      obj_name: object path to look for; an empty name is never found.
      list_filename: file holding one object path per line.

    Returns:
      True if a (whitespace-stripped) line equals obj_name, False otherwise.
    """
    if not obj_name:
        return False

    with lock_file(list_filename, 'r') as list_file:
        for line in list_file:
            if line.strip() == obj_name:
                return True

    return False


def get_side_effects(execargs):
    """Determine side effects generated by compiler

    Returns:
      List of paths of objects that the compiler generates as side effects.
    """
    # Dependency files first, then dwo files; drop the ones not produced.
    candidates = (get_dep_path(execargs), get_dwo_path(execargs))
    return [path for path in candidates if path]


def cache_file(execargs, bisect_dir, cache, abs_file_path):
    """Cache compiler output file (.o/.d/.dwo).

    Args:
      execargs: compiler execution arguments.
      bisect_dir: The directory where bisection caches live.
      cache: Which cache the file will be cached to (GOOD/BAD).
      abs_file_path: Absolute path to file being cached.

    Returns:
      True if caching was successful, False otherwise.

    Raises:
      Error: if the file was already cached and BISECT_CONTINUE_ON_REDUNDANCY
        is not set to 1.
    """
    population_dir = os.path.join(bisect_dir, cache)
    # os.path.join fails with absolute paths, use + instead
    bisect_path = population_dir + abs_file_path
    makedirs(os.path.dirname(bisect_path))
    pop_log = os.path.join(population_dir, '_POPULATE_LOG')
    log_to_file(pop_log, execargs, abs_file_path, bisect_path)

    try:
        if not os.path.exists(abs_file_path):
            # File not found (happens when compilation fails but error code
            # is still 0)
            return False

        if os.path.exists(bisect_path):
            # File was already cached: record the duplicate.
            dups_path = os.path.join(population_dir, '_DUPS')
            with lock_file(dups_path, 'a') as dup_object_list:
                dup_object_list.write('%s\n' % abs_file_path)
            if CONTINUE_ON_REDUNDANCY:
                return True
            raise Error(
                'Trying to cache file %s multiple times. To avoid the error, '
                'set BISECT_CONTINUE_ON_REDUNDANCY to 1. For reference, the '
                'list of such files will be written to %s' %
                (abs_file_path, dups_path))

        shutil.copy2(abs_file_path, bisect_path)
        # Set cache object to be read-only so later compilations can't
        # accidentally overwrite it.
        os.chmod(bisect_path, 0o444)
        return True
    except Exception:
        print('Could not cache file %s' % abs_file_path, file=sys.stderr)
        raise


def restore_file(bisect_dir, cache, abs_file_path):
    """Restore file from cache (.o/.d/.dwo).

    Args:
      bisect_dir: The directory where bisection caches live.
      cache: Which cache the file will be restored from (GOOD/BAD).
      abs_file_path: Absolute path to file being restored.

    Raises:
      Error: if the file is not present in the requested cache.
    """
    # os.path.join fails with absolute paths, use + instead
    cached_path = os.path.join(bisect_dir, cache) + abs_file_path
    if not os.path.exists(cached_path):
        raise Error(('%s is missing from %s cache! Unsure how to proceed. '
                     'Make will now crash.' % (cached_path, cache)))

    if os.path.exists(abs_file_path):
        os.remove(abs_file_path)
    shutil.copy2(cached_path, abs_file_path)
    # Add write permission to the restored object files as some packages
    # (such as kernels) may need write permission to delete files.
    os.chmod(abs_file_path, os.stat(abs_file_path).st_mode | stat.S_IWUSR)


def bisect_populate(execargs, bisect_dir, population_name):
    """Add necessary information to the bisect cache for the given execution.

    Extract the necessary information for bisection from the compiler
    execution arguments and put it into the bisection cache. This
    includes copying the created object file, adding the object
    file path to the cache list and keeping a log of the execution.

    Args:
      execargs: compiler execution arguments.
      bisect_dir: bisection directory.
      population_name: name of the cache being populated (good/bad).

    Returns:
      The return code of the compiler invocation.
    """
    retval = exec_and_return(execargs)
    if retval:
        return retval

    full_obj_path = get_obj_path(execargs)
    # This is not a normal compiler call because it doesn't have a -o
    # argument, or the -o argument has an unusable output file.
    # It's likely that this compiler call was actually made to invoke the
    # linker, or as part of a configuration test. In this case we want to
    # simply call the compiler and return.
    if not full_obj_path:
        return retval

    # Return if not able to cache the object file
    if not cache_file(execargs, bisect_dir, population_name, full_obj_path):
        return retval

    population_dir = os.path.join(bisect_dir, population_name)
    with lock_file(os.path.join(population_dir, '_LIST'), 'a') as object_list:
        object_list.write('%s\n' % full_obj_path)

    # Side effects are cached on a best-effort basis; a missing one is not
    # an error, so the result is intentionally ignored.
    for side_effect in get_side_effects(execargs):
        cache_file(execargs, bisect_dir, population_name, side_effect)

    return retval


def bisect_triage(execargs, bisect_dir):
    """Use object file from appropriate cache (good/bad).

    Given a populated bisection directory, use the object file saved
    into one of the caches (good/bad) according to what is specified
    in the good/bad sets. The good/bad sets are generated by the
    high level binary search tool. Additionally restore any possible
    side effects of compiler.

    Args:
      execargs: compiler execution arguments.
      bisect_dir: populated bisection directory.

    Returns:
      The compiler's return code when the compiler is actually run, 0 when
      the outputs were restored from the cache instead.

    Raises:
      Error: if the object is not part of the bisected set and
        BISECT_CONTINUE_ON_MISSING is not set to 1, or if a needed file is
        missing from the cache.
    """
    full_obj_path = get_obj_path(execargs)
    obj_list = os.path.join(bisect_dir, LIST_FILE)

    # If the output isn't an object file just call compiler
    if not full_obj_path:
        return exec_and_return(execargs)

    # If this isn't a bisected object just call compiler
    # This shouldn't happen!
    if not in_object_list(full_obj_path, obj_list):
        if not CONTINUE_ON_MISSING:
            raise Error(('%s is missing from cache! To ignore export '
                         'BISECT_CONTINUE_ON_MISSING=1. See documentation for '
                         'more details on this option.' % full_obj_path))
        log_file = os.path.join(bisect_dir, '_MISSING_CACHED_OBJ_LOG')
        log_to_file(log_file, execargs, '? compiler', full_obj_path)
        return exec_and_return(execargs)

    cache = which_cache(full_obj_path)

    # If using safe WRAPPER_SAFE_MODE option call compiler and overwrite the
    # result from the good/bad cache. This option is safe and covers all
    # compiler side effects, but is very slow!
    if WRAPPER_SAFE_MODE:
        retval = exec_and_return(execargs)
        if retval:
            return retval
        # The compiler may exit 0 without producing the object, so only
        # remove what is actually there.
        if os.path.exists(full_obj_path):
            os.remove(full_obj_path)
        restore_file(bisect_dir, cache, full_obj_path)
        return retval

    # Generate compiler side effects. Trick Make into thinking compiler was
    # actually executed.
    for side_effect in get_side_effects(execargs):
        restore_file(bisect_dir, cache, side_effect)

    # If generated object file happened to be pruned/cleaned by Make then
    # link it over from cache again.
    if not os.path.exists(full_obj_path):
        restore_file(bisect_dir, cache, full_obj_path)

    return 0


def bisect_driver(bisect_stage, bisect_dir, execargs):
    """Call appropriate bisection stage according to value in bisect_stage.

    Args:
      bisect_stage: one of 'POPULATE_GOOD', 'POPULATE_BAD' or 'TRIAGE'.
      bisect_dir: bisection directory.
      execargs: compiler execution arguments.

    Returns:
      The return code of the selected stage.

    Raises:
      ValueError: if bisect_stage is not a known stage.
    """
    if bisect_stage == 'POPULATE_GOOD':
        return bisect_populate(execargs, bisect_dir, GOOD_CACHE)
    elif bisect_stage == 'POPULATE_BAD':
        return bisect_populate(execargs, bisect_dir, BAD_CACHE)
    elif bisect_stage == 'TRIAGE':
        return bisect_triage(execargs, bisect_dir)
    else:
        raise ValueError('wrong value for BISECT_STAGE: %s' % bisect_stage)