• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# -*- coding: utf-8 -*-
2# Copyright 2020 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5#
6# This script is used to help the compiler wrapper in the ChromeOS and
7# Android build systems bisect for bad object files.
8
9"""Utilities for bisection of ChromeOS and Android object files.
10
11This module contains a set of utilities to allow bisection between
12two sets (good and bad) of object files. Mostly used to find compiler
13bugs.
14
15Reference page:
16https://sites.google.com/a/google.com/chromeos-toolchain-team-home2/home/team-tools-and-scripts/bisecting-chromeos-compiler-problems/bisection-compiler-wrapper
17
18Design doc:
19https://docs.google.com/document/d/1yDgaUIa2O5w6dc3sSTe1ry-1ehKajTGJGQCbyn0fcEM
20"""
21
22from __future__ import print_function
23
24import contextlib
25import fcntl
26import os
27import shutil
28import subprocess
29import stat
30import sys
31
# Stage names recognised by bisect_driver.
VALID_MODES = ('POPULATE_GOOD', 'POPULATE_BAD', 'TRIAGE')
# Directory names of the two caches kept inside the bisection directory.
GOOD_CACHE = 'good'
BAD_CACHE = 'bad'
# Location, relative to the bisection directory, of the good cache's list of
# object files.
LIST_FILE = os.path.join(GOOD_CACHE, '_LIST')

# Behaviour switches read once at import time. Each one is enabled only when
# the corresponding environment variable is set to exactly '1'.
CONTINUE_ON_MISSING = os.environ.get('BISECT_CONTINUE_ON_MISSING') == '1'
CONTINUE_ON_REDUNDANCY = (
    os.environ.get('BISECT_CONTINUE_ON_REDUNDANCY') == '1')
WRAPPER_SAFE_MODE = os.environ.get('BISECT_WRAPPER_SAFE_MODE') == '1'
41
42
class Error(Exception):
  """Base exception raised for failures detected by the bisection wrapper."""
45
46
@contextlib.contextmanager
def lock_file(path, mode):
  """Open a file and hold an advisory lock on it for the "with" body.

  The lock only affects other processes that also lock the file (e.g. through
  this function). Files opened purely for reading (modes 'r' and 'rb') take a
  shared lock, so many readers may proceed concurrently; every other mode takes
  an exclusive lock, so only one writer may hold the file at a time. The call
  blocks until the lock can be acquired.

  This function is a context manager and is meant to be used with the "with"
  statement. The body of the "with" statement runs at the "yield"; when the
  body finishes (normally or through an exception) buffered data is flushed,
  the lock is released and the file is closed.

  Args:
    path: path to file being locked
    mode: mode to open file with ('w', 'r', etc.)

  Yields:
    The open, locked file object.
  """
  with open(path, mode) as f:
    # Apply FD_CLOEXEC argument to fd. This ensures that the file descriptor
    # won't be leaked to any child processes.
    fd = f.fileno()
    fd_flags = fcntl.fcntl(fd, fcntl.F_GETFD)
    fcntl.fcntl(fd, fcntl.F_SETFD, fd_flags | fcntl.FD_CLOEXEC)

    # Reads can share the lock as no race conditions exist. If write is needed,
    # give writing process exclusive access to the file.
    lock_type = fcntl.LOCK_SH if f.mode in ('r', 'rb') else fcntl.LOCK_EX

    # Acquire before entering the try block so that a failed acquisition does
    # not lead to unlocking a lock that was never held.
    fcntl.lockf(f, lock_type)
    try:
      yield f
    finally:
      try:
        # Flush while the lock is still held, including when the body raised.
        # Otherwise buffered writes would only reach the file when it is
        # closed, i.e. after the lock has already been released.
        f.flush()
      finally:
        fcntl.lockf(f, fcntl.LOCK_UN)
85
86
def log_to_file(path, execargs, link_from=None, link_to=None):
  """Append a record of one wrapper invocation to a log file.

  The record holds the current working directory and the command line and,
  when both link_from and link_to are given, a "from -> to" line relating two
  files.

  Args:
    path: log file to append to; it is locked while being written.
    execargs: compiler execution arguments.
    link_from: optional source side of the from-to relationship.
    link_to: optional destination side of the from-to relationship.
  """
  with lock_file(path, 'a') as log:
    entry = 'cd: %s; %s\n' % (os.getcwd(), ' '.join(execargs))
    if link_from and link_to:
      entry += '%s -> %s\n' % (link_from, link_to)
    log.write(entry)
97
98
def exec_and_return(execargs):
  """Run the given command and hand back its exit status.

  stdout and stderr of the child are neither captured nor inspected.

  Args:
    execargs: command and arguments to execute.

  Returns:
    The exit status reported by subprocess.call.
  """
  exit_status = subprocess.call(execargs)
  return exit_status
106
107
def which_cache(obj_file):
  """Pick the cache (good or bad) an object file should be taken from.

  The file named by the BISECT_BAD_SET environment variable lists the objects
  that belong to the bad set for the current search iteration. An object
  listed there comes from the bad cache, any other object from the good cache.

  Args:
    obj_file: path of the object file to classify.

  Returns:
    BAD_CACHE if obj_file is listed in the bad set, GOOD_CACHE otherwise.
  """
  bad_set_file = os.environ.get('BISECT_BAD_SET')
  return BAD_CACHE if in_object_list(obj_file, bad_set_file) else GOOD_CACHE
120
121
def makedirs(path):
  """Create path and any missing parents; an existing directory is fine.

  Args:
    path: directory to create.

  Raises:
    OSError: if creation fails and path is not a directory afterwards.
  """
  try:
    os.makedirs(path)
  except OSError:
    # Failing because the directory is already there is not an error.
    if os.path.isdir(path):
      return
    raise
129
130
def get_obj_path(execargs):
  """Get the object path for the object file in the list of arguments.

  Args:
    execargs: compiler execution arguments.

  Returns:
    Absolute object path from execution args (-o argument). If no object being
    outputted, then return empty string. -o argument is checked only if -c is
    also present. An empty string is also returned when -o has no operand,
    when the output is '-' or '/dev/null', when it ends in '.tmp', or when it
    looks like a configure/CMake probe.
  """
  if '-o' not in execargs or '-c' not in execargs:
    return ''

  output_index = execargs.index('-o') + 1
  # A trailing '-o' has no file name after it; treat it as "no object" instead
  # of failing with an IndexError.
  if output_index >= len(execargs):
    return ''
  obj_path = execargs[output_index]

  # Ignore args that do not create a file.
  if obj_path in ('-', '/dev/null'):
    return ''
  # Ignore files ending in .tmp.
  if obj_path.endswith('.tmp'):
    return ''

  abs_obj_path = os.path.abspath(obj_path)
  # Ignore configuration files generated by Automake/Autoconf/CMake etc. Both
  # the path as given and its absolute form are checked for 'CMakeTmp'.
  if (obj_path.endswith(('conftest.o', 'CMakeFiles/test.o')) or
      'CMakeTmp' in obj_path or 'CMakeTmp' in abs_obj_path):
    return ''

  return abs_obj_path
163
164
def get_dep_path(execargs):
  """Get the dep file path for the dep file in the list of arguments.

  Args:
    execargs: compiler execution arguments.

  Returns:
    Absolute path of the dependency file: the -MF argument when given,
    otherwise the object path (see get_obj_path) with its extension replaced
    by '.d'. If no dependency file is being outputted (neither -MD nor -MMD is
    present, -MF has no operand, or there is no usable object path) then
    return empty string.
  """
  if '-MD' not in execargs and '-MMD' not in execargs:
    return ''

  # If -MF is given this is the path of the dependency file. Otherwise the
  # dependency file is the value of -o but with a .d extension
  if '-MF' in execargs:
    dep_index = execargs.index('-MF') + 1
    # A trailing '-MF' names no file; report "no dependency file" instead of
    # failing with an IndexError.
    if dep_index >= len(execargs):
      return ''
    return os.path.abspath(execargs[dep_index])

  full_obj_path = get_obj_path(execargs)
  if not full_obj_path:
    return ''

  # Swap the object's extension for '.d' rather than chopping a fixed two
  # characters, so outputs that do not end in '.o' are handled as well.
  return os.path.splitext(full_obj_path)[0] + '.d'
187
188
def get_dwo_path(execargs):
  """Get the dwo file path for the dwo file in the list of arguments.

  Args:
    execargs: compiler execution arguments.

  Returns:
    Absolute dwo file path: the object path (see get_obj_path) with its
    extension replaced by '.dwo'. If no dwo file is being outputted
    (-gsplit-dwarf is absent or there is no usable object path) then return
    empty string.
  """
  if '-gsplit-dwarf' not in execargs:
    return ''

  full_obj_path = get_obj_path(execargs)
  if not full_obj_path:
    return ''

  # Swap the object's extension for '.dwo' rather than chopping a fixed two
  # characters, so outputs that do not end in '.o' are handled as well.
  return os.path.splitext(full_obj_path)[0] + '.dwo'
204
205
def in_object_list(obj_name, list_filename):
  """Tell whether an object file name appears in an object list file.

  Args:
    obj_name: object file name to look for; an empty name is never found.
    list_filename: file holding one object name per line. It is opened (and
      locked) for reading only when obj_name is non-empty.

  Returns:
    True if some line of the file, stripped of surrounding whitespace, equals
    obj_name; False otherwise.
  """
  if not obj_name:
    return False

  with lock_file(list_filename, 'r') as list_file:
    return any(entry.strip() == obj_name for entry in list_file)
217
218
def get_side_effects(execargs):
  """Determine side effects generated by the compiler.

  Args:
    execargs: compiler execution arguments.

  Returns:
    List of paths of the files the compiler generates as side effects: the
    dependency file first, then the dwo file, each only if it has a path.
  """
  # Candidates in the order they are reported: dependency file, then dwo file.
  candidates = (get_dep_path(execargs), get_dwo_path(execargs))
  return [side_effect for side_effect in candidates if side_effect]
238
239
def cache_file(execargs, bisect_dir, cache, abs_file_path):
  """Cache compiler output file (.o/.d/.dwo).

  The invocation is always recorded in the cache's _POPULATE_LOG. If the file
  has already been cached, its path is appended to the cache's _DUPS file.

  Args:
    execargs: compiler execution arguments.
    bisect_dir: The directory where bisection caches live.
    cache: Which cache the file will be cached to (GOOD/BAD).
    abs_file_path: Absolute path to file being cached.

  Returns:
    True if caching was successful (or the file was already cached and
    BISECT_CONTINUE_ON_REDUNDANCY is enabled), False if abs_file_path does not
    exist.

  Raises:
    Error: if the file is already cached and BISECT_CONTINUE_ON_REDUNDANCY is
      not enabled.
  """
  population_dir = os.path.join(bisect_dir, cache)
  # os.path.join discards everything before an absolute component, so the
  # absolute file path is appended with + instead.
  bisect_path = population_dir + abs_file_path
  makedirs(os.path.dirname(bisect_path))
  pop_log = os.path.join(population_dir, '_POPULATE_LOG')
  log_to_file(pop_log, execargs, abs_file_path, bisect_path)

  try:
    if not os.path.exists(abs_file_path):
      # File not found (happens when compilation fails but error code is still
      # 0)
      return False

    if os.path.exists(bisect_path):
      # Already cached: remember the duplicate, then either carry on or fail.
      dups_path = os.path.join(population_dir, '_DUPS')
      with lock_file(dups_path, 'a') as dup_object_list:
        dup_object_list.write('%s\n' % abs_file_path)
      if CONTINUE_ON_REDUNDANCY:
        return True
      # Use the module's own exception type; it still derives from Exception,
      # so existing handlers keep working.
      raise Error(
          'Trying to cache file %s multiple times. To avoid the error, set '
          'BISECT_CONTINUE_ON_REDUNDANCY to 1. For reference, the list of '
          'such files will be written to %s' % (abs_file_path, dups_path))

    shutil.copy2(abs_file_path, bisect_path)
    # Set cache object to be read-only so later compilations can't
    # accidentally overwrite it.
    os.chmod(bisect_path, 0o444)
    return True
  except Exception:
    print('Could not cache file %s' % abs_file_path, file=sys.stderr)
    raise
287
288
def restore_file(bisect_dir, cache, abs_file_path):
  """Restore file from cache (.o/.d/.dwo).

  Any file already present at abs_file_path is removed first, then the cached
  copy is copied over it and made writable by its owner.

  Args:
    bisect_dir: The directory where bisection caches live.
    cache: Which cache the file will be restored from (GOOD/BAD).
    abs_file_path: Absolute path to file being restored.

  Raises:
    Error: if the file is not present in the requested cache.
  """
  # os.path.join discards everything before an absolute component, so the
  # absolute file path is appended with + instead.
  cached_path = os.path.join(bisect_dir, cache) + abs_file_path
  if not os.path.exists(cached_path):
    # Report the missing file first and the cache name second, matching the
    # wording of the message.
    raise Error(('%s is missing from %s cache! Unsure how to proceed. Make '
                 'will now crash.' % (cached_path, cache)))

  if os.path.exists(abs_file_path):
    os.remove(abs_file_path)
  shutil.copy2(cached_path, abs_file_path)
  # Add write permission to the restored object files as some packages
  # (such as kernels) may need write permission to delete files.
  os.chmod(abs_file_path, os.stat(abs_file_path).st_mode | stat.S_IWUSR)
309
310
def bisect_populate(execargs, bisect_dir, population_name):
  """Run the compiler and record its outputs in one of the bisect caches.

  The command is executed first. When it succeeds and produces a usable
  object file, that object is copied into the cache, its path is appended to
  the cache's _LIST file, and any compiler side effects are cached as well.

  Args:
    execargs: compiler execution arguments.
    bisect_dir: bisection directory.
    population_name: name of the cache being populated (good/bad).

  Returns:
    The exit status of the compiler invocation.
  """
  retval = exec_and_return(execargs)
  if retval:
    return retval

  full_obj_path = get_obj_path(execargs)
  # An empty object path means this is not a normal compiler call: it has no
  # -o argument, or the -o argument names an unusable output file. Most likely
  # the compiler was invoked to link, or as part of a configuration test, so
  # running it was all there is to do. The same holds when the object file
  # could not be cached.
  if not full_obj_path or not cache_file(execargs, bisect_dir,
                                         population_name, full_obj_path):
    return retval

  list_path = os.path.join(bisect_dir, population_name, '_LIST')
  with lock_file(list_path, 'a') as object_list:
    object_list.write('%s\n' % full_obj_path)

  # Side effects are cached on a best-effort basis; the result is not checked.
  for side_effect in get_side_effects(execargs):
    cache_file(execargs, bisect_dir, population_name, side_effect)

  return retval
349
350
def bisect_triage(execargs, bisect_dir):
  """Use the object file from the appropriate cache (good/bad).

  Given a populated bisection directory, take the object file saved in one of
  the caches (good/bad) according to the good/bad sets, which are generated by
  the high level binary search tool. Any side effects of the compiler are
  restored from the same cache.

  Args:
    execargs: compiler execution arguments.
    bisect_dir: populated bisection directory.

  Returns:
    The compiler's exit status whenever the compiler is actually run,
    otherwise 0.

  Raises:
    Error: if the object is not in the good cache's list and
      BISECT_CONTINUE_ON_MISSING is not enabled.
  """
  full_obj_path = get_obj_path(execargs)
  obj_list = os.path.join(bisect_dir, LIST_FILE)

  # Not producing an object file: nothing to bisect, just run the compiler.
  if not full_obj_path:
    return exec_and_return(execargs)

  # An object that was never cached is not expected here. Either fail loudly
  # or, if the user opted in, log it and fall back to the real compiler.
  if not in_object_list(full_obj_path, obj_list):
    if not CONTINUE_ON_MISSING:
      raise Error(('%s is missing from cache! To ignore export '
                   'BISECT_CONTINUE_ON_MISSING=1. See documentation for more '
                   'details on this option.' % full_obj_path))
    log_file = os.path.join(bisect_dir, '_MISSING_CACHED_OBJ_LOG')
    log_to_file(log_file, execargs, '? compiler', full_obj_path)
    return exec_and_return(execargs)

  cache = which_cache(full_obj_path)

  # In WRAPPER_SAFE_MODE the compiler really runs and its object file is then
  # replaced by the one from the good/bad cache. This covers all compiler side
  # effects, but is very slow!
  if WRAPPER_SAFE_MODE:
    retval = exec_and_return(execargs)
    if not retval:
      os.remove(full_obj_path)
      restore_file(bisect_dir, cache, full_obj_path)
    return retval

  # Recreate the compiler's side effects so that Make believes the compiler
  # was actually executed.
  for side_effect in get_side_effects(execargs):
    restore_file(bisect_dir, cache, side_effect)

  # The object file may have been pruned/cleaned by Make; if so, bring it back
  # from the cache.
  if not os.path.exists(full_obj_path):
    restore_file(bisect_dir, cache, full_obj_path)

  return 0
407
408
def bisect_driver(bisect_stage, bisect_dir, execargs):
  """Dispatch to the bisection stage named by bisect_stage.

  Args:
    bisect_stage: one of 'POPULATE_GOOD', 'POPULATE_BAD' or 'TRIAGE'.
    bisect_dir: bisection directory.
    execargs: compiler execution arguments.

  Returns:
    Whatever the selected stage function returns.

  Raises:
    ValueError: if bisect_stage is not one of the known stages.
  """
  if bisect_stage == 'TRIAGE':
    return bisect_triage(execargs, bisect_dir)

  # Both populate stages share one implementation and differ only in the
  # cache they fill.
  if bisect_stage == 'POPULATE_GOOD':
    population_name = GOOD_CACHE
  elif bisect_stage == 'POPULATE_BAD':
    population_name = BAD_CACHE
  else:
    raise ValueError('wrong value for BISECT_STAGE: %s' % bisect_stage)

  return bisect_populate(execargs, bisect_dir, population_name)
419