# -*- coding: utf-8 -*-
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
""" This module is responsible to capture the compiler invocation of any
build process. The result of that should be a compilation database.

This implementation is using the LD_PRELOAD or DYLD_INSERT_LIBRARIES
mechanisms provided by the dynamic linker. The related library is implemented
in C language and can be found under 'libear' directory.

The 'libear' library is capturing all child process creation and logging the
relevant information about it into separate files in a specified directory.
The parameter of this process is the output directory name, where the report
files shall be placed. This parameter is passed as an environment variable.

The module also implements compiler wrappers to intercept the compiler calls.

The module implements the build command execution and the post-processing of
the output files, which are condensed into a compilation database. """

import sys
import os
import os.path
import re
import itertools
import json
import glob
import argparse
import logging
import subprocess
from libear import build_libear, TemporaryDirectory
from libscanbuild import command_entry_point
from libscanbuild import duplicate_check, tempdir, initialize_logging
from libscanbuild.compilation import split_command
from libscanbuild.shell import encode, decode

__all__ = ['capture', 'intercept_build_main', 'intercept_build_wrapper']

# ASCII separator characters used by the exec report file format:
# GS terminates one report, RS separates the fields of a report and
# US terminates each argument of the command field.
GS = chr(0x1d)
RS = chr(0x1e)
US = chr(0x1f)

# File names of the compiler wrapper executables (looked up in 'bin_dir').
COMPILER_WRAPPER_CC = 'intercept-cc'
COMPILER_WRAPPER_CXX = 'intercept-c++'


@command_entry_point
def intercept_build_main(bin_dir):
    """ Entry point for 'intercept-build' command.

    Parses the command line and initializes logging. When no build command
    was given it prints the help message and returns 0, otherwise it returns
    the result of 'capture'.

    :param bin_dir: directory which contains the compiler wrappers.
    :return:        exit code of the command. """

    parser = create_parser()
    args = parser.parse_args()

    initialize_logging(args.verbose)
    logging.debug('Parsed arguments: %s', args)

    if not args.build:
        parser.print_help()
        return 0

    return capture(args, bin_dir)


def capture(args, bin_dir):
    """ The entry point of build command interception.

    Runs the build command in a prepared environment, reads the exec reports
    written into a temporary directory and writes the result as JSON into
    the file named by 'args.cdb'.

    :param args:    parsed command line arguments (see 'create_parser').
    :param bin_dir: directory which contains the compiler wrappers.
    :return:        exit code of the build command. """

    def post_processing(commands):
        """ To make a compilation database, it needs to filter out commands
        which are not compiler calls. Needs to find the source file name
        from the arguments. And do shell escaping on the command.

        To support incremental builds, it is desired to read elements from
        an existing compilation database from a previous run. These elements
        shall be merged with the new elements. """

        # create entries from the current run
        current = itertools.chain.from_iterable(
            # creates a sequence of entry generators from an exec,
            format_entry(command) for command in commands)
        # read entries from previous run
        if 'append' in args and args.append and os.path.isfile(args.cdb):
            with open(args.cdb) as handle:
                previous = iter(json.load(handle))
        else:
            previous = iter([])
        # filter out duplicate entries from both
        duplicate = duplicate_check(entry_hash)
        return (entry
                for entry in itertools.chain(previous, current)
                if os.path.exists(entry['file']) and not duplicate(entry))

    with TemporaryDirectory(prefix='intercept-', dir=tempdir()) as tmp_dir:
        # run the build command
        environment = setup_environment(args, tmp_dir, bin_dir)
        logging.debug('run build in environment: %s', environment)
        exit_code = subprocess.call(args.build, env=environment)
        logging.info('build finished with exit code: %d', exit_code)
        # read the intercepted exec calls
        exec_traces = itertools.chain.from_iterable(
            parse_exec_trace(os.path.join(tmp_dir, filename))
            for filename in sorted(glob.iglob(os.path.join(tmp_dir, '*.cmd'))))
        # do post processing only if that was requested
        if 'raw_entries' not in args or not args.raw_entries:
            entries = post_processing(exec_traces)
        else:
            entries = exec_traces
        # dump the compilation database
        # (the lazy 'entries' sequence has to be consumed here, while the
        # temporary directory with the report files still exists)
        with open(args.cdb, 'w+') as handle:
            json.dump(list(entries), handle, sort_keys=True, indent=4)
        return exit_code


def setup_environment(args, destination, bin_dir):
    """ Sets up the environment for the build command.

    It only creates the environment, the build command is executed by the
    caller. The exec calls will be logged by the 'libear' preloaded library
    or by the 'wrapper' programs.

    :param args:        parsed command line arguments.
    :param destination: directory where the exec reports shall be written.
    :param bin_dir:     directory which contains the compiler wrappers.
    :return:            a copy of 'os.environ' extended with the variables
                        required by the selected interception method. """

    c_compiler = args.cc if 'cc' in args else 'cc'
    cxx_compiler = args.cxx if 'cxx' in args else 'c++'

    # no library is built when the wrappers were requested explicitly or
    # when the preload mechanism is detected to be disabled
    libear_path = None if args.override_compiler or is_preload_disabled(
        sys.platform) else build_libear(c_compiler, destination)

    environment = dict(os.environ)
    environment.update({'INTERCEPT_BUILD_TARGET_DIR': destination})

    if not libear_path:
        logging.debug('intercept gonna use compiler wrappers')
        environment.update({
            'CC': os.path.join(bin_dir, COMPILER_WRAPPER_CC),
            'CXX': os.path.join(bin_dir, COMPILER_WRAPPER_CXX),
            'INTERCEPT_BUILD_CC': c_compiler,
            'INTERCEPT_BUILD_CXX': cxx_compiler,
            'INTERCEPT_BUILD_VERBOSE': 'DEBUG' if args.verbose > 2 else 'INFO'
        })
    elif sys.platform == 'darwin':
        logging.debug('intercept gonna preload libear on OSX')
        environment.update({
            'DYLD_INSERT_LIBRARIES': libear_path,
            'DYLD_FORCE_FLAT_NAMESPACE': '1'
        })
    else:
        logging.debug('intercept gonna preload libear on UNIX')
        environment.update({'LD_PRELOAD': libear_path})

    return environment


def intercept_build_wrapper(cplusplus):
    """ Entry point for `intercept-cc` and `intercept-c++` compiler wrappers.

    It does generate execution report into target directory. And execute
    the wrapped compilation with the real compiler. The parameters for
    report and execution are from environment variables.

    Those parameters which for 'libear' library can't have meaningful
    values are faked.

    :param cplusplus: when true the C++ compiler is executed, otherwise
                      the C compiler.
    :return:          exit code of the real compiler. """

    # initialize wrapper logging
    logging.basicConfig(format='intercept: %(levelname)s: %(message)s',
                        level=os.getenv('INTERCEPT_BUILD_VERBOSE', 'INFO'))
    # write report
    # (a failure here is only logged, the compilation is executed anyway)
    try:
        target_dir = os.getenv('INTERCEPT_BUILD_TARGET_DIR')
        if not target_dir:
            raise UserWarning('exec report target directory not found')
        pid = str(os.getpid())
        target_file = os.path.join(target_dir, pid + '.cmd')
        logging.debug('writing exec report to: %s', target_file)
        with open(target_file, 'ab') as handler:
            working_dir = os.getcwd()
            command = US.join(sys.argv) + US
            # the parent pid and the function name are the faked values
            content = RS.join([pid, pid, 'wrapper', working_dir, command]) + GS
            handler.write(content.encode('utf-8'))
    except IOError:
        logging.exception('writing exec report failed')
    except UserWarning as warning:
        logging.warning(warning)
    # execute with real compiler
    compiler = os.getenv('INTERCEPT_BUILD_CXX', 'c++') if cplusplus \
        else os.getenv('INTERCEPT_BUILD_CC', 'cc')
    compilation = [compiler] + sys.argv[1:]
    logging.debug('execute compiler: %s', compilation)
    return subprocess.call(compilation)


def parse_exec_trace(filename):
    """ Parse the file generated by the 'libear' preloaded library.

    Given filename points to a file which contains the basic report
    generated by the interception library or wrapper command. A single
    report file _might_ contain multiple process creation info.

    Reports which have less fields than expected (eg.: the file was only
    partially written) are skipped with a warning.

    :param filename: path of the report file.
    :return:         generator of dictionaries with the keys 'pid', 'ppid',
                     'function', 'directory' and 'command'. """

    logging.debug('parse exec trace file: %s', filename)
    with open(filename, 'r') as handler:
        content = handler.read()
        for group in filter(bool, content.split(GS)):
            records = group.split(RS)
            if len(records) < 5:
                # one broken report shall not abort the whole capture
                logging.warning('skip malformed exec trace in: %s', filename)
                continue
            yield {
                'pid': records[0],
                'ppid': records[1],
                'function': records[2],
                'directory': records[3],
                # every argument is terminated by US, drop the empty tail
                'command': records[4].split(US)[:-1]
            }


def format_entry(exec_trace):
    """ Generate the desired fields for compilation database entries.

    Yields one entry per source file of the command. Nothing is generated
    when 'split_command' returns a false value (presumably for commands
    which are not compiler calls).

    :param exec_trace: dictionary as generated by 'parse_exec_trace'.
    :return:           generator of dictionaries with the keys 'directory',
                       'command' and 'file'. """

    def abspath(cwd, name):
        """ Create normalized absolute path from input filename. """
        fullname = name if os.path.isabs(name) else os.path.join(cwd, name)
        return os.path.normpath(fullname)

    logging.debug('format this command: %s', exec_trace['command'])
    compilation = split_command(exec_trace['command'])
    if compilation:
        # the compiler name is the same for every source of the command
        compiler = 'c++' if compilation.compiler == 'c++' else 'cc'
        for source in compilation.files:
            command = [compiler, '-c'] + compilation.flags + [source]
            logging.debug('formated as: %s', command)
            yield {
                'directory': exec_trace['directory'],
                'command': encode(command),
                'file': abspath(exec_trace['directory'], source)
            }


def is_preload_disabled(platform):
    """ Library-based interposition will fail silently if SIP is enabled,
    so this should be detected. You can detect whether SIP is enabled on
    Darwin by checking whether (1) there is a binary called 'csrutil' in
    the path and, if so, (2) whether the output of executing 'csrutil status'
    contains 'System Integrity Protection status: enabled'.

    Same problem on linux when SELinux is enabled. The status query program
    'sestatus' and the output when it's enabled 'SELinux status: enabled'.

    :param platform: name of the platform (value of 'sys.platform').
    :return:         True if the status query program reports the enabled
                     state, False otherwise (also when the query fails). """

    if platform == 'darwin':
        pattern = re.compile(r'System Integrity Protection status:\s+enabled')
        command = ['csrutil', 'status']
    elif platform in {'linux', 'linux2'}:
        pattern = re.compile(r'SELinux status:\s+enabled')
        command = ['sestatus']
    else:
        return False

    try:
        lines = subprocess.check_output(command).decode('utf-8')
        return any(pattern.match(line) for line in lines.splitlines())
    except Exception:
        # Best effort: a missing or failing status program is handled as
        # "not disabled". KeyboardInterrupt and SystemExit are not caught.
        return False


def entry_hash(entry):
    """ Implement unique hash method for compilation database entries.

    :param entry: dictionary with the keys 'file', 'directory' and 'command'.
    :return:      string which identifies the entry. """

    # For faster lookup in set filename is reverted
    filename = entry['file'][::-1]
    # For faster lookup in set directory is reverted
    directory = entry['directory'][::-1]
    # On OS X the 'cc' and 'c++' compilers are wrappers for
    # 'clang' therefore both call would be logged. To avoid
    # this the hash does not contain the first word of the
    # command.
    command = ' '.join(decode(entry['command'])[1:])

    return '<>'.join([filename, directory, command])


def create_parser():
    """ Command line argument parser factory method.

    :return: the 'argparse.ArgumentParser' of the 'intercept-build'
             command. """

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument(
        '--verbose', '-v',
        action='count',
        default=0,
        help="""Enable verbose output from '%(prog)s'. A second and third
                flag increases verbosity.""")
    parser.add_argument(
        '--cdb',
        metavar='<file>',
        default="compile_commands.json",
        help="""The JSON compilation database.""")
    # appending to a database makes no sense when the output is not one
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        '--append',
        action='store_true',
        help="""Append new entries to existing compilation database.""")
    group.add_argument(
        '--disable-filter', '-n',
        dest='raw_entries',
        action='store_true',
        help="""Intercepted child process creation calls (exec calls) are all
                logged to the output. The output is not a compilation database.
                This flag is for debug purposes.""")

    advanced = parser.add_argument_group('advanced options')
    advanced.add_argument(
        '--override-compiler',
        action='store_true',
        help="""Always resort to the compiler wrapper even when better
                intercept methods are available.""")
    advanced.add_argument(
        '--use-cc',
        metavar='<path>',
        dest='cc',
        default='cc',
        help="""When '%(prog)s' analyzes a project by interposing a compiler
                wrapper, which executes a real compiler for compilation and
                do other tasks (record the compiler invocation). Because of
                this interposing, '%(prog)s' does not know what compiler your
                project normally uses. Instead, it simply overrides the CC
                environment variable, and guesses your default compiler.

                If you need '%(prog)s' to use a specific compiler for
                *compilation* then you can use this option to specify a path
                to that compiler.""")
    advanced.add_argument(
        '--use-c++',
        metavar='<path>',
        dest='cxx',
        default='c++',
        help="""This is the same as "--use-cc" but for C++ code.""")

    # everything after the known options is the build command
    parser.add_argument(
        dest='build',
        nargs=argparse.REMAINDER,
        help="""Command to run.""")

    return parser