1#!/usr/bin/env python 2"""Calls C-Reduce to create a minimal reproducer for clang crashes. 3 4Output files: 5 *.reduced.sh -- crash reproducer with minimal arguments 6 *.reduced.cpp -- the reduced file 7 *.test.sh -- interestingness test for C-Reduce 8""" 9 10from __future__ import print_function 11from argparse import ArgumentParser, RawTextHelpFormatter 12import os 13import re 14import stat 15import sys 16import subprocess 17import pipes 18import shlex 19import tempfile 20import shutil 21from distutils.spawn import find_executable 22 23verbose = False 24creduce_cmd = None 25clang_cmd = None 26 27def verbose_print(*args, **kwargs): 28 if verbose: 29 print(*args, **kwargs) 30 31def check_file(fname): 32 fname = os.path.normpath(fname) 33 if not os.path.isfile(fname): 34 sys.exit("ERROR: %s does not exist" % (fname)) 35 return fname 36 37def check_cmd(cmd_name, cmd_dir, cmd_path=None): 38 """ 39 Returns absolute path to cmd_path if it is given, 40 or absolute path to cmd_dir/cmd_name. 41 """ 42 if cmd_path: 43 # Make the path absolute so the creduce test can be run from any directory. 44 cmd_path = os.path.abspath(cmd_path) 45 cmd = find_executable(cmd_path) 46 if cmd: 47 return cmd 48 sys.exit("ERROR: executable `%s` not found" % (cmd_path)) 49 50 cmd = find_executable(cmd_name, path=cmd_dir) 51 if cmd: 52 return cmd 53 54 if not cmd_dir: 55 cmd_dir = "$PATH" 56 sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir)) 57 58def quote_cmd(cmd): 59 return ' '.join(pipes.quote(arg) for arg in cmd) 60 61def write_to_script(text, filename): 62 with open(filename, 'w') as f: 63 f.write(text) 64 os.chmod(filename, os.stat(filename).st_mode | stat.S_IEXEC) 65 66class Reduce(object): 67 def __init__(self, crash_script, file_to_reduce): 68 crash_script_name, crash_script_ext = os.path.splitext(crash_script) 69 file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce) 70 71 self.testfile = file_reduce_name + '.test.sh' 72 self.crash_script = crash_script_name + '.reduced' + crash_script_ext 73 self.file_to_reduce = file_reduce_name + '.reduced' + file_reduce_ext 74 shutil.copy(file_to_reduce, self.file_to_reduce) 75 76 self.clang = clang_cmd 77 self.clang_args = [] 78 self.expected_output = [] 79 self.needs_stack_trace = False 80 self.creduce_flags = ["--tidy"] 81 82 self.read_clang_args(crash_script, file_to_reduce) 83 self.read_expected_output() 84 85 def get_crash_cmd(self, cmd=None, args=None, filename=None): 86 if not cmd: 87 cmd = self.clang 88 if not args: 89 args = self.clang_args 90 if not filename: 91 filename = self.file_to_reduce 92 93 return [cmd] + args + [filename] 94 95 def read_clang_args(self, crash_script, filename): 96 print("\nReading arguments from crash script...") 97 with open(crash_script) as f: 98 # Assume clang call is the first non comment line. 99 cmd = [] 100 for line in f: 101 if not line.lstrip().startswith('#'): 102 cmd = shlex.split(line) 103 break 104 if not cmd: 105 sys.exit("Could not find command in the crash script."); 106 107 # Remove clang and filename from the command 108 # Assume the last occurrence of the filename is the clang input file 109 del cmd[0] 110 for i in range(len(cmd)-1, -1, -1): 111 if cmd[i] == filename: 112 del cmd[i] 113 break 114 self.clang_args = cmd 115 verbose_print("Clang arguments:", quote_cmd(self.clang_args)) 116 117 def read_expected_output(self): 118 print("\nGetting expected crash output...") 119 p = subprocess.Popen(self.get_crash_cmd(), 120 stdout=subprocess.PIPE, 121 stderr=subprocess.STDOUT) 122 crash_output, _ = p.communicate() 123 result = [] 124 125 # Remove color codes 126 ansi_escape = r'\x1b\[[0-?]*m' 127 crash_output = re.sub(ansi_escape, '', crash_output.decode('utf-8')) 128 129 # Look for specific error messages 130 regexes = [r"Assertion .+ failed", # Linux assert() 131 r"Assertion failed: .+,", # FreeBSD/Mac assert() 132 r"fatal error: error in backend: .+", 133 r"LLVM ERROR: .+", 134 r"UNREACHABLE executed at .+?!", 135 r"LLVM IR generation of declaration '.+'", 136 r"Generating code for declaration '.+'", 137 r"\*\*\* Bad machine code: .+ \*\*\*"] 138 for msg_re in regexes: 139 match = re.search(msg_re, crash_output) 140 if match: 141 msg = match.group(0) 142 result = [msg] 143 print("Found message:", msg) 144 break 145 146 # If no message was found, use the top five stack trace functions, 147 # ignoring some common functions 148 # Five is a somewhat arbitrary number; the goal is to get a small number 149 # of identifying functions with some leeway for common functions 150 if not result: 151 self.needs_stack_trace = True 152 stacktrace_re = r'[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\(' 153 filters = ["PrintStackTrace", "RunSignalHandlers", "CleanupOnSignal", 154 "HandleCrash", "SignalHandler", "__restore_rt", "gsignal", "abort"] 155 def skip_function(func_name): 156 return any(name in func_name for name in filters) 157 158 matches = re.findall(stacktrace_re, crash_output) 159 result = [x for x in matches if x and not skip_function(x)][:5] 160 for msg in result: 161 print("Found stack trace function:", msg) 162 163 if not result: 164 print("ERROR: no crash was found") 165 print("The crash output was:\n========\n%s========" % crash_output) 166 sys.exit(1) 167 168 self.expected_output = result 169 170 def check_expected_output(self, args=None, filename=None): 171 if not args: 172 args = self.clang_args 173 if not filename: 174 filename = self.file_to_reduce 175 176 p = subprocess.Popen(self.get_crash_cmd(args=args, filename=filename), 177 stdout=subprocess.PIPE, 178 stderr=subprocess.STDOUT) 179 crash_output, _ = p.communicate() 180 return all(msg in crash_output.decode('utf-8') for msg in 181 self.expected_output) 182 183 def write_interestingness_test(self): 184 print("\nCreating the interestingness test...") 185 186 # Disable symbolization if it's not required to avoid slow symbolization. 187 disable_symbolization = '' 188 if not self.needs_stack_trace: 189 disable_symbolization = 'export LLVM_DISABLE_SYMBOLIZATION=1' 190 191 output = """#!/bin/bash 192%s 193if %s >& t.log ; then 194 exit 1 195fi 196""" % (disable_symbolization, quote_cmd(self.get_crash_cmd())) 197 198 for msg in self.expected_output: 199 output += 'grep -F %s t.log || exit 1\n' % pipes.quote(msg) 200 201 write_to_script(output, self.testfile) 202 self.check_interestingness() 203 204 def check_interestingness(self): 205 testfile = os.path.abspath(self.testfile) 206 207 # Check that the test considers the original file interesting 208 with open(os.devnull, 'w') as devnull: 209 returncode = subprocess.call(testfile, stdout=devnull) 210 if returncode: 211 sys.exit("The interestingness test does not pass for the original file.") 212 213 # Check that an empty file is not interesting 214 # Instead of modifying the filename in the test file, just run the command 215 with tempfile.NamedTemporaryFile() as empty_file: 216 is_interesting = self.check_expected_output(filename=empty_file.name) 217 if is_interesting: 218 sys.exit("The interestingness test passes for an empty file.") 219 220 def clang_preprocess(self): 221 print("\nTrying to preprocess the source file...") 222 with tempfile.NamedTemporaryFile() as tmpfile: 223 cmd_preprocess = self.get_crash_cmd() + ['-E', '-o', tmpfile.name] 224 cmd_preprocess_no_lines = cmd_preprocess + ['-P'] 225 try: 226 subprocess.check_call(cmd_preprocess_no_lines) 227 if self.check_expected_output(filename=tmpfile.name): 228 print("Successfully preprocessed with line markers removed") 229 shutil.copy(tmpfile.name, self.file_to_reduce) 230 else: 231 subprocess.check_call(cmd_preprocess) 232 if self.check_expected_output(filename=tmpfile.name): 233 print("Successfully preprocessed without removing line markers") 234 shutil.copy(tmpfile.name, self.file_to_reduce) 235 else: 236 print("No longer crashes after preprocessing -- " 237 "using original source") 238 except subprocess.CalledProcessError: 239 print("Preprocessing failed") 240 241 @staticmethod 242 def filter_args(args, opts_equal=[], opts_startswith=[], 243 opts_one_arg_startswith=[]): 244 result = [] 245 skip_next = False 246 for arg in args: 247 if skip_next: 248 skip_next = False 249 continue 250 if any(arg == a for a in opts_equal): 251 continue 252 if any(arg.startswith(a) for a in opts_startswith): 253 continue 254 if any(arg.startswith(a) for a in opts_one_arg_startswith): 255 skip_next = True 256 continue 257 result.append(arg) 258 return result 259 260 def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs): 261 new_args = self.filter_args(args, **kwargs) 262 263 if extra_arg: 264 if extra_arg in new_args: 265 new_args.remove(extra_arg) 266 new_args.append(extra_arg) 267 268 if (new_args != args and 269 self.check_expected_output(args=new_args)): 270 if msg: 271 verbose_print(msg) 272 return new_args 273 return args 274 275 def try_remove_arg_by_index(self, args, index): 276 new_args = args[:index] + args[index+1:] 277 removed_arg = args[index] 278 279 # Heuristic for grouping arguments: 280 # remove next argument if it doesn't start with "-" 281 if index < len(new_args) and not new_args[index].startswith('-'): 282 del new_args[index] 283 removed_arg += ' ' + args[index+1] 284 285 if self.check_expected_output(args=new_args): 286 verbose_print("Removed", removed_arg) 287 return new_args, index 288 return args, index+1 289 290 def simplify_clang_args(self): 291 """Simplify clang arguments before running C-Reduce to reduce the time the 292 interestingness test takes to run. 293 """ 294 print("\nSimplifying the clang command...") 295 296 # Remove some clang arguments to speed up the interestingness test 297 new_args = self.clang_args 298 new_args = self.try_remove_args(new_args, 299 msg="Removed debug info options", 300 opts_startswith=["-gcodeview", 301 "-debug-info-kind=", 302 "-debugger-tuning="]) 303 304 new_args = self.try_remove_args(new_args, 305 msg="Removed --show-includes", 306 opts_startswith=["--show-includes"]) 307 # Not suppressing warnings (-w) sometimes prevents the crash from occurring 308 # after preprocessing 309 new_args = self.try_remove_args(new_args, 310 msg="Replaced -W options with -w", 311 extra_arg='-w', 312 opts_startswith=["-W"]) 313 new_args = self.try_remove_args(new_args, 314 msg="Replaced optimization level with -O0", 315 extra_arg="-O0", 316 opts_startswith=["-O"]) 317 318 # Try to remove compilation steps 319 new_args = self.try_remove_args(new_args, msg="Added -emit-llvm", 320 extra_arg="-emit-llvm") 321 new_args = self.try_remove_args(new_args, msg="Added -fsyntax-only", 322 extra_arg="-fsyntax-only") 323 324 # Try to make implicit int an error for more sensible test output 325 new_args = self.try_remove_args(new_args, msg="Added -Werror=implicit-int", 326 opts_equal=["-w"], 327 extra_arg="-Werror=implicit-int") 328 329 self.clang_args = new_args 330 verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd())) 331 332 def reduce_clang_args(self): 333 """Minimize the clang arguments after running C-Reduce, to get the smallest 334 command that reproduces the crash on the reduced file. 335 """ 336 print("\nReducing the clang crash command...") 337 338 new_args = self.clang_args 339 340 # Remove some often occurring args 341 new_args = self.try_remove_args(new_args, msg="Removed -D options", 342 opts_startswith=["-D"]) 343 new_args = self.try_remove_args(new_args, msg="Removed -D options", 344 opts_one_arg_startswith=["-D"]) 345 new_args = self.try_remove_args(new_args, msg="Removed -I options", 346 opts_startswith=["-I"]) 347 new_args = self.try_remove_args(new_args, msg="Removed -I options", 348 opts_one_arg_startswith=["-I"]) 349 new_args = self.try_remove_args(new_args, msg="Removed -W options", 350 opts_startswith=["-W"]) 351 352 # Remove other cases that aren't covered by the heuristic 353 new_args = self.try_remove_args(new_args, msg="Removed -mllvm", 354 opts_one_arg_startswith=["-mllvm"]) 355 356 i = 0 357 while i < len(new_args): 358 new_args, i = self.try_remove_arg_by_index(new_args, i) 359 360 self.clang_args = new_args 361 362 reduced_cmd = quote_cmd(self.get_crash_cmd()) 363 write_to_script(reduced_cmd, self.crash_script) 364 print("Reduced command:", reduced_cmd) 365 366 def run_creduce(self): 367 print("\nRunning C-Reduce...") 368 try: 369 p = subprocess.Popen([creduce_cmd] + self.creduce_flags + 370 [self.testfile, self.file_to_reduce]) 371 p.communicate() 372 except KeyboardInterrupt: 373 # Hack to kill C-Reduce because it jumps into its own pgid 374 print('\n\nctrl-c detected, killed creduce') 375 p.kill() 376 377def main(): 378 global verbose 379 global creduce_cmd 380 global clang_cmd 381 382 parser = ArgumentParser(description=__doc__, 383 formatter_class=RawTextHelpFormatter) 384 parser.add_argument('crash_script', type=str, nargs=1, 385 help="Name of the script that generates the crash.") 386 parser.add_argument('file_to_reduce', type=str, nargs=1, 387 help="Name of the file to be reduced.") 388 parser.add_argument('--llvm-bin', dest='llvm_bin', type=str, 389 help="Path to the LLVM bin directory.") 390 parser.add_argument('--clang', dest='clang', type=str, 391 help="The path to the `clang` executable. " 392 "By default uses the llvm-bin directory.") 393 parser.add_argument('--creduce', dest='creduce', type=str, 394 help="The path to the `creduce` executable. " 395 "Required if `creduce` is not in PATH environment.") 396 parser.add_argument('-v', '--verbose', action='store_true') 397 args = parser.parse_args() 398 399 verbose = args.verbose 400 llvm_bin = os.path.abspath(args.llvm_bin) if args.llvm_bin else None 401 creduce_cmd = check_cmd('creduce', None, args.creduce) 402 clang_cmd = check_cmd('clang', llvm_bin, args.clang) 403 404 crash_script = check_file(args.crash_script[0]) 405 file_to_reduce = check_file(args.file_to_reduce[0]) 406 407 r = Reduce(crash_script, file_to_reduce) 408 409 r.simplify_clang_args() 410 r.write_interestingness_test() 411 r.clang_preprocess() 412 r.run_creduce() 413 r.reduce_clang_args() 414 415if __name__ == '__main__': 416 main() 417