#!/usr/bin/env python2.7

"""A script to generate FileCheck statements for regression tests.

This script is a utility to update LLVM opt or llc test cases with new
FileCheck patterns. It can either update all of the tests in the file or
a single test function.

Example usage:
$ update_test_checks.py --tool-binary=../bin/opt test/foo.ll

Workflow:
1. Make a compiler patch that requires updating some number of FileCheck lines
   in regression test files.
2. Save the patch and revert it from your local work area.
3. Update the RUN-lines in the affected regression tests to look canonical.
   Example: "; RUN: opt < %s -instcombine -S | FileCheck %s"
4. Refresh the FileCheck lines for either the entire file or select functions by
   running this script.
5. Commit the fresh baseline of checks.
6. Apply your patch from step 1 and rebuild your local binaries.
7. Re-run this script on affected regression tests.
8. Check the diffs to ensure the script has done something reasonable.
9. Submit a patch including the regression test diffs for review.

A common pattern is to have the script insert complete checking of every
instruction. Then, edit it down to only check the relevant instructions.
The script is designed to make adding checks to a test case fast, it is *not*
designed to be authoritative about what constitutes a good test!
"""

import argparse
import itertools
import os         # Used to advertise this file's name ("autogenerated_note").
import string
import subprocess
import sys
import tempfile
import re

ADVERT = '; NOTE: Assertions have been autogenerated by '

# RegEx: this is where the magic happens.

SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M)
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
SCRUB_X86_SHUFFLES_RE = (
    re.compile(
        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
        flags=re.M))
SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
SCRUB_X86_LCP_RE = re.compile(r'\.LCPI[0-9]+_[0-9]+')
# re.M is required so that '^' anchors at every line of a function body, not
# only at the very first character of the string being scrubbed.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n', flags=re.M)
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

RUN_LINE_RE = re.compile(r'^\s*;\s*RUN:\s*(.*)$')
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
LLC_FUNCTION_RE = re.compile(
    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section)',
    flags=(re.M | re.S))
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
    r'(\s+)?[^{]*\{\n(?P<body>.*?)\}',
    flags=(re.M | re.S))
CHECK_PREFIX_RE = re.compile(r'--check-prefix=(\S+)')
CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
IR_VALUE_DEF_RE = re.compile(r'\s+%(.*) =')


def invoke_tool(args, cmd_args, ir):
    """Run the tool under test on a test file and return its stdout.

    Args:
      args: Parsed command-line arguments; only ``args.tool_binary`` is read.
      cmd_args: Argument string appended after the tool binary.
      ir: Path of the file that is fed to the tool on stdin.

    Returns:
      The tool's standard output with '\\r\\n' line endings turned into '\\n'.

    Raises:
      subprocess.CalledProcessError: if the command exits with non-zero status.
    """
    # NOTE: the command is a shell string built from the tool path and the
    # arguments passed in, so it must only be used on trusted input.
    with open(ir) as ir_file:
        stdout = subprocess.check_output(args.tool_binary + ' ' + cmd_args,
                                         shell=True, stdin=ir_file)
    # check_output() yields bytes where str is a text type; decode so the
    # string operations below work. Where str is bytes this is a no-op.
    if not isinstance(stdout, str):
        stdout = stdout.decode('utf-8')
    # Fix line endings to Unix LF style.
    return stdout.replace('\r\n', '\n')


# FIXME: Separate the x86-specific scrubbers, so this can be used for other targets.
def scrub_asm(asm):
    """Replace volatile x86 asm details in ``asm`` with FileCheck patterns.

    Args:
      asm: Assembly text of one function body.

    Returns:
      The text with shuffle operands, stack offsets, RIP-relative operands and
      LCPI symbols replaced by ``{{...}}`` patterns and kill comment lines
      removed.
    """
    # Detect shuffle asm comments and hide the operands in favor of the comments.
    asm = SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', asm)
    # Generically match the stack offset of a memory operand.
    asm = SCRUB_X86_SP_RE.sub(r'{{[0-9]+}}(%\1)', asm)
    # Generically match a RIP-relative memory operand.
    asm = SCRUB_X86_RIP_RE.sub(r'{{.*}}(%rip)', asm)
    # Generically match a LCP symbol.
    asm = SCRUB_X86_LCP_RE.sub(r'{{\.LCPI.*}}', asm)
    # Strip kill operands inserted into the asm.
    asm = SCRUB_KILL_COMMENT_RE.sub('', asm)
    return asm


def scrub_body(body, tool_basename):
    """Normalize whitespace in a function body and scrub asm for llc output.

    Args:
      body: Raw text of one function body taken from the tool output.
      tool_basename: "llc" enables the asm scrubbers; any other value skips
        them.

    Returns:
      The normalized body text.
    """
    # Scrub runs of whitespace out of the assembly, but leave the leading
    # whitespace in place.
    body = SCRUB_WHITESPACE_RE.sub(r' ', body)
    # Expand the tabs used for indentation.
    body = body.expandtabs(2)
    # Strip trailing whitespace.
    body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
    if tool_basename == "llc":
        body = scrub_asm(body)
    return body


def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, tool_basename):
    """Record the scrubbed body of every function found in the tool output.

    Args:
      raw_tool_output: Complete stdout text of one tool invocation.
      prefixes: List of FileCheck prefixes that apply to this invocation.
      func_dict: Dict mapping prefix -> {function name -> body}; updated in
        place. Every prefix in ``prefixes`` must already be a key.
      verbose: When true, each function name and body is echoed to stderr.
      tool_basename: "llc" selects the asm function regex; any other value
        selects the IR function regex.

    When a prefix already holds a different body for a function, the entry is
    set to None (the prefix cannot describe both outputs) unless the prefix is
    the last one in ``prefixes``; in that case a warning is printed and the
    new body replaces the old one.
    """
    func_regex = LLC_FUNCTION_RE if tool_basename == "llc" else OPT_FUNCTION_RE
    for m in func_regex.finditer(raw_tool_output):
        func = m.group('func')
        scrubbed_body = scrub_body(m.group('body'), tool_basename)
        if func.startswith('stress'):
            # We only use the last line of the function body for stress tests.
            scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
        if verbose:
            sys.stderr.write('Processing function: ' + func + '\n')
            for l in scrubbed_body.splitlines():
                sys.stderr.write(' ' + l + '\n')
        for prefix in prefixes:
            if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body:
                if prefix == prefixes[-1]:
                    sys.stderr.write('WARNING: Found conflicting asm under the '
                                     'same prefix: %r!\n' % (prefix,))
                else:
                    func_dict[prefix][func] = None
                    continue

            func_dict[prefix][func] = scrubbed_body


def get_value_name(var):
    """Create a FileCheck variable name based on an IR name."""
    # A purely numeric IR name gets a 'TMP' prefix in front of the digits.
    if var.isdigit():
        var = 'TMP' + var
    var = var.replace('.', '_')
    return var.upper()


def get_value_definition(var):
    """Create a FileCheck variable definition (name plus regex) for ``var``."""
    return '[[' + get_value_name(var) + ':%.*]]'


# Use a FileCheck variable.
def get_value_use(var):
    """Return the FileCheck expression that uses the variable for ``var``."""
    return '[[' + get_value_name(var) + ']]'


def genericize_check_lines(lines):
    """Replace IR value defs and uses with FileCheck variables.

    Args:
      lines: List of IR text lines belonging to one function body.

    Returns:
      A new list of lines in which each matched '%name =' definition is
      replaced by a FileCheck variable definition and the remaining '%name'
      occurrences of those names by a variable use.
    """
    lines_with_def = []
    vars_seen = []
    for line in lines:
        # An IR variable named '%.' matches the FileCheck regex string.
        line = line.replace('%.', '%dot')
        m = IR_VALUE_DEF_RE.match(line)
        if m:
            vars_seen.append(m.group(1))
            line = line.replace('%' + m.group(1), get_value_definition(m.group(1)))

        lines_with_def.append(line)

    output_lines = []
    # Substitute longer names first so that a name which is a prefix of
    # another one cannot clobber part of the longer name.
    vars_seen.sort(key=len, reverse=True)
    for line in lines_with_def:
        for var in vars_seen:
            line = line.replace('%' + var, get_value_use(var))
        output_lines.append(line)

    return output_lines


def add_checks(output_lines, prefix_list, func_dict, func_name, tool_basename):
    """Append the FileCheck lines for one function to ``output_lines``.

    Args:
      output_lines: List of output lines; extended in place.
      prefix_list: List of (check prefixes, tool arguments) pairs, one per
        usable RUN line.
      func_dict: Dict mapping prefix -> {function name -> body or None}.
      func_name: Name of the function to emit checks for.
      tool_basename: "llc" for asm checks, "opt" for IR checks.

    Returns:
      ``output_lines`` (the same list object that was passed in).

    For each RUN line, checks are emitted under the first prefix that has a
    non-empty body recorded for ``func_name``. A prefix with no entry, a None
    entry or an empty body is skipped.
    """
    # Select a label format based on the whether we're checking asm or IR.
    if tool_basename == "llc":
        check_label_format = "; %s-LABEL: %s:"
    else:
        check_label_format = "; %s-LABEL: @%s("

    printed_prefixes = []
    for checkprefixes, _ in prefix_list:
        for checkprefix in checkprefixes:
            if checkprefix in printed_prefixes:
                break
            # .get(): the function may be present in the test file but absent
            # from the tool output, in which case there is nothing to check.
            body_text = func_dict[checkprefix].get(func_name)
            if not body_text:
                continue
            printed_prefixes.append(checkprefix)
            output_lines.append(check_label_format % (checkprefix, func_name))
            func_body = body_text.splitlines()

            # For IR output, change all defs to FileCheck variables, so we're
            # immune to variable naming fashions.
            if tool_basename == "opt":
                func_body = genericize_check_lines(func_body)

            # Every body line is checked, including a possibly noisy first
            # line: better to check everything. Be safe rather than sorry.

            # For llc tests, there may be asm directives between the label and
            # the first checked line (most likely that first checked line is
            # "# BB#0"), so the first check must not be a -NEXT check.
            is_blank_line = tool_basename != "opt"

            for func_line in func_body:
                if func_line.strip() == '':
                    is_blank_line = True
                    continue
                # Do not waste time checking IR comments.
                if tool_basename == "opt":
                    func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

                # Skip blank lines instead of checking them: the line after a
                # gap gets a plain check instead of a -NEXT check.
                if is_blank_line:
                    output_lines.append('; %s: %s' % (checkprefix, func_line))
                else:
                    output_lines.append('; %s-NEXT: %s' % (checkprefix, func_line))
                is_blank_line = False

            # Add space between different check prefixes and also before the
            # first line of code in the test function.
            output_lines.append(';')
            break
    return output_lines


def should_add_line_to_output(input_line, prefix_set):
    """Return True if a function-body line of the test file should be kept.

    Blank comment lines (';' only) and check lines whose prefix is in
    ``prefix_set`` are dropped; everything else is kept.
    """
    # Skip any blank comment lines in the IR.
    if input_line.strip() == ';':
        return False
    # And skip any CHECK lines. We're building our own.
    m = CHECK_RE.match(input_line)
    if m and m.group(1) in prefix_set:
        return False

    return True


def main():
    """Parse the command line and regenerate the checks of every test file.

    Each file named on the command line is read, its RUN lines are used to
    invoke the tool, and the file is rewritten in place with fresh check
    lines. Exits with status 1 if the tool's base name is not llc or opt.
    """
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Show verbose output')
    parser.add_argument('--tool-binary', default='llc',
                        help='The tool used to generate the test case')
    parser.add_argument(
        '--function', help='The function in the test file to update')
    parser.add_argument('tests', nargs='+')
    args = parser.parse_args()

    autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__))

    tool_basename = os.path.basename(args.tool_binary)
    if tool_basename not in ("llc", "opt"):
        sys.stderr.write('ERROR: Unexpected tool name: ' + tool_basename + '\n')
        sys.exit(1)

    for test in args.tests:
        if args.verbose:
            sys.stderr.write('Scanning for RUN lines in test file: %s\n' % (test,))
        with open(test) as f:
            input_lines = [l.rstrip() for l in f]

        run_lines = [m.group(1)
                     for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
        if args.verbose:
            sys.stderr.write('Found %d RUN lines:\n' % (len(run_lines),))
            for l in run_lines:
                sys.stderr.write(' RUN: ' + l + '\n')

        prefix_list = []
        for l in run_lines:
            # A RUN line without a pipe has no FileCheck part; give it an
            # empty one so it is reported and skipped below.
            commands = [cmd.strip() for cmd in l.split('|', 1)]
            tool_cmd = commands[0]
            filecheck_cmd = commands[1] if len(commands) > 1 else ''

            if not tool_cmd.startswith(tool_basename + ' '):
                sys.stderr.write('WARNING: Skipping non-%s RUN line: %s\n'
                                 % (tool_basename, l))
                continue

            if not filecheck_cmd.startswith('FileCheck '):
                sys.stderr.write('WARNING: Skipping non-FileChecked RUN line: ' + l + '\n')
                continue

            tool_cmd_args = tool_cmd[len(tool_basename):].strip()
            # The test file is supplied on stdin, so drop its placeholders.
            tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

            check_prefixes = [m.group(1)
                              for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)]
            if not check_prefixes:
                check_prefixes = ['CHECK']

            # FIXME: We should use multiple check prefixes to common check
            # lines. For now, we just ignore all but the last.
            prefix_list.append((check_prefixes, tool_cmd_args))

        func_dict = {prefix: {}
                     for prefixes, _ in prefix_list for prefix in prefixes}
        for prefixes, tool_args in prefix_list:
            if args.verbose:
                sys.stderr.write('Extracted tool cmd: ' + tool_basename + ' ' + tool_args + '\n')
                sys.stderr.write('Extracted FileCheck prefixes: ' + str(prefixes) + '\n')

            raw_tool_output = invoke_tool(args, tool_args, test)
            build_function_body_dictionary(raw_tool_output, prefixes, func_dict,
                                           args.verbose, tool_basename)

        is_in_function = False
        is_in_function_start = False
        name = None
        prefix_set = {prefix for prefixes, _ in prefix_list for prefix in prefixes}
        if args.verbose:
            sys.stderr.write('Rewriting FileCheck prefixes: %s\n' % (prefix_set,))
        output_lines = [autogenerated_note]

        for input_line in input_lines:
            if is_in_function_start:
                if input_line == '':
                    continue
                if input_line.lstrip().startswith(';'):
                    m = CHECK_RE.match(input_line)
                    if not m or m.group(1) not in prefix_set:
                        # Keep comments that are not our own check lines
                        # ahead of the generated checks.
                        output_lines.append(input_line)
                        continue

                # Print out the various check lines here.
                output_lines = add_checks(output_lines, prefix_list, func_dict,
                                          name, tool_basename)
                is_in_function_start = False

            if is_in_function:
                if not should_add_line_to_output(input_line, prefix_set):
                    continue
                # This input line of the function body will go as-is into the
                # output. Except make leading whitespace uniform: 2 spaces.
                input_line = SCRUB_LEADING_WHITESPACE_RE.sub('  ', input_line)
                output_lines.append(input_line)
                if input_line.strip() == '}':
                    is_in_function = False
                continue

            # Discard any previous script advertising.
            if input_line.startswith(ADVERT):
                continue

            # If it's outside a function, it just gets copied to the output.
            output_lines.append(input_line)

            m = IR_FUNCTION_RE.match(input_line)
            if not m:
                continue
            name = m.group(1)
            if args.function is not None and name != args.function:
                # When filtering on a specific function, skip all others.
                continue
            is_in_function = is_in_function_start = True

        if args.verbose:
            sys.stderr.write('Writing %d lines to %s...\n' % (len(output_lines), test))

        payload = ''.join([l + '\n' for l in output_lines])
        # The file is opened in binary mode so that '\n' is written verbatim.
        # Where str is already bytes the payload is written unchanged;
        # otherwise it is encoded first.
        if not isinstance(payload, bytes):
            payload = payload.encode('utf-8')
        with open(test, 'wb') as f:
            f.write(payload)


if __name__ == '__main__':
    main()