#!/usr/bin/env python2.7

"""A test case update script.

This script is a utility to update LLVM 'llvm-mca' based test cases with new
FileCheck patterns.
"""

import argparse
from collections import defaultdict
import glob
import os
import sys
import warnings

from UpdateTestChecks import common


COMMENT_CHAR = '#'
ADVERT_PREFIX = '{} NOTE: Assertions have been autogenerated by '.format(
    COMMENT_CHAR)
ADVERT = '{}utils/{}'.format(ADVERT_PREFIX, os.path.basename(__file__))


class Error(Exception):
  """ Generic Error that can be raised without printing a traceback.
  """
  pass


def _warn(msg):
  """ Log a user warning to stderr.
  """
  warnings.warn(msg, Warning, stacklevel=2)


def _configure_warnings(args):
  warnings.resetwarnings()
  if args.w:
    warnings.simplefilter('ignore')
  if args.Werror:
    warnings.simplefilter('error')


def _showwarning(message, category, filename, lineno, file=None, line=None):
  """ Version of warnings.showwarning that won't attempt to print out the
      line at the location of the warning if the line text is not explicitly
      specified.
  """
  if file is None:
    file = sys.stderr
  if line is None:
    line = ''
  file.write(warnings.formatwarning(message, category, filename, lineno, line))


def _parse_args():
  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument('-v', '--verbose',
                      action='store_true',
                      help='show verbose output')
  parser.add_argument('-w',
                      action='store_true',
                      help='suppress warnings')
  parser.add_argument('-Werror',
                      action='store_true',
                      help='promote warnings to errors')
  parser.add_argument('--llvm-mca-binary',
                      metavar='<path>',
                      default='llvm-mca',
                      help='the binary to use to generate the test case '
                           '(default: llvm-mca)')
  parser.add_argument('tests',
                      metavar='<test-path>',
                      nargs='+')
  args = parser.parse_args()

  _configure_warnings(args)

  if not args.llvm_mca_binary:
    raise Error('--llvm-mca-binary value cannot be an empty string')

  if os.path.basename(args.llvm_mca_binary) != 'llvm-mca':
    _warn('unexpected binary name: {}'.format(args.llvm_mca_binary))

  return args
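
# A sketch of typical usage (assumed script, binary and test paths; not taken
# from this file and shown as a comment rather than executed):
#
#   $ utils/update_mca_test_checks.py --llvm-mca-binary=build/bin/llvm-mca \
#       "test/tools/llvm-mca/X86/*.s"
#
# Each positional argument is passed through glob.glob() by main(), so quoted
# wildcard patterns are expanded by the script itself.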


def _find_run_lines(input_lines, args):
  raw_lines = [m.group(1)
               for m in [common.RUN_LINE_RE.match(l) for l in input_lines]
               if m]
  run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
  for l in raw_lines[1:]:
    if run_lines[-1].endswith('\\'):
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l
    else:
      run_lines.append(l)

  if args.verbose:
    sys.stderr.write('Found {} RUN line{}:\n'.format(
        len(run_lines), '' if len(run_lines) == 1 else 's'))
    for line in run_lines:
      sys.stderr.write('  RUN: {}\n'.format(line))

  return run_lines
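
# Illustrative behaviour (assumed test content, shown as a comment rather than
# executed): given RUN lines continued with a trailing backslash, e.g.
#
#   # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 < %s \
#   # RUN:   | FileCheck %s -check-prefix=BTVER2
#
# _find_run_lines() joins the fragments into a single logical RUN line,
# roughly 'llvm-mca ... < %s | FileCheck %s -check-prefix=BTVER2'.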


def _get_run_infos(run_lines, args):
  run_infos = []
  for run_line in run_lines:
    try:
      (tool_cmd, filecheck_cmd) = tuple([cmd.strip()
                                         for cmd in run_line.split('|', 1)])
    except ValueError:
      _warn('could not split tool and filecheck commands: {}'.format(run_line))
      continue

    tool_basename = os.path.basename(args.llvm_mca_binary)

    if not tool_cmd.startswith(tool_basename + ' '):
      _warn('skipping non-{} RUN line: {}'.format(tool_basename, run_line))
      continue

    if not filecheck_cmd.startswith('FileCheck '):
      _warn('skipping non-FileCheck RUN line: {}'.format(run_line))
      continue

    tool_cmd_args = tool_cmd[len(tool_basename):].strip()
    tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

    check_prefixes = [item
                      for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
                      for item in m.group(1).split(',')]
    if not check_prefixes:
      check_prefixes = ['CHECK']

    run_infos.append((check_prefixes, tool_cmd_args))

  return run_infos
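
# For the joined RUN line sketched above, _get_run_infos() would return
# something like (illustrative, not executed):
#
#   [(['BTVER2'], '-mtriple=x86_64-unknown-unknown -mcpu=btver2')]
#
# i.e. one (check_prefixes, tool_cmd_args) tuple per recognised RUN line, with
# the '%s' / '< %s' placeholders stripped from the tool arguments.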


def _break_down_block(block_info, common_prefix):
  """ Given a block_info, see if we can analyze it further to let us break it
      down by prefix per-line rather than per-block.
  """
  texts = block_info.keys()
  prefixes = list(block_info.values())
  # Split the lines from each of the incoming block_texts and zip them so that
  # each element contains the corresponding lines from each text.  E.g.
  #
  # block_text_1: A   # line 1
  #               B   # line 2
  #
  # block_text_2: A   # line 1
  #               C   # line 2
  #
  # would become:
  #
  # [(A, A),   # line 1
  #  (B, C)]   # line 2
  #
  line_tuples = list(zip(*list((text.splitlines() for text in texts))))

  # To simplify output, we'll only proceed if the very first line of the block
  # texts is common to each of them.
  if len(set(line_tuples[0])) != 1:
    return []

  result = []
  lresult = defaultdict(list)
  for line in line_tuples:
    if len(set(line)) == 1:
      # We're about to output a line with the common prefix.  This is a sync
      # point so flush any batched-up lines one prefix at a time to the output
      # first.
      for prefix in sorted(lresult):
        result.extend(lresult[prefix])
      lresult = defaultdict(list)

      # The line is common to each block so output with the common prefix.
      result.append((common_prefix, line[0]))
    else:
      # The line is not common to each block, or we don't have a common prefix.
      # If there are no prefixes available, warn and bail out.
      if not prefixes[0]:
        _warn('multiple lines not disambiguated by prefixes:\n{}\n'
              'Some blocks may be skipped entirely as a result.'.format(
                  '\n'.join('  - {}'.format(l) for l in line)))
        return []

      # Iterate through the line from each of the blocks and add the line with
      # the corresponding prefix to the current batch of results so that we can
      # later output them per-prefix.
      for i, l in enumerate(line):
        for prefix in prefixes[i]:
          lresult[prefix].append((prefix, l))

  # Flush any remaining batched-up lines one prefix at a time to the output.
  for prefix in sorted(lresult):
    result.extend(lresult[prefix])
  return result
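
# Continuing the example in the comments above, with hypothetical prefixes FOO
# and BAR (illustrative, not executed): given common_prefix 'ALL' and
#
#   block_info == {'A\nB': ['FOO'], 'A\nC': ['BAR']}
#
# the function would return roughly
#
#   [('ALL', 'A'), ('BAR', 'C'), ('FOO', 'B')]
#
# The shared first line keeps the common prefix, and the differing second
# lines are emitted per-prefix in sorted prefix order.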


def _get_useful_prefix_info(run_infos):
  """ Given the run_infos, calculate any prefixes that are common to every one,
      and the length of the longest prefix string.
  """
  try:
    all_sets = [set(s) for s in list(zip(*run_infos))[0]]
    common_to_all = set.intersection(*all_sets)
    longest_prefix_len = max(len(p) for p in set.union(*all_sets))
  except IndexError:
    common_to_all = []
    longest_prefix_len = 0
  else:
    if len(common_to_all) > 1:
      _warn('Multiple prefixes common to all RUN lines: {}'.format(
          common_to_all))
    if common_to_all:
      common_to_all = sorted(common_to_all)[0]
  return common_to_all, longest_prefix_len
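
# Example (illustrative, not executed): for
#
#   run_infos == [(['ALL', 'FOO'], '...'), (['ALL', 'BAR'], '...')]
#
# this returns ('ALL', 3): 'ALL' is the only prefix common to every RUN line,
# and 3 is the length of the longest prefix seen in any RUN line.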


def _get_block_infos(run_infos, test_path, args, common_prefix):  # noqa
  """ For each run line, run the tool with the specified args and collect the
      output. We use the concept of 'blocks' for uniquing, where a block is
      a series of lines of text with no more than one newline character between
      each one.  For example:

      This
      is
      one
      block

      This is
      another block

      This is yet another block

      We then build up a 'block_infos' structure containing a dict where the
      text of each block is the key and the value is a list of the sets of
      prefixes that may generate that particular block.  This then goes through
      a series of transformations to minimise the number of CHECK lines that
      need to be written by taking advantage of common prefixes.
  """

  def _block_key(tool_args, prefixes):
    """ Get a hashable key based on the current tool_args and prefixes.
    """
    return ' '.join([tool_args] + prefixes)

  all_blocks = {}
  max_block_len = 0

  # Run the tool for each run line to generate all of the blocks.
  for prefixes, tool_args in run_infos:
    key = _block_key(tool_args, prefixes)
    raw_tool_output = common.invoke_tool(args.llvm_mca_binary,
                                         tool_args,
                                         test_path)

    # Replace any lines consisting of purely whitespace with empty lines.
    raw_tool_output = '\n'.join(line if line.strip() else ''
                                for line in raw_tool_output.splitlines())

    # Split blocks, stripping all trailing whitespace, but keeping preceding
    # whitespace except for newlines so that columns will line up visually.
    all_blocks[key] = [b.lstrip('\n').rstrip()
                       for b in raw_tool_output.split('\n\n')]
    max_block_len = max(max_block_len, len(all_blocks[key]))

  # If necessary, pad the lists of blocks with empty blocks so that they are
  # all the same length.
  for key in all_blocks:
    len_to_pad = max_block_len - len(all_blocks[key])
    all_blocks[key] += [''] * len_to_pad

  # Create the block_infos structure: a nested dict of the form
  # block number -> block text -> list of prefix sets
  block_infos = defaultdict(lambda: defaultdict(list))
  for prefixes, tool_args in run_infos:
    key = _block_key(tool_args, prefixes)
    for block_num, block_text in enumerate(all_blocks[key]):
      block_infos[block_num][block_text].append(set(prefixes))

  # Now go through the block_infos structure and attempt to smartly prune the
  # number of prefixes per block to the minimal set possible to output.
  for block_num in range(len(block_infos)):
    # When there are multiple block texts for a block num, remove any
    # prefixes that are common to more than one of them.
    # E.g. [ [{ALL,FOO}] , [{ALL,BAR}] ] -> [ [{FOO}] , [{BAR}] ]
    all_sets = [s for s in block_infos[block_num].values()]
    pruned_sets = []

    for i, setlist in enumerate(all_sets):
      other_set_values = set([elem for j, setlist2 in enumerate(all_sets)
                              for set_ in setlist2 for elem in set_
                              if i != j])
      pruned_sets.append([s - other_set_values for s in setlist])

    for i, block_text in enumerate(block_infos[block_num]):

      # When a block text matches multiple sets of prefixes, try removing any
      # prefixes that aren't common to all of them.
      # E.g. [ {ALL,FOO} , {ALL,BAR} ] -> [{ALL}]
      common_values = set.intersection(*pruned_sets[i])
      if common_values:
        pruned_sets[i] = [common_values]

      # Everything should be uniqued as much as possible by now.  Apply the
      # newly pruned sets to the block_infos structure.
      # If there are any blocks of text that still match multiple prefixes,
      # output a warning.
      current_set = set()
      for s in pruned_sets[i]:
        s = sorted(list(s))
        if s:
          current_set.add(s[0])
          if len(s) > 1:
            _warn('Multiple prefixes generating same output: {} '
                  '(discarding {})'.format(','.join(s), ','.join(s[1:])))

      block_infos[block_num][block_text] = sorted(list(current_set))

    # If we have multiple block_texts, try to break them down further to avoid
    # the case where very similar block_texts are repeated one after the other.
    if common_prefix and len(block_infos[block_num]) > 1:
      # We'll only attempt this if each of the block_texts has the same number
      # of lines.
      same_num_lines = (len(set(len(k.splitlines())
                                for k in block_infos[block_num].keys())) == 1)
      if same_num_lines:
        breakdown = _break_down_block(block_infos[block_num], common_prefix)
        if breakdown:
          block_infos[block_num] = breakdown

  return block_infos
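
# The returned structure maps block number -> block text -> list of prefixes,
# e.g. (illustrative llvm-mca-style output, not executed):
#
#   block_infos[0] == {'Iterations:   100\nInstructions:  300': ['ALL']}
#
# After a successful _break_down_block(), a block number may instead map to a
# flat list of (prefix, line) tuples.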


def _write_block(output, block, not_prefix_set, common_prefix, prefix_pad):
  """ Write an individual block, with correct padding on the prefixes.
  """
  end_prefix = ':     '
  previous_prefix = None
  num_lines_of_prefix = 0

  for prefix, line in block:
    if prefix in not_prefix_set:
      _warn('not writing for prefix {0} due to presence of "{0}-NOT:" '
            'in input file.'.format(prefix))
      continue

    # When the prefix changes and the previous output line isn't already
    # blank, emit a blank line first if the previous prefix produced more than
    # one line, or if either the current or previous prefix is the one common
    # to all RUN lines.
    num_lines_of_prefix += 1
    if prefix != previous_prefix:
      if output and output[-1]:
        if num_lines_of_prefix > 1 or any(p == common_prefix
                                          for p in (prefix, previous_prefix)):
          output.append('')
      num_lines_of_prefix = 0
      previous_prefix = prefix

    output.append(
        '{} {}{}{} {}'.format(COMMENT_CHAR,
                              prefix,
                              end_prefix,
                              ' ' * (prefix_pad - len(prefix)),
                              line).rstrip())
    end_prefix = '-NEXT:'

  output.append('')
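
# Each block is emitted in FileCheck form, e.g. (illustrative spacing only):
#
#   # ALL:          Iterations:   100
#   # ALL-NEXT:     Instructions:  300
#
# The first line written for a prefix uses 'PREFIX:' and subsequent lines use
# 'PREFIX-NEXT:', padded to prefix_pad so the check payloads line up.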


def _write_output(test_path, input_lines, prefix_list, block_infos,  # noqa
                  args, common_prefix, prefix_pad):
  prefix_set = set([prefix for prefixes, _ in prefix_list
                    for prefix in prefixes])
  not_prefix_set = set()

  output_lines = []
  for input_line in input_lines:
    if input_line.startswith(ADVERT_PREFIX):
      continue

    if input_line.startswith(COMMENT_CHAR):
      m = common.CHECK_RE.match(input_line)
      try:
        prefix = m.group(1)
      except AttributeError:
        prefix = None

      if '{}-NOT:'.format(prefix) in input_line:
        not_prefix_set.add(prefix)

      if prefix not in prefix_set or prefix in not_prefix_set:
        output_lines.append(input_line)
        continue

    if common.should_add_line_to_output(input_line, prefix_set):
      # This input line of the function body will go as-is into the output.
      # Except make leading whitespace uniform: 2 spaces.
      input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)

      # Skip empty lines if the previous output line is also empty.
      if input_line or output_lines[-1]:
        output_lines.append(input_line)
    else:
      continue

  # Add a blank line before the new checks if required.
  if len(output_lines) > 0 and output_lines[-1]:
    output_lines.append('')

  output_check_lines = []
  for block_num in range(len(block_infos)):
    for block_text in sorted(block_infos[block_num]):
      if not block_text:
        continue

      if type(block_infos[block_num]) is list:
        # The block is of the type output from _break_down_block().
        _write_block(output_check_lines,
                     block_infos[block_num],
                     not_prefix_set,
                     common_prefix,
                     prefix_pad)
        break
      elif block_infos[block_num][block_text]:
        # _break_down_block() was unable to do anything, so output the block
        # as-is.
        lines = block_text.split('\n')
        for prefix in block_infos[block_num][block_text]:
          _write_block(output_check_lines,
                       [(prefix, line) for line in lines],
                       not_prefix_set,
                       common_prefix,
                       prefix_pad)

  if output_check_lines:
    output_lines.insert(0, ADVERT)
    output_lines.extend(output_check_lines)

  # The file should not end with two newlines. It creates unnecessary churn.
  while len(output_lines) > 0 and output_lines[-1] == '':
    output_lines.pop()

  if input_lines == output_lines:
    sys.stderr.write('            [unchanged]\n')
    return
  sys.stderr.write('      [{} lines total]\n'.format(len(output_lines)))

  if args.verbose:
    sys.stderr.write(
        'Writing {} lines to {}...\n\n'.format(len(output_lines), test_path))

  with open(test_path, 'wb') as f:
    f.writelines(['{}\n'.format(l).encode() for l in output_lines])
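
# After a successful update, the test file looks roughly like the following
# (illustrative content, not executed):
#
#   # NOTE: Assertions have been autogenerated by utils/<this script>
#   # RUN: llvm-mca -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
#   ...original assembly input...
#
#   # CHECK:      Iterations:   100
#   # CHECK-NEXT: Instructions:  300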


def main():
  args = _parse_args()
  test_paths = [test for pattern in args.tests for test in glob.glob(pattern)]
  for test_path in test_paths:
    sys.stderr.write('Test: {}\n'.format(test_path))

    # Call this per test. By default each warning will only be written once
    # per source location. Reset the warning filter so that now each warning
    # will be written once per source location per test.
    _configure_warnings(args)

    if args.verbose:
      sys.stderr.write(
          'Scanning for RUN lines in test file: {}\n'.format(test_path))

    if not os.path.isfile(test_path):
      raise Error('could not find test file: {}'.format(test_path))

    with open(test_path) as f:
      input_lines = [l.rstrip() for l in f]

    run_lines = _find_run_lines(input_lines, args)
    run_infos = _get_run_infos(run_lines, args)
    common_prefix, prefix_pad = _get_useful_prefix_info(run_infos)
    block_infos = _get_block_infos(run_infos, test_path, args, common_prefix)
    _write_output(test_path,
                  input_lines,
                  run_infos,
                  block_infos,
                  args,
                  common_prefix,
                  prefix_pad)

  return 0


if __name__ == '__main__':
  try:
    warnings.showwarning = _showwarning
    sys.exit(main())
  except Error as e:
    sys.stdout.write('error: {}\n'.format(e))
    sys.exit(1)