#!/usr/bin/env python

from __future__ import absolute_import, division, print_function
import argparse
import os.path as path


def read_tests(f):
    """Parse an AT&T POSIX test file into a list of test descriptions.

    Each line of the file is split on tabs; empty fields are dropped and
    the rest are stripped of surrounding whitespace. A line is used only
    if it has 4 or 5 fields, its first field contains ``E`` and that
    field does not start with ``#``. The first four fields are read as
    (terse options, pattern, input text, expected groups); a fifth field,
    if present, is ignored.

    Lines whose expected-groups field is neither ``NOMATCH`` nor a list
    of ``(start,end)`` pairs are skipped (these are tests that should
    result in an error).

    Args:
        f: Path to the test file. Its basename without extension is used
           as the prefix of every generated test name.

    Returns:
        A list of dicts with the keys ``name``, ``options``, ``pattern``,
        ``input`` and ``matches``. Every value is a string that is
        already formatted for direct printing as ``key = value``.
    """
    basename, _ = path.splitext(path.basename(f))
    tests = []
    prev_pattern = None

    # Open via a context manager so the handle is closed as soon as
    # parsing finishes (or fails) instead of being leaked.
    with open(f) as datfile:
        for lineno, line in enumerate(datfile, 1):
            fields = list(filter(None, map(str.strip, line.split('\t'))))
            if not (4 <= len(fields) <= 5) \
               or 'E' not in fields[0] or fields[0][0] == '#':
                continue

            terse_opts, pat, text, sgroups = fields[0:4]
            groups = []  # groups as integer ranges
            if sgroups == 'NOMATCH':
                pass  # no match expected: keep the empty group list
            elif ',' in sgroups:
                # Only the first "(start,end)" pair is recorded; any
                # further groups on the line are deliberately ignored.
                first = sgroups.split(')(')[0].strip('()')
                s, e = map(str.strip, first.split(','))
                groups.append([int(s), int(e)])
            else:
                # This skips tests that should result in an error.
                # There aren't many, so I think we can just capture those
                # manually. Possibly fix this in future.
                continue

            if text == "NULL":
                text = ""
            if pat == 'SAME':
                # Reuse the pattern of the most recently emitted test.
                pat = prev_pattern
            if '$' in terse_opts:
                # '$' marks fields containing backslash escapes: expand
                # them to the characters they stand for first.
                pat = pat.encode('utf-8').decode('unicode_escape')
                text = text.encode('utf-8').decode('unicode_escape')
            # The input is always emitted in escaped form.
            text = text.encode('unicode_escape').decode('utf-8')
            opts = ['escaped']
            if 'i' in terse_opts:
                opts.append('case-insensitive')

            pat = pat.encode('unicode_escape').decode('utf-8')
            # Escaping doubled every backslash of the pattern; undo that.
            pat = pat.replace('\\\\', '\\')
            tests.append({
                'name': '"%s%d"' % (basename, lineno),
                'options': repr(opts),
                'pattern': "'''%s'''" % pat,
                'input': "'''%s'''" % text,
                'matches': str(groups),
            })
            prev_pattern = pat
    return tests


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Generate match tests from an AT&T POSIX test file.')
    aa = parser.add_argument
    aa('datfile', help='A dat AT&T POSIX test file.')
    args = parser.parse_args()

    tests = read_tests(args.datfile)
    # Emit one "[[tests]]" table per test, followed by a blank line.
    for t in tests:
        print('[[tests]]')
        for k, v in t.items():
            print('%s = %s' % (k, v))
        print('')