#!/usr/bin/env python3
# This script converts a C file to use the PEP 384 type definition API
# Usage: abitype.py < old_code > new_code
import re
import sys

# Token list of the C source currently being converted.  Every entry is a
# (kind, text) pair; classify() and get_fields() read this module global,
# and convert() rebinds it.
tokens = []

############ Simplistic C scanner ##################################

# The comment alternative uses [\s\S] so that a /* ... */ comment spanning
# several lines is scanned as ONE token; otherwise commas inside such a
# comment would be mistaken for field separators.
_TOKENIZER = re.compile(
    r"(?P<preproc>#.*\n)"
    r"|(?P<comment>/\*[\s\S]*?\*/)"
    r"|(?P<ident>[a-zA-Z_][a-zA-Z0-9_]*)"
    r"|(?P<ws>[ \t\n]+)"
    r"|(?P<other>.)")

# Shape of a PyTypeObject definition in the string produced by classify():
# optional "static", "PyTypeObject", a name, "=", and a braced initializer
# up to the first "};".
_TYPE_DEFINITION = re.compile(r'(SW)?TWIW?=W?\{.*?\};')

# Used to ignore C comments when deciding whether an initializer is zero.
_C_COMMENT = re.compile(r'/\*[\s\S]*?\*/')


def tokenize(source):
    """Split C *source* into a list of ``[kind, text]`` tokens.

    *kind* is one of 'preproc', 'comment', 'ident', 'ws' or 'other'.
    Concatenating the texts of all tokens gives back *source* unchanged.
    """
    result = []
    pos = 0
    while pos != len(source):
        m = _TOKENIZER.match(source, pos)
        token = [m.lastgroup, m.group()]
        pos = m.end()
        if token[0] == 'preproc':
            # continuation lines are considered
            # only in preprocess statements
            while token[1].endswith('\\\n'):
                nl = source.find('\n', pos)
                line = source[pos:] if nl == -1 else source[pos:nl + 1]
                if not line:
                    # The source ends right after a backslash-newline;
                    # nothing is left to append, so stop instead of
                    # spinning forever.
                    break
                token[1] += line
                pos += len(line)
        result.append(token)
    return result


###### Replacement of PyTypeObject static instances ##############

# classify each token, giving it a one-letter code:
# S: static
# T: PyTypeObject
# I: ident
# W: whitespace
# =, {, }, ; : themselves
# Everything else is coded as '.'
def classify():
    """Return a string holding one code character per entry of ``tokens``.

    Because every token yields exactly one character, an index into the
    returned string is also an index into ``tokens``.
    """
    ident_codes = {'PyTypeObject': 'T', 'static': 'S'}
    res = []
    for t, v in tokens:
        if t == 'other' and v in "={};":
            res.append(v)
        elif t == 'ident':
            res.append(ident_codes.get(v, 'I'))
        elif t == 'ws':
            res.append('W')
        else:
            res.append('.')
    return ''.join(res)


# Obtain a list of fields of a PyTypeObject, in declaration order,
# skipping ob_base
# Comments (and whitespace) before and after each field are dropped
# (they are typically just the slot names, anyway), and information is
# discarded whether the original type was static.
def get_fields(start, real_end):
    """Parse the PyTypeObject definition found at ``tokens[start:real_end]``.

    Returns ``(name, fields)``: the C variable name and the list of
    initializer expressions as strings.  Raises Exception when the
    initializer does not begin with PyVarObject_HEAD_INIT.
    """
    pos = start
    # static?
    if tokens[pos][1] == 'static':
        pos += 2
    # PyTypeObject
    pos += 2
    # name
    name = tokens[pos][1]
    pos += 1
    while tokens[pos][1] != '{':
        pos += 1
    pos += 1
    # PyVarObject_HEAD_INIT
    while tokens[pos][0] in ('ws', 'comment'):
        pos += 1
    if tokens[pos][1] != 'PyVarObject_HEAD_INIT':
        raise Exception('%s has no PyVarObject_HEAD_INIT' % name)
    while tokens[pos][1] != ')':
        pos += 1
    pos += 1
    # field definitions: various tokens, comma-separated
    fields = []
    while True:
        while tokens[pos][0] in ('ws', 'comment'):
            pos += 1
        if tokens[pos][1] == '}':
            # Closing brace directly after the previous separator (i.e. a
            # trailing comma): there is no further field, so do not record
            # an empty one.
            break
        end = pos
        while tokens[end][1] not in (',', '}'):
            if tokens[end][1] == '(':
                # Skip a parenthesised group as a unit, so that commas
                # inside casts or calls do not split the field.
                nesting = 1
                while nesting:
                    end += 1
                    if tokens[end][1] == '(':
                        nesting += 1
                    if tokens[end][1] == ')':
                        nesting -= 1
            end += 1
        assert end < real_end, '%s: initializer runs past its end' % name
        # join field, excluding separator and trailing ws
        end1 = end - 1
        while tokens[end1][0] in ('ws', 'comment'):
            end1 -= 1
        fields.append(''.join(t[1] for t in tokens[pos:end1 + 1]))
        if tokens[end][1] == '}':
            break
        pos = end + 1
    return name, fields


# List of type slots as of Python 3.2, omitting ob_base
typeslots = [
    'tp_name',
    'tp_basicsize',
    'tp_itemsize',
    'tp_dealloc',
    'tp_print',
    'tp_getattr',
    'tp_setattr',
    'tp_reserved',
    'tp_repr',
    'tp_as_number',
    'tp_as_sequence',
    'tp_as_mapping',
    'tp_hash',
    'tp_call',
    'tp_str',
    'tp_getattro',
    'tp_setattro',
    'tp_as_buffer',
    'tp_flags',
    'tp_doc',
    'tp_traverse',
    'tp_clear',
    'tp_richcompare',
    'tp_weaklistoffset',
    'tp_iter',
    'tp_iternext',
    'tp_methods',
    'tp_members',
    'tp_getset',
    'tp_base',
    'tp_dict',
    'tp_descr_get',
    'tp_descr_set',
    'tp_dictoffset',
    'tp_init',
    'tp_alloc',
    'tp_new',
    'tp_free',
    'tp_is_gc',
    'tp_bases',
    'tp_mro',
    'tp_cache',
    'tp_subclasses',
    'tp_weaklist',
    'tp_del',
    'tp_version_tag',
]

# Fields that become members of the PyType_Spec itself rather than slots.
_SPEC_FIELDS = ('tp_name', 'tp_basicsize', 'tp_itemsize', 'tp_flags')


def _is_unset(val):
    """Tell whether initializer *val* leaves its field empty.

    True for an empty string, ``0`` and ``NULL``, optionally wrapped in
    parentheses and/or preceded by casts such as ``(destructor)0``.
    Comments inside *val* are ignored.
    """
    text = _C_COMMENT.sub('', val).strip()
    while text.startswith('('):
        # Locate the parenthesis that closes the leading group.
        depth = 0
        for idx, ch in enumerate(text):
            if ch == '(':
                depth += 1
            elif ch == ')':
                depth -= 1
                if depth == 0:
                    break
        else:
            return False  # unbalanced; not something we recognise
        rest = text[idx + 1:].strip()
        # "(cast)expr" -> look at expr;  "(expr)" -> look inside.
        text = rest if rest else text[1:idx].strip()
    return text in ('', '0', 'NULL')


# Generate a PyType_Spec definition
def make_slots(name, fields):
    """Return C source defining ``<name>_slots`` and ``<name>_spec``.

    *fields* are the initializer expressions of the old PyTypeObject in
    the order of ``typeslots``.  Unset fields (0 / NULL) are omitted; the
    name, sizes and flags go into the PyType_Spec, every other field is
    written as a ``{Py_<slot>, value}`` entry.  Raises KeyError if
    tp_name is unset and IndexError if a set field lies beyond the known
    ``typeslots``.

    NOTE(review): an entry is written for every set field, whether or not
    a matching ``Py_<slot>`` constant exists in the C headers - confirm
    the generated code against the target headers.
    """
    res = []
    res.append('static PyType_Slot %s_slots[] = {' % name)
    # defaults for spec; without them a field that is literally 0 in the
    # old definition would be missing when the spec is written below
    spec = {'tp_basicsize': '0', 'tp_itemsize': '0', 'tp_flags': '0'}
    for i, val in enumerate(fields):
        if _is_unset(val):
            continue
        slot = typeslots[i]
        if slot in _SPEC_FIELDS:
            spec[slot] = val
            continue
        res.append(' {Py_%s, %s},' % (slot, val))
    # PyType_FromSpec requires the slot array to end with a zero slot.
    res.append(' {0, NULL},')
    res.append('};')
    res.append('static PyType_Spec %s_spec = {' % name)
    res.append(' %s,' % spec['tp_name'])
    res.append(' %s,' % spec['tp_basicsize'])
    res.append(' %s,' % spec['tp_itemsize'])
    res.append(' %s,' % spec['tp_flags'])
    res.append(' %s_slots,' % name)
    res.append('};\n')
    return '\n'.join(res)


def convert(source):
    """Return C *source* with every PyTypeObject definition replaced.

    Rebinds the module-level ``tokens`` list, which classify() and
    get_fields() operate on.
    """
    global tokens
    tokens = tokenize(source)
    # Main loop: replace all static PyTypeObjects until
    # there are none left.
    while True:
        m = _TYPE_DEFINITION.search(classify())
        if not m:
            break
        start, end = m.span()
        name, fields = get_fields(start, end)
        # The replacement is a single token with an empty kind, which
        # classify() codes as '.', so it is never matched again.
        tokens[start:end] = [('', make_slots(name, fields))]
    return ''.join(v for t, v in tokens)


def main():
    """Filter stdin to stdout, converting the type definitions."""
    sys.stdout.write(convert(sys.stdin.read()))


if __name__ == '__main__':
    main()