from . import model
from .commontypes import COMMON_TYPES, resolve_common_type
from .error import FFIError, CDefError
# Prefer a '_pycparser' module located inside this package; fall back to
# the top-level 'pycparser' module if that import fails.
try:
    from . import _pycparser as pycparser
except ImportError:
    import pycparser
import weakref, re, sys

# 'lock' is a module-wide lock object, or None if the thread module cannot
# be imported.  The module is called 'thread' on Python 2 and '_thread'
# on Python 3.
try:
    if sys.version_info < (3,):
        import thread as _thread
    else:
        import _thread
    lock = _thread.allocate_lock()
except ImportError:
    lock = None

def _workaround_for_static_import_finders():
    # Issue #392: packaging tools like cx_Freeze can not find these
    # because pycparser uses exec dynamic import. This is an obscure
    # workaround. This function is never called.
    import pycparser.yacctab
    import pycparser.lextab

# Pseudo file name used in the '# 1 "..."' line directive that precedes
# the user's source, and looked for again in parse error messages.
CDEF_SOURCE_STRING = "<cdef source string>"
# C comments: '/* ... */' (possibly spanning several lines) or '// ...' up
# to the end of the line, where a backslash escapes the next character.
_r_comment = re.compile(r"/\*.*?\*/|//([^\n\\]|\\.)*?$",
                        re.DOTALL | re.MULTILINE)
# '#define NAME rest-of-line': group 1 is the macro name, group 2 the rest
# of the line, which may continue over backslash-newline sequences.
_r_define = re.compile(r"^\s*#\s*define\s+([A-Za-z_][A-Za-z_0-9]*)"
                       r"\b((?:[^\n\\]|\\.)*?)$",
                       re.DOTALL | re.MULTILINE)
# A whole line starting with '#line' or '#NUMBER' (spaces/tabs allowed
# before and after the '#'), without the final newline.
_r_line_directive = re.compile(r"^[ \t]*#[ \t]*(?:line|\d+)\b.*$", re.MULTILINE)
# '= ...' followed by ',' or '}', or '...' followed by '}'.
_r_partial_enum = re.compile(r"=\s*\.\.\.\s*[,}]|\.\.\.\s*\}")
# Names of the form '__dotdotdotNUM__'.
_r_enum_dotdotdot = re.compile(r"__dotdotdot\d+__$")
# '[...]' with optional whitespace inside the brackets.
_r_partial_array = re.compile(r"\[\s*\.\.\.\s*\]")
# Tokenizer: a run of word characters, or any single non-space character.
_r_words = re.compile(r"\w+|\S")
# The shared pycparser.CParser instance, created lazily by _get_parser().
_parser_cache = None
# Loose, case-insensitive match for an integer literal: optional '-',
# optional '0'/'x' prefix, hexadecimal digits, then any 'l'/'u' suffixes.
_r_int_literal = re.compile(r"-?0?x?[0-9a-f]+[lu]*$", re.IGNORECASE)
# The words '__stdcall' or 'WINAPI'; the second pattern matches them only
# right after an opening parenthesis.
_r_stdcall1 = re.compile(r"\b(__stdcall|WINAPI)\b")
_r_stdcall2 = re.compile(r"[(]\s*(__stdcall|WINAPI)\b")
# The word '__cdecl'.
_r_cdecl = re.compile(r"\b__cdecl\b")
# 'extern "Python"', 'extern "Python+C"' or 'extern "C+Python"'; group 1
# is the quoted text.  The final '.' also consumes the first character
# that follows the optional whitespace.
_r_extern_python = re.compile(r'\bextern\s*"'
                              r'(Python|Python\s*\+\s*C|C\s*\+\s*Python)"\s*.')
_r_star_const_space = re.compile(       # matches "* const "
    r"[*]\s*((const|volatile|restrict)\b\s*)+")
# One or more integer type keywords followed by '...'.
_r_int_dotdotdot = re.compile(r"(\b(int|long|short|signed|unsigned|char)\s*)+"
                              r"\.\.\.")
# 'double' or 'float' followed by '...'.
_r_float_dotdotdot = re.compile(r"\b(double|float)\s*\.\.\.")

def _get_parser():
    """Return the shared pycparser.CParser, creating it on first use."""
    global _parser_cache
    if _parser_cache is None:
        _parser_cache = pycparser.CParser()
    return _parser_cache

56def _workaround_for_old_pycparser(csource): 57 # Workaround for a pycparser issue (fixed between pycparser 2.10 and 58 # 2.14): "char*const***" gives us a wrong syntax tree, the same as 59 # for "char***(*const)". This means we can't tell the difference 60 # afterwards. But "char(*const(***))" gives us the right syntax 61 # tree. The issue only occurs if there are several stars in 62 # sequence with no parenthesis inbetween, just possibly qualifiers. 63 # Attempt to fix it by adding some parentheses in the source: each 64 # time we see "* const" or "* const *", we add an opening 65 # parenthesis before each star---the hard part is figuring out where 66 # to close them. 67 parts = [] 68 while True: 69 match = _r_star_const_space.search(csource) 70 if not match: 71 break 72 #print repr(''.join(parts)+csource), '=>', 73 parts.append(csource[:match.start()]) 74 parts.append('('); closing = ')' 75 parts.append(match.group()) # e.g. "* const " 76 endpos = match.end() 77 if csource.startswith('*', endpos): 78 parts.append('('); closing += ')' 79 level = 0 80 i = endpos 81 while i < len(csource): 82 c = csource[i] 83 if c == '(': 84 level += 1 85 elif c == ')': 86 if level == 0: 87 break 88 level -= 1 89 elif c in ',;=': 90 if level == 0: 91 break 92 i += 1 93 csource = csource[endpos:i] + closing + csource[i:] 94 #print repr(''.join(parts)+csource) 95 parts.append(csource) 96 return ''.join(parts) 97 98def _preprocess_extern_python(csource): 99 # input: `extern "Python" int foo(int);` or 100 # `extern "Python" { int foo(int); }` 101 # output: 102 # void __cffi_extern_python_start; 103 # int foo(int); 104 # void __cffi_extern_python_stop; 105 # 106 # input: `extern "Python+C" int foo(int);` 107 # output: 108 # void __cffi_extern_python_plus_c_start; 109 # int foo(int); 110 # void __cffi_extern_python_stop; 111 parts = [] 112 while True: 113 match = _r_extern_python.search(csource) 114 if not match: 115 break 116 endpos = match.end() - 1 117 #print 118 #print 
''.join(parts)+csource 119 #print '=>' 120 parts.append(csource[:match.start()]) 121 if 'C' in match.group(1): 122 parts.append('void __cffi_extern_python_plus_c_start; ') 123 else: 124 parts.append('void __cffi_extern_python_start; ') 125 if csource[endpos] == '{': 126 # grouping variant 127 closing = csource.find('}', endpos) 128 if closing < 0: 129 raise CDefError("'extern \"Python\" {': no '}' found") 130 if csource.find('{', endpos + 1, closing) >= 0: 131 raise NotImplementedError("cannot use { } inside a block " 132 "'extern \"Python\" { ... }'") 133 parts.append(csource[endpos+1:closing]) 134 csource = csource[closing+1:] 135 else: 136 # non-grouping variant 137 semicolon = csource.find(';', endpos) 138 if semicolon < 0: 139 raise CDefError("'extern \"Python\": no ';' found") 140 parts.append(csource[endpos:semicolon+1]) 141 csource = csource[semicolon+1:] 142 parts.append(' void __cffi_extern_python_stop;') 143 #print ''.join(parts)+csource 144 #print 145 parts.append(csource) 146 return ''.join(parts) 147 148def _warn_for_string_literal(csource): 149 if '"' not in csource: 150 return 151 for line in csource.splitlines(): 152 if '"' in line and not line.lstrip().startswith('#'): 153 import warnings 154 warnings.warn("String literal found in cdef() or type source. " 155 "String literals are ignored here, but you should " 156 "remove them anyway because some character sequences " 157 "confuse pre-parsing.") 158 break 159 160def _warn_for_non_extern_non_static_global_variable(decl): 161 if not decl.storage: 162 import warnings 163 warnings.warn("Global variable '%s' in cdef(): for consistency " 164 "with C it should have a storage class specifier " 165 "(usually 'extern')" % (decl.name,)) 166 167def _remove_line_directives(csource): 168 # _r_line_directive matches whole lines, without the final \n, if they 169 # start with '#line' with some spacing allowed, or '#NUMBER'. 
This 170 # function stores them away and replaces them with exactly the string 171 # '#line@N', where N is the index in the list 'line_directives'. 172 line_directives = [] 173 def replace(m): 174 i = len(line_directives) 175 line_directives.append(m.group()) 176 return '#line@%d' % i 177 csource = _r_line_directive.sub(replace, csource) 178 return csource, line_directives 179 180def _put_back_line_directives(csource, line_directives): 181 def replace(m): 182 s = m.group() 183 if not s.startswith('#line@'): 184 raise AssertionError("unexpected #line directive " 185 "(should have been processed and removed") 186 return line_directives[int(s[6:])] 187 return _r_line_directive.sub(replace, csource) 188 189def _preprocess(csource): 190 # First, remove the lines of the form '#line N "filename"' because 191 # the "filename" part could confuse the rest 192 csource, line_directives = _remove_line_directives(csource) 193 # Remove comments. NOTE: this only work because the cdef() section 194 # should not contain any string literals (except in line directives)! 195 def replace_keeping_newlines(m): 196 return ' ' + m.group().count('\n') * '\n' 197 csource = _r_comment.sub(replace_keeping_newlines, csource) 198 # Remove the "#define FOO x" lines 199 macros = {} 200 for match in _r_define.finditer(csource): 201 macroname, macrovalue = match.groups() 202 macrovalue = macrovalue.replace('\\\n', '').strip() 203 macros[macroname] = macrovalue 204 csource = _r_define.sub('', csource) 205 # 206 if pycparser.__version__ < '2.14': 207 csource = _workaround_for_old_pycparser(csource) 208 # 209 # BIG HACK: replace WINAPI or __stdcall with "volatile const". 210 # It doesn't make sense for the return type of a function to be 211 # "volatile volatile const", so we abuse it to detect __stdcall... 212 # Hack number 2 is that "int(volatile *fptr)();" is not valid C 213 # syntax, so we place the "volatile" before the opening parenthesis. 
214 csource = _r_stdcall2.sub(' volatile volatile const(', csource) 215 csource = _r_stdcall1.sub(' volatile volatile const ', csource) 216 csource = _r_cdecl.sub(' ', csource) 217 # 218 # Replace `extern "Python"` with start/end markers 219 csource = _preprocess_extern_python(csource) 220 # 221 # Now there should not be any string literal left; warn if we get one 222 _warn_for_string_literal(csource) 223 # 224 # Replace "[...]" with "[__dotdotdotarray__]" 225 csource = _r_partial_array.sub('[__dotdotdotarray__]', csource) 226 # 227 # Replace "...}" with "__dotdotdotNUM__}". This construction should 228 # occur only at the end of enums; at the end of structs we have "...;}" 229 # and at the end of vararg functions "...);". Also replace "=...[,}]" 230 # with ",__dotdotdotNUM__[,}]": this occurs in the enums too, when 231 # giving an unknown value. 232 matches = list(_r_partial_enum.finditer(csource)) 233 for number, match in enumerate(reversed(matches)): 234 p = match.start() 235 if csource[p] == '=': 236 p2 = csource.find('...', p, match.end()) 237 assert p2 > p 238 csource = '%s,__dotdotdot%d__ %s' % (csource[:p], number, 239 csource[p2+3:]) 240 else: 241 assert csource[p:p+3] == '...' 242 csource = '%s __dotdotdot%d__ %s' % (csource[:p], number, 243 csource[p+3:]) 244 # Replace "int ..." or "unsigned long int..." with "__dotdotdotint__" 245 csource = _r_int_dotdotdot.sub(' __dotdotdotint__ ', csource) 246 # Replace "float ..." or "double..." with "__dotdotdotfloat__" 247 csource = _r_float_dotdotdot.sub(' __dotdotdotfloat__ ', csource) 248 # Replace all remaining "..." with the same name, "__dotdotdot__", 249 # which is declared with a typedef for the purpose of C parsing. 
250 csource = csource.replace('...', ' __dotdotdot__ ') 251 # Finally, put back the line directives 252 csource = _put_back_line_directives(csource, line_directives) 253 return csource, macros 254 255def _common_type_names(csource): 256 # Look in the source for what looks like usages of types from the 257 # list of common types. A "usage" is approximated here as the 258 # appearance of the word, minus a "definition" of the type, which 259 # is the last word in a "typedef" statement. Approximative only 260 # but should be fine for all the common types. 261 look_for_words = set(COMMON_TYPES) 262 look_for_words.add(';') 263 look_for_words.add(',') 264 look_for_words.add('(') 265 look_for_words.add(')') 266 look_for_words.add('typedef') 267 words_used = set() 268 is_typedef = False 269 paren = 0 270 previous_word = '' 271 for word in _r_words.findall(csource): 272 if word in look_for_words: 273 if word == ';': 274 if is_typedef: 275 words_used.discard(previous_word) 276 look_for_words.discard(previous_word) 277 is_typedef = False 278 elif word == 'typedef': 279 is_typedef = True 280 paren = 0 281 elif word == '(': 282 paren += 1 283 elif word == ')': 284 paren -= 1 285 elif word == ',': 286 if is_typedef and paren == 0: 287 words_used.discard(previous_word) 288 look_for_words.discard(previous_word) 289 else: # word in COMMON_TYPES 290 words_used.add(word) 291 previous_word = word 292 return words_used 293 294 295class Parser(object): 296 297 def __init__(self): 298 self._declarations = {} 299 self._included_declarations = set() 300 self._anonymous_counter = 0 301 self._structnode2type = weakref.WeakKeyDictionary() 302 self._options = {} 303 self._int_constants = {} 304 self._recomplete = [] 305 self._uses_new_feature = None 306 307 def _parse(self, csource): 308 csource, macros = _preprocess(csource) 309 # XXX: for more efficiency we would need to poke into the 310 # internals of CParser... 
the following registers the 311 # typedefs, because their presence or absence influences the 312 # parsing itself (but what they are typedef'ed to plays no role) 313 ctn = _common_type_names(csource) 314 typenames = [] 315 for name in sorted(self._declarations): 316 if name.startswith('typedef '): 317 name = name[8:] 318 typenames.append(name) 319 ctn.discard(name) 320 typenames += sorted(ctn) 321 # 322 csourcelines = [] 323 csourcelines.append('# 1 "<cdef automatic initialization code>"') 324 for typename in typenames: 325 csourcelines.append('typedef int %s;' % typename) 326 csourcelines.append('typedef int __dotdotdotint__, __dotdotdotfloat__,' 327 ' __dotdotdot__;') 328 # this forces pycparser to consider the following in the file 329 # called <cdef source string> from line 1 330 csourcelines.append('# 1 "%s"' % (CDEF_SOURCE_STRING,)) 331 csourcelines.append(csource) 332 fullcsource = '\n'.join(csourcelines) 333 if lock is not None: 334 lock.acquire() # pycparser is not thread-safe... 335 try: 336 ast = _get_parser().parse(fullcsource) 337 except pycparser.c_parser.ParseError as e: 338 self.convert_pycparser_error(e, csource) 339 finally: 340 if lock is not None: 341 lock.release() 342 # csource will be used to find buggy source text 343 return ast, macros, csource 344 345 def _convert_pycparser_error(self, e, csource): 346 # xxx look for "<cdef source string>:NUM:" at the start of str(e) 347 # and interpret that as a line number. This will not work if 348 # the user gives explicit ``# NUM "FILE"`` directives. 
349 line = None 350 msg = str(e) 351 match = re.match(r"%s:(\d+):" % (CDEF_SOURCE_STRING,), msg) 352 if match: 353 linenum = int(match.group(1), 10) 354 csourcelines = csource.splitlines() 355 if 1 <= linenum <= len(csourcelines): 356 line = csourcelines[linenum-1] 357 return line 358 359 def convert_pycparser_error(self, e, csource): 360 line = self._convert_pycparser_error(e, csource) 361 362 msg = str(e) 363 if line: 364 msg = 'cannot parse "%s"\n%s' % (line.strip(), msg) 365 else: 366 msg = 'parse error\n%s' % (msg,) 367 raise CDefError(msg) 368 369 def parse(self, csource, override=False, packed=False, pack=None, 370 dllexport=False): 371 if packed: 372 if packed != True: 373 raise ValueError("'packed' should be False or True; use " 374 "'pack' to give another value") 375 if pack: 376 raise ValueError("cannot give both 'pack' and 'packed'") 377 pack = 1 378 elif pack: 379 if pack & (pack - 1): 380 raise ValueError("'pack' must be a power of two, not %r" % 381 (pack,)) 382 else: 383 pack = 0 384 prev_options = self._options 385 try: 386 self._options = {'override': override, 387 'packed': pack, 388 'dllexport': dllexport} 389 self._internal_parse(csource) 390 finally: 391 self._options = prev_options 392 393 def _internal_parse(self, csource): 394 ast, macros, csource = self._parse(csource) 395 # add the macros 396 self._process_macros(macros) 397 # find the first "__dotdotdot__" and use that as a separator 398 # between the repeated typedefs and the real csource 399 iterator = iter(ast.ext) 400 for decl in iterator: 401 if decl.name == '__dotdotdot__': 402 break 403 else: 404 assert 0 405 current_decl = None 406 # 407 try: 408 self._inside_extern_python = '__cffi_extern_python_stop' 409 for decl in iterator: 410 current_decl = decl 411 if isinstance(decl, pycparser.c_ast.Decl): 412 self._parse_decl(decl) 413 elif isinstance(decl, pycparser.c_ast.Typedef): 414 if not decl.name: 415 raise CDefError("typedef does not declare any name", 416 decl) 417 quals = 0 418 
if (isinstance(decl.type.type, pycparser.c_ast.IdentifierType) and 419 decl.type.type.names[-1].startswith('__dotdotdot')): 420 realtype = self._get_unknown_type(decl) 421 elif (isinstance(decl.type, pycparser.c_ast.PtrDecl) and 422 isinstance(decl.type.type, pycparser.c_ast.TypeDecl) and 423 isinstance(decl.type.type.type, 424 pycparser.c_ast.IdentifierType) and 425 decl.type.type.type.names[-1].startswith('__dotdotdot')): 426 realtype = self._get_unknown_ptr_type(decl) 427 else: 428 realtype, quals = self._get_type_and_quals( 429 decl.type, name=decl.name, partial_length_ok=True, 430 typedef_example="*(%s *)0" % (decl.name,)) 431 self._declare('typedef ' + decl.name, realtype, quals=quals) 432 elif decl.__class__.__name__ == 'Pragma': 433 pass # skip pragma, only in pycparser 2.15 434 else: 435 raise CDefError("unexpected <%s>: this construct is valid " 436 "C but not valid in cdef()" % 437 decl.__class__.__name__, decl) 438 except CDefError as e: 439 if len(e.args) == 1: 440 e.args = e.args + (current_decl,) 441 raise 442 except FFIError as e: 443 msg = self._convert_pycparser_error(e, csource) 444 if msg: 445 e.args = (e.args[0] + "\n *** Err: %s" % msg,) 446 raise 447 448 def _add_constants(self, key, val): 449 if key in self._int_constants: 450 if self._int_constants[key] == val: 451 return # ignore identical double declarations 452 raise FFIError( 453 "multiple declarations of constant: %s" % (key,)) 454 self._int_constants[key] = val 455 456 def _add_integer_constant(self, name, int_str): 457 int_str = int_str.lower().rstrip("ul") 458 neg = int_str.startswith('-') 459 if neg: 460 int_str = int_str[1:] 461 # "010" is not valid oct in py3 462 if (int_str.startswith("0") and int_str != '0' 463 and not int_str.startswith("0x")): 464 int_str = "0o" + int_str[1:] 465 pyvalue = int(int_str, 0) 466 if neg: 467 pyvalue = -pyvalue 468 self._add_constants(name, pyvalue) 469 self._declare('macro ' + name, pyvalue) 470 471 def _process_macros(self, macros): 472 for key, 
value in macros.items(): 473 value = value.strip() 474 if _r_int_literal.match(value): 475 self._add_integer_constant(key, value) 476 elif value == '...': 477 self._declare('macro ' + key, value) 478 else: 479 raise CDefError( 480 'only supports one of the following syntax:\n' 481 ' #define %s ... (literally dot-dot-dot)\n' 482 ' #define %s NUMBER (with NUMBER an integer' 483 ' constant, decimal/hex/octal)\n' 484 'got:\n' 485 ' #define %s %s' 486 % (key, key, key, value)) 487 488 def _declare_function(self, tp, quals, decl): 489 tp = self._get_type_pointer(tp, quals) 490 if self._options.get('dllexport'): 491 tag = 'dllexport_python ' 492 elif self._inside_extern_python == '__cffi_extern_python_start': 493 tag = 'extern_python ' 494 elif self._inside_extern_python == '__cffi_extern_python_plus_c_start': 495 tag = 'extern_python_plus_c ' 496 else: 497 tag = 'function ' 498 self._declare(tag + decl.name, tp) 499 500 def _parse_decl(self, decl): 501 node = decl.type 502 if isinstance(node, pycparser.c_ast.FuncDecl): 503 tp, quals = self._get_type_and_quals(node, name=decl.name) 504 assert isinstance(tp, model.RawFunctionType) 505 self._declare_function(tp, quals, decl) 506 else: 507 if isinstance(node, pycparser.c_ast.Struct): 508 self._get_struct_union_enum_type('struct', node) 509 elif isinstance(node, pycparser.c_ast.Union): 510 self._get_struct_union_enum_type('union', node) 511 elif isinstance(node, pycparser.c_ast.Enum): 512 self._get_struct_union_enum_type('enum', node) 513 elif not decl.name: 514 raise CDefError("construct does not declare any variable", 515 decl) 516 # 517 if decl.name: 518 tp, quals = self._get_type_and_quals(node, 519 partial_length_ok=True) 520 if tp.is_raw_function: 521 self._declare_function(tp, quals, decl) 522 elif (tp.is_integer_type() and 523 hasattr(decl, 'init') and 524 hasattr(decl.init, 'value') and 525 _r_int_literal.match(decl.init.value)): 526 self._add_integer_constant(decl.name, decl.init.value) 527 elif 
(tp.is_integer_type() and 528 isinstance(decl.init, pycparser.c_ast.UnaryOp) and 529 decl.init.op == '-' and 530 hasattr(decl.init.expr, 'value') and 531 _r_int_literal.match(decl.init.expr.value)): 532 self._add_integer_constant(decl.name, 533 '-' + decl.init.expr.value) 534 elif (tp is model.void_type and 535 decl.name.startswith('__cffi_extern_python_')): 536 # hack: `extern "Python"` in the C source is replaced 537 # with "void __cffi_extern_python_start;" and 538 # "void __cffi_extern_python_stop;" 539 self._inside_extern_python = decl.name 540 else: 541 if self._inside_extern_python !='__cffi_extern_python_stop': 542 raise CDefError( 543 "cannot declare constants or " 544 "variables with 'extern \"Python\"'") 545 if (quals & model.Q_CONST) and not tp.is_array_type: 546 self._declare('constant ' + decl.name, tp, quals=quals) 547 else: 548 _warn_for_non_extern_non_static_global_variable(decl) 549 self._declare('variable ' + decl.name, tp, quals=quals) 550 551 def parse_type(self, cdecl): 552 return self.parse_type_and_quals(cdecl)[0] 553 554 def parse_type_and_quals(self, cdecl): 555 ast, macros = self._parse('void __dummy(\n%s\n);' % cdecl)[:2] 556 assert not macros 557 exprnode = ast.ext[-1].type.args.params[0] 558 if isinstance(exprnode, pycparser.c_ast.ID): 559 raise CDefError("unknown identifier '%s'" % (exprnode.name,)) 560 return self._get_type_and_quals(exprnode.type) 561 562 def _declare(self, name, obj, included=False, quals=0): 563 if name in self._declarations: 564 prevobj, prevquals = self._declarations[name] 565 if prevobj is obj and prevquals == quals: 566 return 567 if not self._options.get('override'): 568 raise FFIError( 569 "multiple declarations of %s (for interactive usage, " 570 "try cdef(xx, override=True))" % (name,)) 571 assert '__dotdotdot__' not in name.split() 572 self._declarations[name] = (obj, quals) 573 if included: 574 self._included_declarations.add(obj) 575 576 def _extract_quals(self, type): 577 quals = 0 578 if 
isinstance(type, (pycparser.c_ast.TypeDecl, 579 pycparser.c_ast.PtrDecl)): 580 if 'const' in type.quals: 581 quals |= model.Q_CONST 582 if 'volatile' in type.quals: 583 quals |= model.Q_VOLATILE 584 if 'restrict' in type.quals: 585 quals |= model.Q_RESTRICT 586 return quals 587 588 def _get_type_pointer(self, type, quals, declname=None): 589 if isinstance(type, model.RawFunctionType): 590 return type.as_function_pointer() 591 if (isinstance(type, model.StructOrUnionOrEnum) and 592 type.name.startswith('$') and type.name[1:].isdigit() and 593 type.forcename is None and declname is not None): 594 return model.NamedPointerType(type, declname, quals) 595 return model.PointerType(type, quals) 596 597 def _get_type_and_quals(self, typenode, name=None, partial_length_ok=False, 598 typedef_example=None): 599 # first, dereference typedefs, if we have it already parsed, we're good 600 if (isinstance(typenode, pycparser.c_ast.TypeDecl) and 601 isinstance(typenode.type, pycparser.c_ast.IdentifierType) and 602 len(typenode.type.names) == 1 and 603 ('typedef ' + typenode.type.names[0]) in self._declarations): 604 tp, quals = self._declarations['typedef ' + typenode.type.names[0]] 605 quals |= self._extract_quals(typenode) 606 return tp, quals 607 # 608 if isinstance(typenode, pycparser.c_ast.ArrayDecl): 609 # array type 610 if typenode.dim is None: 611 length = None 612 else: 613 length = self._parse_constant( 614 typenode.dim, partial_length_ok=partial_length_ok) 615 # a hack: in 'typedef int foo_t[...][...];', don't use '...' as 616 # the length but use directly the C expression that would be 617 # generated by recompiler.py. 
This lets the typedef be used in 618 # many more places within recompiler.py 619 if typedef_example is not None: 620 if length == '...': 621 length = '_cffi_array_len(%s)' % (typedef_example,) 622 typedef_example = "*" + typedef_example 623 # 624 tp, quals = self._get_type_and_quals(typenode.type, 625 partial_length_ok=partial_length_ok, 626 typedef_example=typedef_example) 627 return model.ArrayType(tp, length), quals 628 # 629 if isinstance(typenode, pycparser.c_ast.PtrDecl): 630 # pointer type 631 itemtype, itemquals = self._get_type_and_quals(typenode.type) 632 tp = self._get_type_pointer(itemtype, itemquals, declname=name) 633 quals = self._extract_quals(typenode) 634 return tp, quals 635 # 636 if isinstance(typenode, pycparser.c_ast.TypeDecl): 637 quals = self._extract_quals(typenode) 638 type = typenode.type 639 if isinstance(type, pycparser.c_ast.IdentifierType): 640 # assume a primitive type. get it from .names, but reduce 641 # synonyms to a single chosen combination 642 names = list(type.names) 643 if names != ['signed', 'char']: # keep this unmodified 644 prefixes = {} 645 while names: 646 name = names[0] 647 if name in ('short', 'long', 'signed', 'unsigned'): 648 prefixes[name] = prefixes.get(name, 0) + 1 649 del names[0] 650 else: 651 break 652 # ignore the 'signed' prefix below, and reorder the others 653 newnames = [] 654 for prefix in ('unsigned', 'short', 'long'): 655 for i in range(prefixes.get(prefix, 0)): 656 newnames.append(prefix) 657 if not names: 658 names = ['int'] # implicitly 659 if names == ['int']: # but kill it if 'short' or 'long' 660 if 'short' in prefixes or 'long' in prefixes: 661 names = [] 662 names = newnames + names 663 ident = ' '.join(names) 664 if ident == 'void': 665 return model.void_type, quals 666 if ident == '__dotdotdot__': 667 raise FFIError(':%d: bad usage of "..."' % 668 typenode.coord.line) 669 tp0, quals0 = resolve_common_type(self, ident) 670 return tp0, (quals | quals0) 671 # 672 if isinstance(type, 
pycparser.c_ast.Struct): 673 # 'struct foobar' 674 tp = self._get_struct_union_enum_type('struct', type, name) 675 return tp, quals 676 # 677 if isinstance(type, pycparser.c_ast.Union): 678 # 'union foobar' 679 tp = self._get_struct_union_enum_type('union', type, name) 680 return tp, quals 681 # 682 if isinstance(type, pycparser.c_ast.Enum): 683 # 'enum foobar' 684 tp = self._get_struct_union_enum_type('enum', type, name) 685 return tp, quals 686 # 687 if isinstance(typenode, pycparser.c_ast.FuncDecl): 688 # a function type 689 return self._parse_function_type(typenode, name), 0 690 # 691 # nested anonymous structs or unions end up here 692 if isinstance(typenode, pycparser.c_ast.Struct): 693 return self._get_struct_union_enum_type('struct', typenode, name, 694 nested=True), 0 695 if isinstance(typenode, pycparser.c_ast.Union): 696 return self._get_struct_union_enum_type('union', typenode, name, 697 nested=True), 0 698 # 699 raise FFIError(":%d: bad or unsupported type declaration" % 700 typenode.coord.line) 701 702 def _parse_function_type(self, typenode, funcname=None): 703 params = list(getattr(typenode.args, 'params', [])) 704 for i, arg in enumerate(params): 705 if not hasattr(arg, 'type'): 706 raise CDefError("%s arg %d: unknown type '%s'" 707 " (if you meant to use the old C syntax of giving" 708 " untyped arguments, it is not supported)" 709 % (funcname or 'in expression', i + 1, 710 getattr(arg, 'name', '?'))) 711 ellipsis = ( 712 len(params) > 0 and 713 isinstance(params[-1].type, pycparser.c_ast.TypeDecl) and 714 isinstance(params[-1].type.type, 715 pycparser.c_ast.IdentifierType) and 716 params[-1].type.type.names == ['__dotdotdot__']) 717 if ellipsis: 718 params.pop() 719 if not params: 720 raise CDefError( 721 "%s: a function with only '(...)' as argument" 722 " is not correct C" % (funcname or 'in expression')) 723 args = [self._as_func_arg(*self._get_type_and_quals(argdeclnode.type)) 724 for argdeclnode in params] 725 if not ellipsis and args == 
[model.void_type]: 726 args = [] 727 result, quals = self._get_type_and_quals(typenode.type) 728 # the 'quals' on the result type are ignored. HACK: we absure them 729 # to detect __stdcall functions: we textually replace "__stdcall" 730 # with "volatile volatile const" above. 731 abi = None 732 if hasattr(typenode.type, 'quals'): # else, probable syntax error anyway 733 if typenode.type.quals[-3:] == ['volatile', 'volatile', 'const']: 734 abi = '__stdcall' 735 return model.RawFunctionType(tuple(args), result, ellipsis, abi) 736 737 def _as_func_arg(self, type, quals): 738 if isinstance(type, model.ArrayType): 739 return model.PointerType(type.item, quals) 740 elif isinstance(type, model.RawFunctionType): 741 return type.as_function_pointer() 742 else: 743 return type 744 745 def _get_struct_union_enum_type(self, kind, type, name=None, nested=False): 746 # First, a level of caching on the exact 'type' node of the AST. 747 # This is obscure, but needed because pycparser "unrolls" declarations 748 # such as "typedef struct { } foo_t, *foo_p" and we end up with 749 # an AST that is not a tree, but a DAG, with the "type" node of the 750 # two branches foo_t and foo_p of the trees being the same node. 751 # It's a bit silly but detecting "DAG-ness" in the AST tree seems 752 # to be the only way to distinguish this case from two independent 753 # structs. See test_struct_with_two_usages. 754 try: 755 return self._structnode2type[type] 756 except KeyError: 757 pass 758 # 759 # Note that this must handle parsing "struct foo" any number of 760 # times and always return the same StructType object. Additionally, 761 # one of these times (not necessarily the first), the fields of 762 # the struct can be specified with "struct foo { ...fields... }". 763 # If no name is given, then we have to create a new anonymous struct 764 # with no caching; in this case, the fields are either specified 765 # right now or never. 
766 # 767 force_name = name 768 name = type.name 769 # 770 # get the type or create it if needed 771 if name is None: 772 # 'force_name' is used to guess a more readable name for 773 # anonymous structs, for the common case "typedef struct { } foo". 774 if force_name is not None: 775 explicit_name = '$%s' % force_name 776 else: 777 self._anonymous_counter += 1 778 explicit_name = '$%d' % self._anonymous_counter 779 tp = None 780 else: 781 explicit_name = name 782 key = '%s %s' % (kind, name) 783 tp, _ = self._declarations.get(key, (None, None)) 784 # 785 if tp is None: 786 if kind == 'struct': 787 tp = model.StructType(explicit_name, None, None, None) 788 elif kind == 'union': 789 tp = model.UnionType(explicit_name, None, None, None) 790 elif kind == 'enum': 791 if explicit_name == '__dotdotdot__': 792 raise CDefError("Enums cannot be declared with ...") 793 tp = self._build_enum_type(explicit_name, type.values) 794 else: 795 raise AssertionError("kind = %r" % (kind,)) 796 if name is not None: 797 self._declare(key, tp) 798 else: 799 if kind == 'enum' and type.values is not None: 800 raise NotImplementedError( 801 "enum %s: the '{}' declaration should appear on the first " 802 "time the enum is mentioned, not later" % explicit_name) 803 if not tp.forcename: 804 tp.force_the_name(force_name) 805 if tp.forcename and '$' in tp.name: 806 self._declare('anonymous %s' % tp.forcename, tp) 807 # 808 self._structnode2type[type] = tp 809 # 810 # enums: done here 811 if kind == 'enum': 812 return tp 813 # 814 # is there a 'type.decls'? If yes, then this is the place in the 815 # C sources that declare the fields. If no, then just return the 816 # existing type, possibly still incomplete. 
817 if type.decls is None: 818 return tp 819 # 820 if tp.fldnames is not None: 821 raise CDefError("duplicate declaration of struct %s" % name) 822 fldnames = [] 823 fldtypes = [] 824 fldbitsize = [] 825 fldquals = [] 826 for decl in type.decls: 827 if (isinstance(decl.type, pycparser.c_ast.IdentifierType) and 828 ''.join(decl.type.names) == '__dotdotdot__'): 829 # XXX pycparser is inconsistent: 'names' should be a list 830 # of strings, but is sometimes just one string. Use 831 # str.join() as a way to cope with both. 832 self._make_partial(tp, nested) 833 continue 834 if decl.bitsize is None: 835 bitsize = -1 836 else: 837 bitsize = self._parse_constant(decl.bitsize) 838 self._partial_length = False 839 type, fqual = self._get_type_and_quals(decl.type, 840 partial_length_ok=True) 841 if self._partial_length: 842 self._make_partial(tp, nested) 843 if isinstance(type, model.StructType) and type.partial: 844 self._make_partial(tp, nested) 845 fldnames.append(decl.name or '') 846 fldtypes.append(type) 847 fldbitsize.append(bitsize) 848 fldquals.append(fqual) 849 tp.fldnames = tuple(fldnames) 850 tp.fldtypes = tuple(fldtypes) 851 tp.fldbitsize = tuple(fldbitsize) 852 tp.fldquals = tuple(fldquals) 853 if fldbitsize != [-1] * len(fldbitsize): 854 if isinstance(tp, model.StructType) and tp.partial: 855 raise NotImplementedError("%s: using both bitfields and '...;'" 856 % (tp,)) 857 tp.packed = self._options.get('packed') 858 if tp.completed: # must be re-completed: it is not opaque any more 859 tp.completed = 0 860 self._recomplete.append(tp) 861 return tp 862 863 def _make_partial(self, tp, nested): 864 if not isinstance(tp, model.StructOrUnion): 865 raise CDefError("%s cannot be partial" % (tp,)) 866 if not tp.has_c_name() and not nested: 867 raise NotImplementedError("%s is partial but has no C name" %(tp,)) 868 tp.partial = True 869 870 def _parse_constant(self, exprnode, partial_length_ok=False): 871 # for now, limited to expressions that are an immediate number 872 
# or positive/negative number 873 if isinstance(exprnode, pycparser.c_ast.Constant): 874 s = exprnode.value 875 if '0' <= s[0] <= '9': 876 s = s.rstrip('uUlL') 877 try: 878 if s.startswith('0'): 879 return int(s, 8) 880 else: 881 return int(s, 10) 882 except ValueError: 883 if len(s) > 1: 884 if s.lower()[0:2] == '0x': 885 return int(s, 16) 886 elif s.lower()[0:2] == '0b': 887 return int(s, 2) 888 raise CDefError("invalid constant %r" % (s,)) 889 elif s[0] == "'" and s[-1] == "'" and ( 890 len(s) == 3 or (len(s) == 4 and s[1] == "\\")): 891 return ord(s[-2]) 892 else: 893 raise CDefError("invalid constant %r" % (s,)) 894 # 895 if (isinstance(exprnode, pycparser.c_ast.UnaryOp) and 896 exprnode.op == '+'): 897 return self._parse_constant(exprnode.expr) 898 # 899 if (isinstance(exprnode, pycparser.c_ast.UnaryOp) and 900 exprnode.op == '-'): 901 return -self._parse_constant(exprnode.expr) 902 # load previously defined int constant 903 if (isinstance(exprnode, pycparser.c_ast.ID) and 904 exprnode.name in self._int_constants): 905 return self._int_constants[exprnode.name] 906 # 907 if (isinstance(exprnode, pycparser.c_ast.ID) and 908 exprnode.name == '__dotdotdotarray__'): 909 if partial_length_ok: 910 self._partial_length = True 911 return '...' 
912 raise FFIError(":%d: unsupported '[...]' here, cannot derive " 913 "the actual array length in this context" 914 % exprnode.coord.line) 915 # 916 if isinstance(exprnode, pycparser.c_ast.BinaryOp): 917 left = self._parse_constant(exprnode.left) 918 right = self._parse_constant(exprnode.right) 919 if exprnode.op == '+': 920 return left + right 921 elif exprnode.op == '-': 922 return left - right 923 elif exprnode.op == '*': 924 return left * right 925 elif exprnode.op == '/': 926 return self._c_div(left, right) 927 elif exprnode.op == '%': 928 return left - self._c_div(left, right) * right 929 elif exprnode.op == '<<': 930 return left << right 931 elif exprnode.op == '>>': 932 return left >> right 933 elif exprnode.op == '&': 934 return left & right 935 elif exprnode.op == '|': 936 return left | right 937 elif exprnode.op == '^': 938 return left ^ right 939 # 940 raise FFIError(":%d: unsupported expression: expected a " 941 "simple numeric constant" % exprnode.coord.line) 942 943 def _c_div(self, a, b): 944 result = a // b 945 if ((a < 0) ^ (b < 0)) and (a % b) != 0: 946 result += 1 947 return result 948 949 def _build_enum_type(self, explicit_name, decls): 950 if decls is not None: 951 partial = False 952 enumerators = [] 953 enumvalues = [] 954 nextenumvalue = 0 955 for enum in decls.enumerators: 956 if _r_enum_dotdotdot.match(enum.name): 957 partial = True 958 continue 959 if enum.value is not None: 960 nextenumvalue = self._parse_constant(enum.value) 961 enumerators.append(enum.name) 962 enumvalues.append(nextenumvalue) 963 self._add_constants(enum.name, nextenumvalue) 964 nextenumvalue += 1 965 enumerators = tuple(enumerators) 966 enumvalues = tuple(enumvalues) 967 tp = model.EnumType(explicit_name, enumerators, enumvalues) 968 tp.partial = partial 969 else: # opaque enum 970 tp = model.EnumType(explicit_name, (), ()) 971 return tp 972 973 def include(self, other): 974 for name, (tp, quals) in other._declarations.items(): 975 if name.startswith('anonymous 
$enum_$'): 976 continue # fix for test_anonymous_enum_include 977 kind = name.split(' ', 1)[0] 978 if kind in ('struct', 'union', 'enum', 'anonymous', 'typedef'): 979 self._declare(name, tp, included=True, quals=quals) 980 for k, v in other._int_constants.items(): 981 self._add_constants(k, v) 982 983 def _get_unknown_type(self, decl): 984 typenames = decl.type.type.names 985 if typenames == ['__dotdotdot__']: 986 return model.unknown_type(decl.name) 987 988 if typenames == ['__dotdotdotint__']: 989 if self._uses_new_feature is None: 990 self._uses_new_feature = "'typedef int... %s'" % decl.name 991 return model.UnknownIntegerType(decl.name) 992 993 if typenames == ['__dotdotdotfloat__']: 994 # note: not for 'long double' so far 995 if self._uses_new_feature is None: 996 self._uses_new_feature = "'typedef float... %s'" % decl.name 997 return model.UnknownFloatType(decl.name) 998 999 raise FFIError(':%d: unsupported usage of "..." in typedef' 1000 % decl.coord.line) 1001 1002 def _get_unknown_ptr_type(self, decl): 1003 if decl.type.type.type.names == ['__dotdotdot__']: 1004 return model.unknown_ptr_type(decl.name) 1005 raise FFIError(':%d: unsupported usage of "..." in typedef' 1006 % decl.coord.line) 1007