• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1from . import model
2from .commontypes import COMMON_TYPES, resolve_common_type
3from .error import FFIError, CDefError
4try:
5    from . import _pycparser as pycparser
6except ImportError:
7    import pycparser
8import weakref, re, sys
9
10try:
11    if sys.version_info < (3,):
12        import thread as _thread
13    else:
14        import _thread
15    lock = _thread.allocate_lock()
16except ImportError:
17    lock = None
18
def _workaround_for_static_import_finders():
    """Make pycparser's table modules visible to static import scanners.

    This function is never called; it exists only so that the two
    ``import`` statements below appear literally in the source.
    """
    # Issue #392: packaging tools like cx_Freeze cannot find these
    # modules because pycparser imports them dynamically via exec.
    # This is an obscure workaround.  This function is never called.
    import pycparser.yacctab
    import pycparser.lextab
25
# Pseudo file name under which the user's cdef() text is handed to
# pycparser (see the '# 1 "..."' line emitted in Parser._parse).
CDEF_SOURCE_STRING = "<cdef source string>"
# A /* ... */ comment (may span lines) or a // comment running to the end
# of the line; a backslash followed by any character, including a newline,
# continues the // comment.
_r_comment = re.compile(r"/\*.*?\*/|//([^\n\\]|\\.)*?$",
                        re.DOTALL | re.MULTILINE)
# A "#define NAME rest" line.  Group 1 is NAME, group 2 is the rest of the
# line, where backslash-newline continues the definition.
_r_define  = re.compile(r"^\s*#\s*define\s+([A-Za-z_][A-Za-z_0-9]*)"
                        r"\b((?:[^\n\\]|\\.)*?)$",
                        re.DOTALL | re.MULTILINE)
# A whole line of the form "#line ..." or "# NUMBER ..." (without the
# final newline), with optional spaces/tabs around the '#'.
_r_line_directive = re.compile(r"^[ \t]*#[ \t]*(?:line|\d+)\b.*$", re.MULTILINE)
# "= ..." followed by ',' or '}', or "..." followed by '}'.
_r_partial_enum = re.compile(r"=\s*\.\.\.\s*[,}]|\.\.\.\s*\}")
# Text ending in "__dotdotdotNUM__".
_r_enum_dotdotdot = re.compile(r"__dotdotdot\d+__$")
# "[...]", with optional whitespace inside the brackets.
_r_partial_array = re.compile(r"\[\s*\.\.\.\s*\]")
# Tokenizer: a run of word characters, or any single non-space character.
_r_words = re.compile(r"\w+|\S")
# Filled in lazily by _get_parser().
_parser_cache = None
# Optional '-', optional '0', optional 'x', one or more characters from
# [0-9a-f], then any number of 'l'/'u' suffixes; case-insensitive.
_r_int_literal = re.compile(r"-?0?x?[0-9a-f]+[lu]*$", re.IGNORECASE)
# The word "__stdcall" or "WINAPI" anywhere ...
_r_stdcall1 = re.compile(r"\b(__stdcall|WINAPI)\b")
# ... and the same word directly after an opening parenthesis.
_r_stdcall2 = re.compile(r"[(]\s*(__stdcall|WINAPI)\b")
# The word "__cdecl".
_r_cdecl = re.compile(r"\b__cdecl\b")
# extern "Python", extern "Python+C" or extern "C+Python", followed by
# optional whitespace and exactly one more character (any but a newline).
# Group 1 is the text between the quotes.
_r_extern_python = re.compile(r'\bextern\s*"'
                              r'(Python|Python\s*\+\s*C|C\s*\+\s*Python)"\s*.')
_r_star_const_space = re.compile(       # matches "* const "
    r"[*]\s*((const|volatile|restrict)\b\s*)+")
# One or more integer type keywords followed by "...".
_r_int_dotdotdot = re.compile(r"(\b(int|long|short|signed|unsigned|char)\s*)+"
                              r"\.\.\.")
# "double" or "float" followed by "...".
_r_float_dotdotdot = re.compile(r"\b(double|float)\s*\.\.\.")
49
def _get_parser():
    """Return the module-wide pycparser.CParser, creating it on first use."""
    global _parser_cache
    parser = _parser_cache
    if parser is None:
        parser = _parser_cache = pycparser.CParser()
    return parser
55
def _workaround_for_old_pycparser(csource):
    """Add parentheses around qualified pointer declarators.

    Workaround for a pycparser issue (fixed between pycparser 2.10 and
    2.14): "char*const***" gives a wrong syntax tree, the same as for
    "char***(*const)", whereas "char(*const(***))" is parsed correctly.
    The problem only shows up with several stars in a row separated by
    nothing but qualifiers.  Each time "* const" (or "* const *") is
    found, an opening parenthesis is inserted before each star, and the
    matching closing parentheses are inserted where the declarator ends.
    """
    def declarator_end(text, start):
        # Position of the first ',', ';', '=' or unbalanced ')' found at
        # nesting depth zero from 'start' on, or len(text) if none.
        depth = 0
        for pos in range(start, len(text)):
            ch = text[pos]
            if ch == '(':
                depth += 1
            elif ch == ')':
                if depth == 0:
                    return pos
                depth -= 1
            elif ch in ',;=' and depth == 0:
                return pos
        return len(text)

    pieces = []
    found = _r_star_const_space.search(csource)
    while found:
        after = found.end()
        # text before the star, the new '(' and then e.g. "* const "
        pieces.extend((csource[:found.start()], '(', found.group()))
        closers = ')'
        if csource.startswith('*', after):
            pieces.append('(')
            closers = '))'
        stop = declarator_end(csource, after)
        # keep scanning in what follows the qualifiers, closers included
        csource = csource[after:stop] + closers + csource[stop:]
        found = _r_star_const_space.search(csource)
    pieces.append(csource)
    return ''.join(pieces)
97
def _preprocess_extern_python(csource):
    """Replace each `extern "Python"` construct by start/stop markers.

    input: `extern "Python" int foo(int);` or
           `extern "Python" { int foo(int); }`
    output:
        void __cffi_extern_python_start;
        int foo(int);
        void __cffi_extern_python_stop;

    input: `extern "Python+C" int foo(int);`
    output:
        void __cffi_extern_python_plus_c_start;
        int foo(int);
        void __cffi_extern_python_stop;

    Raises CDefError when the closing '}' or ';' is missing, and
    NotImplementedError for a '{' nested inside the grouping variant.
    """
    pieces = []
    found = _r_extern_python.search(csource)
    while found:
        # the pattern also consumed the first character after the
        # closing quote (and whitespace); step back onto that character
        body_start = found.end() - 1
        pieces.append(csource[:found.start()])
        if 'C' in found.group(1):
            marker = 'void __cffi_extern_python_plus_c_start; '
        else:
            marker = 'void __cffi_extern_python_start; '
        pieces.append(marker)
        if csource[body_start] == '{':
            # grouping variant: copy what is between the braces
            brace = csource.find('}', body_start)
            if brace < 0:
                raise CDefError("'extern \"Python\" {': no '}' found")
            if csource.find('{', body_start + 1, brace) >= 0:
                raise NotImplementedError("cannot use { } inside a block "
                                          "'extern \"Python\" { ... }'")
            pieces.append(csource[body_start+1:brace])
            csource = csource[brace+1:]
        else:
            # non-grouping variant: copy up to and including the ';'
            semicolon = csource.find(';', body_start)
            if semicolon < 0:
                raise CDefError("'extern \"Python\": no ';' found")
            pieces.append(csource[body_start:semicolon+1])
            csource = csource[semicolon+1:]
        pieces.append(' void __cffi_extern_python_stop;')
        found = _r_extern_python.search(csource)
    pieces.append(csource)
    return ''.join(pieces)
147
148def _warn_for_string_literal(csource):
149    if '"' not in csource:
150        return
151    for line in csource.splitlines():
152        if '"' in line and not line.lstrip().startswith('#'):
153            import warnings
154            warnings.warn("String literal found in cdef() or type source. "
155                          "String literals are ignored here, but you should "
156                          "remove them anyway because some character sequences "
157                          "confuse pre-parsing.")
158            break
159
160def _warn_for_non_extern_non_static_global_variable(decl):
161    if not decl.storage:
162        import warnings
163        warnings.warn("Global variable '%s' in cdef(): for consistency "
164                      "with C it should have a storage class specifier "
165                      "(usually 'extern')" % (decl.name,))
166
def _remove_line_directives(csource):
    """Stash away the line directives found in 'csource'.

    _r_line_directive matches whole lines, without the final newline,
    that start with '#line' (some spacing allowed) or '#NUMBER'.  Each
    such line is replaced with exactly '#line@N', N being its index in
    the returned list.  Returns (new_csource, line_directives).
    """
    stashed = []

    def stash(found):
        stashed.append(found.group())
        return '#line@%d' % (len(stashed) - 1)

    return _r_line_directive.sub(stash, csource), stashed
179
def _put_back_line_directives(csource, line_directives):
    """Substitute every '#line@N' placeholder with line_directives[N].

    Any line matched by _r_line_directive that is not such a placeholder
    raises AssertionError: it means a line directive showed up that was
    not stashed away beforehand.
    """
    def restore(found):
        placeholder = found.group()
        if not placeholder.startswith('#line@'):
            raise AssertionError("unexpected #line directive "
                                 "(should have been processed and removed)")
        # what follows the 6 characters of '#line@' is the list index
        return line_directives[int(placeholder[6:])]
    return _r_line_directive.sub(restore, csource)
188
def _preprocess(csource):
    """Rewrite cdef() source text into something pycparser can parse.

    Returns (csource, macros): the rewritten source and a dict mapping
    each '#define' name to its stripped value text.  The steps below
    are applied in order; later steps rely on the earlier ones.
    """
    # First, remove the lines of the form '#line N "filename"' because
    # the "filename" part could confuse the rest
    csource, line_directives = _remove_line_directives(csource)
    # Remove comments.  NOTE: this only works because the cdef() section
    # should not contain any string literals (except in line directives)!
    def replace_keeping_newlines(m):
        # one space plus as many newlines as the comment contained, so
        # that line numbers are unchanged
        return ' ' + m.group().count('\n') * '\n'
    csource = _r_comment.sub(replace_keeping_newlines, csource)
    # Collect and then remove the "#define FOO x" lines
    macros = {}
    for match in _r_define.finditer(csource):
        macroname, macrovalue = match.groups()
        # join backslash-continued lines
        macrovalue = macrovalue.replace('\\\n', '').strip()
        macros[macroname] = macrovalue
    csource = _r_define.sub('', csource)
    #
    # NOTE(review): this compares against a str, so presumably
    # lexicographically -- confirm it holds for every pycparser version
    # that needs the workaround.
    if pycparser.__version__ < '2.14':
        csource = _workaround_for_old_pycparser(csource)
    #
    # BIG HACK: replace WINAPI or __stdcall with "volatile volatile const".
    # It doesn't make sense for the return type of a function to be
    # "volatile volatile const", so we abuse it to detect __stdcall...
    # Hack number 2 is that "int(volatile *fptr)();" is not valid C
    # syntax, so we place the "volatile" before the opening parenthesis.
    csource = _r_stdcall2.sub(' volatile volatile const(', csource)
    csource = _r_stdcall1.sub(' volatile volatile const ', csource)
    csource = _r_cdecl.sub(' ', csource)
    #
    # Replace `extern "Python"` with start/end markers
    csource = _preprocess_extern_python(csource)
    #
    # Now there should not be any string literal left; warn if we get one
    _warn_for_string_literal(csource)
    #
    # Replace "[...]" with "[__dotdotdotarray__]"
    csource = _r_partial_array.sub('[__dotdotdotarray__]', csource)
    #
    # Replace "...}" with "__dotdotdotNUM__}".  This construction should
    # occur only at the end of enums; at the end of structs we have "...;}"
    # and at the end of vararg functions "...);".  Also replace "=...[,}]"
    # with ",__dotdotdotNUM__[,}]": this occurs in the enums too, when
    # giving an unknown value.
    matches = list(_r_partial_enum.finditer(csource))
    # Go through the matches from last to first, so that the positions of
    # the matches still to be handled are not shifted by the edits.
    for number, match in enumerate(reversed(matches)):
        p = match.start()
        if csource[p] == '=':
            p2 = csource.find('...', p, match.end())
            assert p2 > p
            csource = '%s,__dotdotdot%d__ %s' % (csource[:p], number,
                                                 csource[p2+3:])
        else:
            assert csource[p:p+3] == '...'
            csource = '%s __dotdotdot%d__ %s' % (csource[:p], number,
                                                 csource[p+3:])
    # Replace "int ..." or "unsigned long int..." with "__dotdotdotint__"
    csource = _r_int_dotdotdot.sub(' __dotdotdotint__ ', csource)
    # Replace "float ..." or "double..." with "__dotdotdotfloat__"
    csource = _r_float_dotdotdot.sub(' __dotdotdotfloat__ ', csource)
    # Replace all remaining "..." with the same name, "__dotdotdot__",
    # which is declared with a typedef for the purpose of C parsing.
    csource = csource.replace('...', ' __dotdotdot__ ')
    # Finally, put back the line directives
    csource = _put_back_line_directives(csource, line_directives)
    return csource, macros
254
def _common_type_names(csource):
    """Return the set of common type names that 'csource' seems to use.

    Look in the source for what looks like usages of types from the
    list of common types.  A "usage" is approximated here as the
    appearance of the word, minus a "definition" of the type, which
    is the last word in a "typedef" statement (or the word before a
    top-level ',' inside one).  Approximative only but should be fine
    for all the common types.
    """
    structural = (';', ',', '(', ')', 'typedef')
    look_for_words = set(COMMON_TYPES)
    look_for_words.update(structural)
    words_used = set()

    def forget(word):
        # 'word' is the name being defined by a typedef in the source:
        # it is not a usage of a common type, now or later.  Structural
        # tokens must stay tracked, though: the word before the ';' of
        # "typedef int (*f)(void);" is ')', and dropping it would
        # break the parenthesis counting for the rest of the source.
        words_used.discard(word)
        if word not in structural:
            look_for_words.discard(word)

    is_typedef = False
    paren = 0
    previous_word = ''
    for word in _r_words.findall(csource):
        if word in look_for_words:
            if word == ';':
                if is_typedef:
                    forget(previous_word)
                    is_typedef = False
            elif word == 'typedef':
                is_typedef = True
                paren = 0
            elif word == '(':
                paren += 1
            elif word == ')':
                paren -= 1
            elif word == ',':
                # only a top-level ',' separates typedef'ed names
                if is_typedef and paren == 0:
                    forget(previous_word)
            else:   # word in COMMON_TYPES
                words_used.add(word)
        previous_word = word
    return words_used
293
294
295class Parser(object):
296
297    def __init__(self):
298        self._declarations = {}
299        self._included_declarations = set()
300        self._anonymous_counter = 0
301        self._structnode2type = weakref.WeakKeyDictionary()
302        self._options = {}
303        self._int_constants = {}
304        self._recomplete = []
305        self._uses_new_feature = None
306
307    def _parse(self, csource):
308        csource, macros = _preprocess(csource)
309        # XXX: for more efficiency we would need to poke into the
310        # internals of CParser...  the following registers the
311        # typedefs, because their presence or absence influences the
312        # parsing itself (but what they are typedef'ed to plays no role)
313        ctn = _common_type_names(csource)
314        typenames = []
315        for name in sorted(self._declarations):
316            if name.startswith('typedef '):
317                name = name[8:]
318                typenames.append(name)
319                ctn.discard(name)
320        typenames += sorted(ctn)
321        #
322        csourcelines = []
323        csourcelines.append('# 1 "<cdef automatic initialization code>"')
324        for typename in typenames:
325            csourcelines.append('typedef int %s;' % typename)
326        csourcelines.append('typedef int __dotdotdotint__, __dotdotdotfloat__,'
327                            ' __dotdotdot__;')
328        # this forces pycparser to consider the following in the file
329        # called <cdef source string> from line 1
330        csourcelines.append('# 1 "%s"' % (CDEF_SOURCE_STRING,))
331        csourcelines.append(csource)
332        fullcsource = '\n'.join(csourcelines)
333        if lock is not None:
334            lock.acquire()     # pycparser is not thread-safe...
335        try:
336            ast = _get_parser().parse(fullcsource)
337        except pycparser.c_parser.ParseError as e:
338            self.convert_pycparser_error(e, csource)
339        finally:
340            if lock is not None:
341                lock.release()
342        # csource will be used to find buggy source text
343        return ast, macros, csource
344
345    def _convert_pycparser_error(self, e, csource):
346        # xxx look for "<cdef source string>:NUM:" at the start of str(e)
347        # and interpret that as a line number.  This will not work if
348        # the user gives explicit ``# NUM "FILE"`` directives.
349        line = None
350        msg = str(e)
351        match = re.match(r"%s:(\d+):" % (CDEF_SOURCE_STRING,), msg)
352        if match:
353            linenum = int(match.group(1), 10)
354            csourcelines = csource.splitlines()
355            if 1 <= linenum <= len(csourcelines):
356                line = csourcelines[linenum-1]
357        return line
358
359    def convert_pycparser_error(self, e, csource):
360        line = self._convert_pycparser_error(e, csource)
361
362        msg = str(e)
363        if line:
364            msg = 'cannot parse "%s"\n%s' % (line.strip(), msg)
365        else:
366            msg = 'parse error\n%s' % (msg,)
367        raise CDefError(msg)
368
369    def parse(self, csource, override=False, packed=False, pack=None,
370                    dllexport=False):
371        if packed:
372            if packed != True:
373                raise ValueError("'packed' should be False or True; use "
374                                 "'pack' to give another value")
375            if pack:
376                raise ValueError("cannot give both 'pack' and 'packed'")
377            pack = 1
378        elif pack:
379            if pack & (pack - 1):
380                raise ValueError("'pack' must be a power of two, not %r" %
381                    (pack,))
382        else:
383            pack = 0
384        prev_options = self._options
385        try:
386            self._options = {'override': override,
387                             'packed': pack,
388                             'dllexport': dllexport}
389            self._internal_parse(csource)
390        finally:
391            self._options = prev_options
392
    def _internal_parse(self, csource):
        """Parse 'csource' and record every top-level construct it declares.

        Macros are processed first; then each declaration and typedef
        following the automatically generated typedef prelude is
        handled.  Raises CDefError for a typedef without a name or for
        an unsupported top-level construct.
        """
        ast, macros, csource = self._parse(csource)
        # add the macros
        self._process_macros(macros)
        # find the first "__dotdotdot__" and use that as a separator
        # between the repeated typedefs and the real csource
        iterator = iter(ast.ext)
        for decl in iterator:
            if decl.name == '__dotdotdot__':
                break
        else:
            assert 0
        # the most recent node looked at, for error reporting
        current_decl = None
        #
        try:
            # start in the state "outside any extern "Python" section"
            self._inside_extern_python = '__cffi_extern_python_stop'
            # 'iterator' continues right after the separator found above
            for decl in iterator:
                current_decl = decl
                if isinstance(decl, pycparser.c_ast.Decl):
                    self._parse_decl(decl)
                elif isinstance(decl, pycparser.c_ast.Typedef):
                    if not decl.name:
                        raise CDefError("typedef does not declare any name",
                                        decl)
                    quals = 0
                    # "typedef ... foo_t;": the preprocessor turned the
                    # "..." into a name starting with '__dotdotdot'
                    if (isinstance(decl.type.type, pycparser.c_ast.IdentifierType) and
                            decl.type.type.names[-1].startswith('__dotdotdot')):
                        realtype = self._get_unknown_type(decl)
                    # same, but through one level of pointer:
                    # "typedef ... *foo_p;"
                    elif (isinstance(decl.type, pycparser.c_ast.PtrDecl) and
                          isinstance(decl.type.type, pycparser.c_ast.TypeDecl) and
                          isinstance(decl.type.type.type,
                                     pycparser.c_ast.IdentifierType) and
                          decl.type.type.type.names[-1].startswith('__dotdotdot')):
                        realtype = self._get_unknown_ptr_type(decl)
                    else:
                        realtype, quals = self._get_type_and_quals(
                            decl.type, name=decl.name, partial_length_ok=True,
                            typedef_example="*(%s *)0" % (decl.name,))
                    self._declare('typedef ' + decl.name, realtype, quals=quals)
                elif decl.__class__.__name__ == 'Pragma':
                    pass    # skip pragma, only in pycparser 2.15
                else:
                    raise CDefError("unexpected <%s>: this construct is valid "
                                    "C but not valid in cdef()" %
                                    decl.__class__.__name__, decl)
        except CDefError as e:
            # a CDefError raised with only a message gets the node being
            # processed attached as a second argument
            if len(e.args) == 1:
                e.args = e.args + (current_decl,)
            raise
        except FFIError as e:
            # append the offending source line to the message, if it can
            # be located from the error text
            msg = self._convert_pycparser_error(e, csource)
            if msg:
                e.args = (e.args[0] + "\n    *** Err: %s" % msg,)
            raise
447
448    def _add_constants(self, key, val):
449        if key in self._int_constants:
450            if self._int_constants[key] == val:
451                return     # ignore identical double declarations
452            raise FFIError(
453                "multiple declarations of constant: %s" % (key,))
454        self._int_constants[key] = val
455
456    def _add_integer_constant(self, name, int_str):
457        int_str = int_str.lower().rstrip("ul")
458        neg = int_str.startswith('-')
459        if neg:
460            int_str = int_str[1:]
461        # "010" is not valid oct in py3
462        if (int_str.startswith("0") and int_str != '0'
463                and not int_str.startswith("0x")):
464            int_str = "0o" + int_str[1:]
465        pyvalue = int(int_str, 0)
466        if neg:
467            pyvalue = -pyvalue
468        self._add_constants(name, pyvalue)
469        self._declare('macro ' + name, pyvalue)
470
471    def _process_macros(self, macros):
472        for key, value in macros.items():
473            value = value.strip()
474            if _r_int_literal.match(value):
475                self._add_integer_constant(key, value)
476            elif value == '...':
477                self._declare('macro ' + key, value)
478            else:
479                raise CDefError(
480                    'only supports one of the following syntax:\n'
481                    '  #define %s ...     (literally dot-dot-dot)\n'
482                    '  #define %s NUMBER  (with NUMBER an integer'
483                                    ' constant, decimal/hex/octal)\n'
484                    'got:\n'
485                    '  #define %s %s'
486                    % (key, key, key, value))
487
488    def _declare_function(self, tp, quals, decl):
489        tp = self._get_type_pointer(tp, quals)
490        if self._options.get('dllexport'):
491            tag = 'dllexport_python '
492        elif self._inside_extern_python == '__cffi_extern_python_start':
493            tag = 'extern_python '
494        elif self._inside_extern_python == '__cffi_extern_python_plus_c_start':
495            tag = 'extern_python_plus_c '
496        else:
497            tag = 'function '
498        self._declare(tag + decl.name, tp)
499
    def _parse_decl(self, decl):
        """Process one top-level pycparser Decl node.

        Depending on its type, the node declares a function, a struct,
        union or enum, an integer constant, a constant, or a global
        variable; it can also be one of the marker declarations
        produced for `extern "Python"`.  Raises CDefError if the
        construct declares nothing, or declares a constant or variable
        inside an `extern "Python"` section.
        """
        node = decl.type
        if isinstance(node, pycparser.c_ast.FuncDecl):
            tp, quals = self._get_type_and_quals(node, name=decl.name)
            assert isinstance(tp, model.RawFunctionType)
            self._declare_function(tp, quals, decl)
        else:
            # a bare struct/union/enum is registered here, whether or not
            # the declaration also names a variable
            if isinstance(node, pycparser.c_ast.Struct):
                self._get_struct_union_enum_type('struct', node)
            elif isinstance(node, pycparser.c_ast.Union):
                self._get_struct_union_enum_type('union', node)
            elif isinstance(node, pycparser.c_ast.Enum):
                self._get_struct_union_enum_type('enum', node)
            elif not decl.name:
                raise CDefError("construct does not declare any variable",
                                decl)
            #
            if decl.name:
                tp, quals = self._get_type_and_quals(node,
                                                     partial_length_ok=True)
                if tp.is_raw_function:
                    self._declare_function(tp, quals, decl)
                # integer-typed declaration initialised with an integer
                # literal: recorded as an integer constant
                elif (tp.is_integer_type() and
                        hasattr(decl, 'init') and
                        hasattr(decl.init, 'value') and
                        _r_int_literal.match(decl.init.value)):
                    self._add_integer_constant(decl.name, decl.init.value)
                # same, with a negated literal as initialiser
                elif (tp.is_integer_type() and
                        isinstance(decl.init, pycparser.c_ast.UnaryOp) and
                        decl.init.op == '-' and
                        hasattr(decl.init.expr, 'value') and
                        _r_int_literal.match(decl.init.expr.value)):
                    self._add_integer_constant(decl.name,
                                               '-' + decl.init.expr.value)
                elif (tp is model.void_type and
                      decl.name.startswith('__cffi_extern_python_')):
                    # hack: `extern "Python"` in the C source is replaced
                    # with "void __cffi_extern_python_start;" and
                    # "void __cffi_extern_python_stop;"
                    self._inside_extern_python = decl.name
                else:
                    if self._inside_extern_python !='__cffi_extern_python_stop':
                        raise CDefError(
                            "cannot declare constants or "
                            "variables with 'extern \"Python\"'")
                    # const-qualified non-array: a constant; anything
                    # else: a global variable
                    if (quals & model.Q_CONST) and not tp.is_array_type:
                        self._declare('constant ' + decl.name, tp, quals=quals)
                    else:
                        _warn_for_non_extern_non_static_global_variable(decl)
                        self._declare('variable ' + decl.name, tp, quals=quals)
550
551    def parse_type(self, cdecl):
552        return self.parse_type_and_quals(cdecl)[0]
553
554    def parse_type_and_quals(self, cdecl):
555        ast, macros = self._parse('void __dummy(\n%s\n);' % cdecl)[:2]
556        assert not macros
557        exprnode = ast.ext[-1].type.args.params[0]
558        if isinstance(exprnode, pycparser.c_ast.ID):
559            raise CDefError("unknown identifier '%s'" % (exprnode.name,))
560        return self._get_type_and_quals(exprnode.type)
561
562    def _declare(self, name, obj, included=False, quals=0):
563        if name in self._declarations:
564            prevobj, prevquals = self._declarations[name]
565            if prevobj is obj and prevquals == quals:
566                return
567            if not self._options.get('override'):
568                raise FFIError(
569                    "multiple declarations of %s (for interactive usage, "
570                    "try cdef(xx, override=True))" % (name,))
571        assert '__dotdotdot__' not in name.split()
572        self._declarations[name] = (obj, quals)
573        if included:
574            self._included_declarations.add(obj)
575
576    def _extract_quals(self, type):
577        quals = 0
578        if isinstance(type, (pycparser.c_ast.TypeDecl,
579                             pycparser.c_ast.PtrDecl)):
580            if 'const' in type.quals:
581                quals |= model.Q_CONST
582            if 'volatile' in type.quals:
583                quals |= model.Q_VOLATILE
584            if 'restrict' in type.quals:
585                quals |= model.Q_RESTRICT
586        return quals
587
588    def _get_type_pointer(self, type, quals, declname=None):
589        if isinstance(type, model.RawFunctionType):
590            return type.as_function_pointer()
591        if (isinstance(type, model.StructOrUnionOrEnum) and
592                type.name.startswith('$') and type.name[1:].isdigit() and
593                type.forcename is None and declname is not None):
594            return model.NamedPointerType(type, declname, quals)
595        return model.PointerType(type, quals)
596
    def _get_type_and_quals(self, typenode, name=None, partial_length_ok=False,
                            typedef_example=None):
        """Convert the pycparser type node 'typenode' to (type, quals).

        'name' is passed on when building pointer, struct/union/enum and
        function types.  'partial_length_ok' is passed on to
        _parse_constant() for array lengths.  'typedef_example', if
        given, is a C expression used to build the length of arrays
        declared with '...'.  Raises FFIError for a misplaced "..." or
        for a node that is not handled here.
        """
        # first, dereference typedefs, if we have it already parsed, we're good
        if (isinstance(typenode, pycparser.c_ast.TypeDecl) and
            isinstance(typenode.type, pycparser.c_ast.IdentifierType) and
            len(typenode.type.names) == 1 and
            ('typedef ' + typenode.type.names[0]) in self._declarations):
            tp, quals = self._declarations['typedef ' + typenode.type.names[0]]
            # add the qualifiers written at the place of use
            quals |= self._extract_quals(typenode)
            return tp, quals
        #
        if isinstance(typenode, pycparser.c_ast.ArrayDecl):
            # array type
            if typenode.dim is None:
                length = None
            else:
                length = self._parse_constant(
                    typenode.dim, partial_length_ok=partial_length_ok)
            # a hack: in 'typedef int foo_t[...][...];', don't use '...' as
            # the length but use directly the C expression that would be
            # generated by recompiler.py.  This lets the typedef be used in
            # many more places within recompiler.py
            if typedef_example is not None:
                if length == '...':
                    length = '_cffi_array_len(%s)' % (typedef_example,)
                # the example expression for the item type
                typedef_example = "*" + typedef_example
            #
            tp, quals = self._get_type_and_quals(typenode.type,
                                partial_length_ok=partial_length_ok,
                                typedef_example=typedef_example)
            return model.ArrayType(tp, length), quals
        #
        if isinstance(typenode, pycparser.c_ast.PtrDecl):
            # pointer type
            itemtype, itemquals = self._get_type_and_quals(typenode.type)
            tp = self._get_type_pointer(itemtype, itemquals, declname=name)
            # the returned quals are those of the pointer node itself
            quals = self._extract_quals(typenode)
            return tp, quals
        #
        if isinstance(typenode, pycparser.c_ast.TypeDecl):
            quals = self._extract_quals(typenode)
            type = typenode.type
            if isinstance(type, pycparser.c_ast.IdentifierType):
                # assume a primitive type.  get it from .names, but reduce
                # synonyms to a single chosen combination
                names = list(type.names)
                if names != ['signed', 'char']:    # keep this unmodified
                    # count the leading size/sign keywords
                    # (the loop below rebinds the 'name' argument, which
                    # is not used again before this branch returns)
                    prefixes = {}
                    while names:
                        name = names[0]
                        if name in ('short', 'long', 'signed', 'unsigned'):
                            prefixes[name] = prefixes.get(name, 0) + 1
                            del names[0]
                        else:
                            break
                    # ignore the 'signed' prefix below, and reorder the others
                    newnames = []
                    for prefix in ('unsigned', 'short', 'long'):
                        for i in range(prefixes.get(prefix, 0)):
                            newnames.append(prefix)
                    if not names:
                        names = ['int']    # implicitly
                    if names == ['int']:   # but kill it if 'short' or 'long'
                        if 'short' in prefixes or 'long' in prefixes:
                            names = []
                    names = newnames + names
                ident = ' '.join(names)
                if ident == 'void':
                    return model.void_type, quals
                if ident == '__dotdotdot__':
                    raise FFIError(':%d: bad usage of "..."' %
                            typenode.coord.line)
                tp0, quals0 = resolve_common_type(self, ident)
                return tp0, (quals | quals0)
            #
            if isinstance(type, pycparser.c_ast.Struct):
                # 'struct foobar'
                tp = self._get_struct_union_enum_type('struct', type, name)
                return tp, quals
            #
            if isinstance(type, pycparser.c_ast.Union):
                # 'union foobar'
                tp = self._get_struct_union_enum_type('union', type, name)
                return tp, quals
            #
            if isinstance(type, pycparser.c_ast.Enum):
                # 'enum foobar'
                tp = self._get_struct_union_enum_type('enum', type, name)
                return tp, quals
        #
        if isinstance(typenode, pycparser.c_ast.FuncDecl):
            # a function type
            return self._parse_function_type(typenode, name), 0
        #
        # nested anonymous structs or unions end up here
        if isinstance(typenode, pycparser.c_ast.Struct):
            return self._get_struct_union_enum_type('struct', typenode, name,
                                                    nested=True), 0
        if isinstance(typenode, pycparser.c_ast.Union):
            return self._get_struct_union_enum_type('union', typenode, name,
                                                    nested=True), 0
        #
        # none of the cases above applied
        raise FFIError(":%d: bad or unsupported type declaration" %
                typenode.coord.line)
701
702    def _parse_function_type(self, typenode, funcname=None):
703        params = list(getattr(typenode.args, 'params', []))
704        for i, arg in enumerate(params):
705            if not hasattr(arg, 'type'):
706                raise CDefError("%s arg %d: unknown type '%s'"
707                    " (if you meant to use the old C syntax of giving"
708                    " untyped arguments, it is not supported)"
709                    % (funcname or 'in expression', i + 1,
710                       getattr(arg, 'name', '?')))
711        ellipsis = (
712            len(params) > 0 and
713            isinstance(params[-1].type, pycparser.c_ast.TypeDecl) and
714            isinstance(params[-1].type.type,
715                       pycparser.c_ast.IdentifierType) and
716            params[-1].type.type.names == ['__dotdotdot__'])
717        if ellipsis:
718            params.pop()
719            if not params:
720                raise CDefError(
721                    "%s: a function with only '(...)' as argument"
722                    " is not correct C" % (funcname or 'in expression'))
723        args = [self._as_func_arg(*self._get_type_and_quals(argdeclnode.type))
724                for argdeclnode in params]
725        if not ellipsis and args == [model.void_type]:
726            args = []
727        result, quals = self._get_type_and_quals(typenode.type)
728        # the 'quals' on the result type are ignored.  HACK: we absure them
729        # to detect __stdcall functions: we textually replace "__stdcall"
730        # with "volatile volatile const" above.
731        abi = None
732        if hasattr(typenode.type, 'quals'): # else, probable syntax error anyway
733            if typenode.type.quals[-3:] == ['volatile', 'volatile', 'const']:
734                abi = '__stdcall'
735        return model.RawFunctionType(tuple(args), result, ellipsis, abi)
736
737    def _as_func_arg(self, type, quals):
738        if isinstance(type, model.ArrayType):
739            return model.PointerType(type.item, quals)
740        elif isinstance(type, model.RawFunctionType):
741            return type.as_function_pointer()
742        else:
743            return type
744
745    def _get_struct_union_enum_type(self, kind, type, name=None, nested=False):
746        # First, a level of caching on the exact 'type' node of the AST.
747        # This is obscure, but needed because pycparser "unrolls" declarations
748        # such as "typedef struct { } foo_t, *foo_p" and we end up with
749        # an AST that is not a tree, but a DAG, with the "type" node of the
750        # two branches foo_t and foo_p of the trees being the same node.
751        # It's a bit silly but detecting "DAG-ness" in the AST tree seems
752        # to be the only way to distinguish this case from two independent
753        # structs.  See test_struct_with_two_usages.
754        try:
755            return self._structnode2type[type]
756        except KeyError:
757            pass
758        #
759        # Note that this must handle parsing "struct foo" any number of
760        # times and always return the same StructType object.  Additionally,
761        # one of these times (not necessarily the first), the fields of
762        # the struct can be specified with "struct foo { ...fields... }".
763        # If no name is given, then we have to create a new anonymous struct
764        # with no caching; in this case, the fields are either specified
765        # right now or never.
766        #
767        force_name = name
768        name = type.name
769        #
770        # get the type or create it if needed
771        if name is None:
772            # 'force_name' is used to guess a more readable name for
773            # anonymous structs, for the common case "typedef struct { } foo".
774            if force_name is not None:
775                explicit_name = '$%s' % force_name
776            else:
777                self._anonymous_counter += 1
778                explicit_name = '$%d' % self._anonymous_counter
779            tp = None
780        else:
781            explicit_name = name
782            key = '%s %s' % (kind, name)
783            tp, _ = self._declarations.get(key, (None, None))
784        #
785        if tp is None:
786            if kind == 'struct':
787                tp = model.StructType(explicit_name, None, None, None)
788            elif kind == 'union':
789                tp = model.UnionType(explicit_name, None, None, None)
790            elif kind == 'enum':
791                if explicit_name == '__dotdotdot__':
792                    raise CDefError("Enums cannot be declared with ...")
793                tp = self._build_enum_type(explicit_name, type.values)
794            else:
795                raise AssertionError("kind = %r" % (kind,))
796            if name is not None:
797                self._declare(key, tp)
798        else:
799            if kind == 'enum' and type.values is not None:
800                raise NotImplementedError(
801                    "enum %s: the '{}' declaration should appear on the first "
802                    "time the enum is mentioned, not later" % explicit_name)
803        if not tp.forcename:
804            tp.force_the_name(force_name)
805        if tp.forcename and '$' in tp.name:
806            self._declare('anonymous %s' % tp.forcename, tp)
807        #
808        self._structnode2type[type] = tp
809        #
810        # enums: done here
811        if kind == 'enum':
812            return tp
813        #
814        # is there a 'type.decls'?  If yes, then this is the place in the
815        # C sources that declare the fields.  If no, then just return the
816        # existing type, possibly still incomplete.
817        if type.decls is None:
818            return tp
819        #
820        if tp.fldnames is not None:
821            raise CDefError("duplicate declaration of struct %s" % name)
822        fldnames = []
823        fldtypes = []
824        fldbitsize = []
825        fldquals = []
826        for decl in type.decls:
827            if (isinstance(decl.type, pycparser.c_ast.IdentifierType) and
828                    ''.join(decl.type.names) == '__dotdotdot__'):
829                # XXX pycparser is inconsistent: 'names' should be a list
830                # of strings, but is sometimes just one string.  Use
831                # str.join() as a way to cope with both.
832                self._make_partial(tp, nested)
833                continue
834            if decl.bitsize is None:
835                bitsize = -1
836            else:
837                bitsize = self._parse_constant(decl.bitsize)
838            self._partial_length = False
839            type, fqual = self._get_type_and_quals(decl.type,
840                                                   partial_length_ok=True)
841            if self._partial_length:
842                self._make_partial(tp, nested)
843            if isinstance(type, model.StructType) and type.partial:
844                self._make_partial(tp, nested)
845            fldnames.append(decl.name or '')
846            fldtypes.append(type)
847            fldbitsize.append(bitsize)
848            fldquals.append(fqual)
849        tp.fldnames = tuple(fldnames)
850        tp.fldtypes = tuple(fldtypes)
851        tp.fldbitsize = tuple(fldbitsize)
852        tp.fldquals = tuple(fldquals)
853        if fldbitsize != [-1] * len(fldbitsize):
854            if isinstance(tp, model.StructType) and tp.partial:
855                raise NotImplementedError("%s: using both bitfields and '...;'"
856                                          % (tp,))
857        tp.packed = self._options.get('packed')
858        if tp.completed:    # must be re-completed: it is not opaque any more
859            tp.completed = 0
860            self._recomplete.append(tp)
861        return tp
862
863    def _make_partial(self, tp, nested):
864        if not isinstance(tp, model.StructOrUnion):
865            raise CDefError("%s cannot be partial" % (tp,))
866        if not tp.has_c_name() and not nested:
867            raise NotImplementedError("%s is partial but has no C name" %(tp,))
868        tp.partial = True
869
870    def _parse_constant(self, exprnode, partial_length_ok=False):
871        # for now, limited to expressions that are an immediate number
872        # or positive/negative number
873        if isinstance(exprnode, pycparser.c_ast.Constant):
874            s = exprnode.value
875            if '0' <= s[0] <= '9':
876                s = s.rstrip('uUlL')
877                try:
878                    if s.startswith('0'):
879                        return int(s, 8)
880                    else:
881                        return int(s, 10)
882                except ValueError:
883                    if len(s) > 1:
884                        if s.lower()[0:2] == '0x':
885                            return int(s, 16)
886                        elif s.lower()[0:2] == '0b':
887                            return int(s, 2)
888                raise CDefError("invalid constant %r" % (s,))
889            elif s[0] == "'" and s[-1] == "'" and (
890                    len(s) == 3 or (len(s) == 4 and s[1] == "\\")):
891                return ord(s[-2])
892            else:
893                raise CDefError("invalid constant %r" % (s,))
894        #
895        if (isinstance(exprnode, pycparser.c_ast.UnaryOp) and
896                exprnode.op == '+'):
897            return self._parse_constant(exprnode.expr)
898        #
899        if (isinstance(exprnode, pycparser.c_ast.UnaryOp) and
900                exprnode.op == '-'):
901            return -self._parse_constant(exprnode.expr)
902        # load previously defined int constant
903        if (isinstance(exprnode, pycparser.c_ast.ID) and
904                exprnode.name in self._int_constants):
905            return self._int_constants[exprnode.name]
906        #
907        if (isinstance(exprnode, pycparser.c_ast.ID) and
908                    exprnode.name == '__dotdotdotarray__'):
909            if partial_length_ok:
910                self._partial_length = True
911                return '...'
912            raise FFIError(":%d: unsupported '[...]' here, cannot derive "
913                           "the actual array length in this context"
914                           % exprnode.coord.line)
915        #
916        if isinstance(exprnode, pycparser.c_ast.BinaryOp):
917            left = self._parse_constant(exprnode.left)
918            right = self._parse_constant(exprnode.right)
919            if exprnode.op == '+':
920                return left + right
921            elif exprnode.op == '-':
922                return left - right
923            elif exprnode.op == '*':
924                return left * right
925            elif exprnode.op == '/':
926                return self._c_div(left, right)
927            elif exprnode.op == '%':
928                return left - self._c_div(left, right) * right
929            elif exprnode.op == '<<':
930                return left << right
931            elif exprnode.op == '>>':
932                return left >> right
933            elif exprnode.op == '&':
934                return left & right
935            elif exprnode.op == '|':
936                return left | right
937            elif exprnode.op == '^':
938                return left ^ right
939        #
940        raise FFIError(":%d: unsupported expression: expected a "
941                       "simple numeric constant" % exprnode.coord.line)
942
943    def _c_div(self, a, b):
944        result = a // b
945        if ((a < 0) ^ (b < 0)) and (a % b) != 0:
946            result += 1
947        return result
948
949    def _build_enum_type(self, explicit_name, decls):
950        if decls is not None:
951            partial = False
952            enumerators = []
953            enumvalues = []
954            nextenumvalue = 0
955            for enum in decls.enumerators:
956                if _r_enum_dotdotdot.match(enum.name):
957                    partial = True
958                    continue
959                if enum.value is not None:
960                    nextenumvalue = self._parse_constant(enum.value)
961                enumerators.append(enum.name)
962                enumvalues.append(nextenumvalue)
963                self._add_constants(enum.name, nextenumvalue)
964                nextenumvalue += 1
965            enumerators = tuple(enumerators)
966            enumvalues = tuple(enumvalues)
967            tp = model.EnumType(explicit_name, enumerators, enumvalues)
968            tp.partial = partial
969        else:   # opaque enum
970            tp = model.EnumType(explicit_name, (), ())
971        return tp
972
973    def include(self, other):
974        for name, (tp, quals) in other._declarations.items():
975            if name.startswith('anonymous $enum_$'):
976                continue   # fix for test_anonymous_enum_include
977            kind = name.split(' ', 1)[0]
978            if kind in ('struct', 'union', 'enum', 'anonymous', 'typedef'):
979                self._declare(name, tp, included=True, quals=quals)
980        for k, v in other._int_constants.items():
981            self._add_constants(k, v)
982
983    def _get_unknown_type(self, decl):
984        typenames = decl.type.type.names
985        if typenames == ['__dotdotdot__']:
986            return model.unknown_type(decl.name)
987
988        if typenames == ['__dotdotdotint__']:
989            if self._uses_new_feature is None:
990                self._uses_new_feature = "'typedef int... %s'" % decl.name
991            return model.UnknownIntegerType(decl.name)
992
993        if typenames == ['__dotdotdotfloat__']:
994            # note: not for 'long double' so far
995            if self._uses_new_feature is None:
996                self._uses_new_feature = "'typedef float... %s'" % decl.name
997            return model.UnknownFloatType(decl.name)
998
999        raise FFIError(':%d: unsupported usage of "..." in typedef'
1000                       % decl.coord.line)
1001
1002    def _get_unknown_ptr_type(self, decl):
1003        if decl.type.type.type.names == ['__dotdotdot__']:
1004            return model.unknown_ptr_type(decl.name)
1005        raise FFIError(':%d: unsupported usage of "..." in typedef'
1006                       % decl.coord.line)
1007