• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
3# This is the API builder, it parses the C sources and build the
4# API formal description in XML.
5#
6# See Copyright for the status of this software.
7#
8# daniel@veillard.com
9#
10import os, sys
11import string
12import glob
13
# Module-level debug switch; it is not referenced in the part of the file
# reviewed here, presumably it is tested further down -- TODO confirm.
debug=0
# When set to a symbol name, the identifier and index classes print a trace
# line each time a symbol with that exact name is defined, updated or
# registered.  None disables the tracing.
#debugsym='ignorableWhitespaceSAXFunc'
debugsym=None

#
# C parser analysis code
#
# File name -> human readable reason.  Presumably these files are skipped
# when the sources are scanned; the lookup itself is outside the part of
# the file reviewed here -- TODO confirm.
ignored_files = {
  "config.h": "generated portability layer",
  "libxml.h": "internal only",
  "legacy.c": "legacy code",
  "testModule.c": "test tool",
  "testapi.c": "generated regression tests",
  "runtest.c": "regression tests program",
  "runsuite.c": "regression tests program",
  "tst.c": "not part of the library",
  "test.c": "not part of the library",
  "testdso.c": "test for dynamid shared libraries",
  "testrecurse.c": "test for entities recursions",
  "timsort.h": "Internal header only for xpath.c 2.9.0",
  "nanoftp.h": "empty",
  "SAX.h": "empty",
}

# Token -> (count, reason).  NOTE(review): the count looks like the number
# of tokens following the word that must be dropped together with it (0 for
# a bare keyword, 3 for "MACRO ( x )", 5 for "MACRO ( x , y )"); the code
# consuming this table is outside the part of the file reviewed here --
# confirm before relying on it.
ignored_words = {
  "WINAPI": (0, "Windows keyword"),
  "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
  "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
  "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XSLTCALL": (0, "Special macro for win32 calls"),
  "EXSLTCALL": (0, "Special macro for win32 calls"),
  "__declspec": (3, "Windows keyword"),
  "__stdcall": (0, "Windows keyword"),
  "ATTRIBUTE_UNUSED": (0, "macro keyword"),
  "ATTRIBUTE_DESTRUCTOR": (0, "macro keyword"),
  "LIBEXSLT_PUBLIC": (0, "macro keyword"),
  "X_IN_Y": (5, "macro function builder"),
  "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"),
  "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"),
  "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
  "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"),
  "ATTRIBUTE_NO_SANITIZE": (3, "macro keyword"),
  "ATTRIBUTE_NO_SANITIZE_INTEGER": (0, "macro keyword"),
  "ATTRIBUTE_COUNTED_BY": (3, "macro keyword"),
  "XML_DEPRECATED": (0, "macro keyword"),
  "XML_DEPRECATED_MEMBER": (0, "macro keyword"),
  "XML_GLOBALS_ALLOC": (0, "macro keyword"),
  "XML_GLOBALS_ERROR": (0, "macro keyword"),
  "XML_GLOBALS_IO": (0, "macro keyword"),
  "XML_GLOBALS_PARSER": (0, "macro keyword"),
  "XML_GLOBALS_TREE": (0, "macro keyword"),
  "XML_THREAD_LOCAL": (0, "macro keyword"),
}
71
def escape(raw):
    """Return `raw` with the five XML special characters replaced by
       their predefined entities (&amp;amp; &amp;lt; &amp;gt; &amp;apos; &amp;quot;)."""
    # The ampersand has to go first, otherwise the '&' introduced by the
    # other replacements would be escaped a second time.
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
79
class identifier:
    """One symbol collected by the API builder.

       Attributes:
         name         -- the symbol name
         header       -- header value recorded for the symbol, or None
         module       -- module value recorded for the symbol, or None
         type         -- kind of symbol as a string, or None
         lineno       -- line number recorded when the symbol was created
         info, extra  -- opaque payloads supplied by the parser
         static       -- 0 by default, set through set_static()
         conditionals -- private copy of the conditionals list active at
                         the definition, or None when there are none
    """
    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
                 info=None, extra=None, conditionals = None):
        self.name = name
        self.header = header
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        # Shared normalisation: empty -> None, otherwise a private copy.
        self.set_conditionals(conditionals)
        if self.name == debugsym:
            print("=> define %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % (self.module)
        if self.info is not None:
            r = r + " " +  repr(self.info)
        if self.extra is not None:
            r = r + " " + repr(self.extra)
        if self.conditionals is not None:
            r = r + " " + repr(self.conditionals)
        return r


    def set_header(self, header):
        self.header = header
    def set_module(self, module):
        self.module = module
    def set_type(self, type):
        self.type = type
    def set_info(self, info):
        self.info = info
    def set_extra(self, extra):
        self.extra = extra
    def set_lineno(self, lineno):
        self.lineno = lineno
    def set_static(self, static):
        self.static = static
    def set_conditionals(self, conditionals):
        # An empty list is stored as None; a non-empty one is copied so that
        # later changes to the caller's list do not leak into this record.
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name
    def get_header(self):
        # Used to return self.module, which made the header unreachable
        # through the accessor.
        return self.header
    def get_module(self):
        return self.module
    def get_type(self):
        return self.type
    def get_info(self):
        return self.info
    def get_lineno(self):
        return self.lineno
    def get_extra(self):
        return self.extra
    def get_static(self):
        return self.static
    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type = None, info = None, extra=None,
               conditionals=None):
        """Merge information from another sighting of the same symbol.

           header and type only fill a slot that is still empty; module
           also replaces a module equal to the recorded header; info,
           extra and conditionals overwrite whenever they are not None.
        """
        if self.name == debugsym:
            print("=> update %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))
        if header is not None and self.header is None:
            # Used to store `module` here; the callers visible in this file
            # pass the same value for both, so they are unaffected.
            self.set_header(header)
        if module is not None and (self.module is None or self.header == self.module):
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
        if conditionals is not None:
            self.set_conditionals(conditionals)
170
class index:
    """Symbol tables built while scanning the sources: one dictionary per
       kind of symbol, plus `identifiers` holding every symbol by name and
       `references` holding the symbols recorded through add_ref()."""
    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}
        self.info = {}

    def _record(self, name, header, module, static, type, lineno, info,
                extra, conditionals):
        # Fetch the identifier registered under `name` and refresh it, or
        # create and register a new one; then apply the static flag.
        if name in self.identifiers:
            entry = self.identifiers[name]
            entry.update(header, module, type, info, extra, conditionals)
        else:
            entry = identifier(name, header, module, type, lineno, info,
                               extra, conditionals)
            self.identifiers[name] = entry
        if static == 1:
            entry.set_static(1)
        return entry

    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a reference to `name`; names starting with '__' are
           ignored and yield None, otherwise the identifier is returned."""
        if name[0:2] == '__':
            return None
        entry = self._record(name, header, module, static, type, lineno,
                             info, extra, conditionals)

        if type is not None:
            self.references[name] = entry

        if name == debugsym:
            print("New ref: %s" % (entry))

        return entry

    def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a definition of `name` and file it in the table matching
           `type`; names starting with '__' are ignored and yield None,
           otherwise the identifier is returned."""
        if name[0:2] == '__':
            return None
        entry = self._record(name, header, module, static, type, lineno,
                             info, extra, conditionals)

        if type is not None:
            # "functype" entries share the functions table.
            table = {
                "function": self.functions,
                "functype": self.functions,
                "variable": self.variables,
                "include": self.includes,
                "struct": self.structs,
                "enum": self.enums,
                "typedef": self.typedefs,
                "macro": self.macros,
            }.get(type)
            if table is None:
                print("Unable to register type ", type)
            else:
                table[name] = entry

        if name == debugsym:
            print("New symbol: %s" % (entry))

        return entry

    def _absorb(self, mine, theirs, redeclared, drop_macro = False):
        # Copy the entries of `theirs` missing from `mine` (and register
        # them in self.identifiers); report the ones already present using
        # the `redeclared` message.  With drop_macro set, a macro of the
        # same name is removed first.
        for sym in list(theirs.keys()):
            if drop_macro and sym in self.macros:
                del self.macros[sym]
            if sym in mine:
                print(redeclared % (
                   sym, mine[sym].header, theirs[sym].header))
            else:
                mine[sym] = theirs[sym]
                self.identifiers[sym] = theirs[sym]

    def merge(self, idx):
        """Fold the tables of `idx` into this index, reporting symbols
           that both indexes already declare."""
        #
        # macro might be used to override functions or variables
        # definitions, hence drop_macro for those two tables
        #
        self._absorb(self.functions, idx.functions,
                     "function %s from %s redeclared in %s", True)
        self._absorb(self.variables, idx.variables,
                     "variable %s from %s redeclared in %s", True)
        self._absorb(self.structs, idx.structs,
                     "struct %s from %s redeclared in %s")
        self._absorb(self.typedefs, idx.typedefs,
                     "typedef %s from %s redeclared in %s")
        for sym in list(idx.macros.keys()):
            # A macro never replaces a variable, function or enum that is
            # already known here.
            if sym in self.variables or sym in self.functions \
               or sym in self.enums:
                continue
            if sym in self.macros and sym != 'XML_OP':
                print("macro %s from %s redeclared in %s" % (
                   sym, self.macros[sym].header, idx.macros[sym].header))
            else:
                self.macros[sym] = idx.macros[sym]
                self.identifiers[sym] = idx.macros[sym]
        self._absorb(self.enums, idx.enums,
                     "enum %s from %s redeclared in %s")

    def merge_public(self, idx):
        """Update the functions and variables already known here with the
           module, type, info and extra found in `idx`; symbols only
           present in `idx` are left out."""
        for sym, found in list(idx.functions.items()):
            if sym not in self.functions:
                # not declared in the headers: silently skipped
                continue
            declared = self.functions[sym]
            # check that function condition agrees with header
            if found.conditionals != declared.conditionals:
                print("Header condition differs from Function for %s:" \
                   % sym)
                print("  H: %s" % declared.conditionals)
                print("  C: %s" % found.conditionals)
            declared.update(None, found.module, found.type, found.info,
                            found.extra)

        for sym, found in list(idx.variables.items()):
            if sym not in self.variables:
                continue
            # The condition check is not done for variables
            # TODO: produces many false positives
            self.variables[sym].update(None, found.module, found.type,
                                       found.info, found.extra)

    def analyze_dict(self, type, dict):
        """Print how many entries `dict` holds and how many of them are
           public (static == 0); nothing is printed for an empty table."""
        total = len(dict)
        exported = sum(1 for entry in dict.values() if entry.static == 0)
        if total != exported:
            print("  %d %s , %d public" % (total, type, exported))
        elif total != 0:
            print("  %d public %s" % (total, type))


    def analyze(self):
        """Print the summary line of each reported table."""
        for label, table in (("functions", self.functions),
                             ("variables", self.variables),
                             ("structs", self.structs),
                             ("typedefs", self.typedefs),
                             ("macros", self.macros)):
            self.analyze_dict(label, table)
361
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line

       Tokens are (kind, text) tuples whose kind is one of 'preproc',
       'string', 'comment', 'name', 'sep' or 'op'."""

    # Characters ending a 'name' token.
    _NAME_STOP = " \t(){}:;,+-*/%&!|[]=><"
    # Single character 'sep' tokens.
    _SEPARATORS = "(){}:;,[]"
    # Characters able to start an 'op' token, and the ones able to follow
    # to build a two character operator.
    _OP_START = "+-*><=/%&!|."
    _OP_FOLLOW = "+-*><=/%&!|"

    def __init__(self, input):
        self.input = input      # object providing readline()
        self.tokens = []        # tokens produced but not yet returned
        self.line = ""          # unprocessed remainder of the current line
        self.lineno = 0         # number of physical lines read so far
        # Last token returned.  It used to be created by the first call to
        # token() only, so debug() raised AttributeError before that.
        self.last = None

    def getline(self):
        """Return the next non-empty line stripped of surrounding
           whitespace, with backslash-continued lines joined, or None at
           the end of the input."""
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                # Drop the backslash and glue the next line; an empty next
                # line (or the end of the input) ends the continuation.
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.strip()
                if not n:
                    break
                line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Put `token` back so that it is the next one returned."""
        self.tokens.insert(0, token)

    def debug(self):
        print("Last token: ", self.last)
        print("Token queue: ", self.tokens)
        print("Line %d end: " % (self.lineno), self.line)

    def _read_string(self, line):
        # `line` starts with the opening quote.  Collect the text up to the
        # matching unescaped quote, reading more lines when needed (they
        # are joined without separator); escapes are kept verbatim.
        quote = line[0]
        line = line[1:]
        tok = ""
        while True:
            closed = False
            i = 0
            n = len(line)
            while i < n:
                if line[i] == quote:
                    self.line = line[i+1:]
                    line = line[:i]
                    closed = True
                    break
                if line[i] == '\\':
                    # skip the escaped character
                    i = i + 1
                i = i + 1
            tok = tok + line
            if closed:
                break
            line = self.getline()
            if line is None:
                return None
        self.last = ('string', tok)
        return self.last

    def _read_block_comment(self, line):
        # `line` starts with '/*'.  Collect the text up to '*/', one input
        # line per comment line.
        line = line[2:]
        tok = ""
        while True:
            end = line.find('*/')
            if end != -1:
                self.line = line[end+2:]
                # The character in front of '*/' is dropped too, and a line
                # starting with '*/' only loses its last character (a lone
                # '*/' yields '*').  Kept as is on purpose: the comment
                # parsers of this file are written against that shape.
                line = line[:end-1]
            if tok != "":
                tok = tok + "\n"
            tok = tok + line
            if end != -1:
                break
            line = self.getline()
            if line is None:
                return None
        self.last = ('comment', tok)
        return self.last

    def _cut_at_special(self, line):
        # Return the part of `line` located before the first comment or
        # quote; the rest is saved in self.line for the next round.
        for i, ch in enumerate(line):
            if ch == '"' or ch == "'" or \
               (ch == '/' and line[i+1:i+2] in ('/', '*')):
                self.line = line[i:]
                return line[:i]
        return line

    def _tokenize(self, line):
        # Split a fragment free of comments and strings into 'name', 'sep'
        # and 'op' tokens appended to self.tokens.
        n = len(line)
        i = 0
        while i < n:
            ch = line[i]
            if ch == ' ' or ch == '\t':
                i = i + 1
                continue
            if ch in self._SEPARATORS:
                self.tokens.append(('sep', ch))
                i = i + 1
                continue
            if ch in self._OP_START:
                if line[i:i+3] == '...':
                    # the ellipsis is reported as a name
                    self.tokens.append(('name', '...'))
                    i = i + 3
                elif i + 1 < n and line[i+1] in self._OP_FOLLOW:
                    self.tokens.append(('op', line[i:i+2]))
                    i = i + 2
                else:
                    self.tokens.append(('op', ch))
                    i = i + 1
                continue
            # Anything else starts a name running up to the next stop
            # character; '.', '_' or '#' do not end a name.
            s = i
            while i < n and line[i] not in self._NAME_STOP:
                i = i + 1
            self.tokens.append(('name', line[s:i]))

    def token(self):
        """Return the next token, or None at the end of the input (also
           when the input ends inside a string or a block comment)."""
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                # preprocessor line: one token per blank separated word
                self.tokens = [('preproc', word) for word in line.split()]
                break
            if line[0] == '"' or line[0] == "'":
                return self._read_string(line)
            if line[0:2] == '/*':
                return self._read_block_comment(line)
            if line[0:2] == '//':
                self.last = ('comment', line[2:])
                return self.last
            self._tokenize(self._cut_at_special(line))

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
567
568class CParser:
569    """The C module parser"""
570    def __init__(self, filename, idx = None):
571        self.filename = filename
572        if len(filename) > 2 and filename[-2:] == '.h':
573            self.is_header = 1
574        else:
575            self.is_header = 0
576        self.input = open(filename)
577        self.lexer = CLexer(self.input)
578        if idx == None:
579            self.index = index()
580        else:
581            self.index = idx
582        self.top_comment = ""
583        self.last_comment = ""
584        self.comment = None
585        self.collect_ref = 0
586        self.doc_disable = 0
587        self.conditionals = []
588        self.defines = []
589
    def collect_references(self):
        """Set the collect_ref flag to 1."""
        self.collect_ref = 1
592
    def disable(self):
        """Set doc_disable to 1; while it is set index_add(), warning()
           and error() return without doing anything."""
        self.doc_disable = 1
595
    def enable(self):
        """Reset doc_disable to 0, undoing disable()."""
        self.doc_disable = 0
598
    def lineno(self):
        """Return the line number currently reported by the lexer."""
        return self.lexer.getlineno()
601
602    def index_add(self, name, module, static, type, info=None, extra = None):
603        if self.doc_disable:
604            return
605        if self.is_header == 1:
606            self.index.add(name, module, module, static, type, self.lineno(),
607                           info, extra, self.conditionals)
608        else:
609            self.index.add(name, None, module, static, type, self.lineno(),
610                           info, extra, self.conditionals)
611
612    def index_add_ref(self, name, module, static, type, info=None,
613                      extra = None):
614        if self.is_header == 1:
615            self.index.add_ref(name, module, module, static, type,
616                               self.lineno(), info, extra, self.conditionals)
617        else:
618            self.index.add_ref(name, None, module, static, type, self.lineno(),
619                               info, extra, self.conditionals)
620
621    def warning(self, msg):
622        if self.doc_disable:
623            return
624        print(msg)
625
626    def error(self, msg, token=-1):
627        if self.doc_disable:
628            return
629
630        print("Parse Error: " + msg)
631        if token != -1:
632            print("Got token ", token)
633        self.lexer.debug()
634        sys.exit(1)
635
    def debug(self, msg, token=-1):
        """Print `msg` prefixed with "Debug: ", then the token unless it
           was left at -1, then the lexer state.  Unlike warning() and
           error() this does not look at doc_disable."""
        print("Debug: " + msg)
        if token != -1:
            print("Got token ", token)
        self.lexer.debug()
641
642    def parseTopComment(self, comment):
643        res = {}
644        lines = comment.split("\n")
645        item = None
646        for line in lines:
647            while line != "" and (line[0] == ' ' or line[0] == '\t'):
648                line = line[1:]
649            while line != "" and line[0] == '*':
650                line = line[1:]
651            while line != "" and (line[0] == ' ' or line[0] == '\t'):
652                line = line[1:]
653            try:
654                (it, line) = line.split(":", 1)
655                item = it
656                while line != "" and (line[0] == ' ' or line[0] == '\t'):
657                    line = line[1:]
658                if item in res:
659                    res[item] = res[item] + " " + line
660                else:
661                    res[item] = line
662            except:
663                if item != None:
664                    if item in res:
665                        res[item] = res[item] + " " + line
666                    else:
667                        res[item] = line
668        self.index.info = res
669
670    def parseComment(self, token):
671        if self.top_comment == "":
672            self.top_comment = token[1]
673        if self.comment == None or token[1][0] == '*':
674            self.comment = token[1];
675        else:
676            self.comment = self.comment + token[1]
677        token = self.lexer.token()
678
679        if self.comment.find("DOC_DISABLE") != -1:
680            self.disable()
681
682        if self.comment.find("DOC_ENABLE") != -1:
683            self.enable()
684
685        return token
686
687    #
688    # Parse a simple comment block for typedefs or global variables
689    #
690    def parseSimpleComment(self, name, quiet = False):
691        if name[0:2] == '__':
692            quiet = 1
693
694        args = []
695        desc = ""
696
697        if self.comment == None:
698            if not quiet:
699                self.warning("Missing comment for %s" % (name))
700            return(None)
701        if self.comment[0] != '*':
702            if not quiet:
703                self.warning("Missing * in comment for %s" % (name))
704            return(None)
705        lines = self.comment.split('\n')
706        if lines[0] == '*':
707            del lines[0]
708        if lines[0] != "* %s:" % (name):
709            if not quiet:
710                self.warning("Misformatted comment for %s" % (name))
711                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
712            return(None)
713        del lines[0]
714        while len(lines) > 0 and lines[0] == '*':
715            del lines[0]
716        desc = ""
717        while len(lines) > 0:
718            l = lines[0]
719            while len(l) > 0 and l[0] == '*':
720                l = l[1:]
721            l = l.strip()
722            desc = desc + " " + l
723            del lines[0]
724
725        desc = desc.strip()
726
727        if quiet == 0:
728            if desc == "":
729                self.warning("Comment for %s lacks description" % (name))
730
731        return(desc)
732    #
733    # Parse a comment block associate to a macro
734    #
735    def parseMacroComment(self, name, quiet = 0):
736        if name[0:2] == '__':
737            quiet = 1
738
739        args = []
740        desc = ""
741
742        if self.comment == None:
743            if not quiet:
744                self.warning("Missing comment for macro %s" % (name))
745            return((args, desc))
746        if self.comment[0] != '*':
747            if not quiet:
748                self.warning("Missing * in macro comment for %s" % (name))
749            return((args, desc))
750        lines = self.comment.split('\n')
751        if lines[0] == '*':
752            del lines[0]
753        if lines[0] != "* %s:" % (name):
754            if not quiet:
755                self.warning("Misformatted macro comment for %s" % (name))
756                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
757            return((args, desc))
758        del lines[0]
759        while lines[0] == '*':
760            del lines[0]
761        while len(lines) > 0 and lines[0][0:3] == '* @':
762            l = lines[0][3:]
763            try:
764                (arg, desc) = l.split(':', 1)
765                desc=desc.strip()
766                arg=arg.strip()
767            except:
768                if not quiet:
769                    self.warning("Misformatted macro comment for %s" % (name))
770                    self.warning("  problem with '%s'" % (lines[0]))
771                del lines[0]
772                continue
773            del lines[0]
774            l = lines[0].strip()
775            while len(l) > 2 and l[0:3] != '* @':
776                while l[0] == '*':
777                    l = l[1:]
778                desc = desc + ' ' + l.strip()
779                del lines[0]
780                if len(lines) == 0:
781                    break
782                l = lines[0]
783            args.append((arg, desc))
784        while len(lines) > 0 and lines[0] == '*':
785            del lines[0]
786        desc = ""
787        while len(lines) > 0:
788            l = lines[0]
789            while len(l) > 0 and l[0] == '*':
790                l = l[1:]
791            l = l.strip()
792            desc = desc + " " + l
793            del lines[0]
794
795        desc = desc.strip()
796
797        if quiet == 0:
798            if desc == "":
799                self.warning("Macro comment for %s lack description of the macro" % (name))
800
801        return((args, desc))
802
803     #
804     # Parse a comment block and merge the information found in the
805     # parameters descriptions, finally returns a block as complete
806     # as possible
807     #
808    def mergeFunctionComment(self, name, description, quiet = 0):
809        if name == 'main':
810            quiet = 1
811        if name[0:2] == '__':
812            quiet = 1
813
814        (ret, args) = description
815        desc = ""
816        retdesc = ""
817
818        if self.comment == None:
819            if not quiet:
820                self.warning("Missing comment for function %s" % (name))
821            return(((ret[0], retdesc), args, desc))
822        if self.comment[0] != '*':
823            if not quiet:
824                self.warning("Missing * in function comment for %s" % (name))
825            return(((ret[0], retdesc), args, desc))
826        lines = self.comment.split('\n')
827        if lines[0] == '*':
828            del lines[0]
829        if lines[0] != "* %s:" % (name):
830            if not quiet:
831                self.warning("Misformatted function comment for %s" % (name))
832                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
833            return(((ret[0], retdesc), args, desc))
834        del lines[0]
835        while lines[0] == '*':
836            del lines[0]
837        nbargs = len(args)
838        while len(lines) > 0 and lines[0][0:3] == '* @':
839            l = lines[0][3:]
840            try:
841                (arg, desc) = l.split(':', 1)
842                desc=desc.strip()
843                arg=arg.strip()
844            except:
845                if not quiet:
846                    self.warning("Misformatted function comment for %s" % (name))
847                    self.warning("  problem with '%s'" % (lines[0]))
848                del lines[0]
849                continue
850            del lines[0]
851            l = lines[0].strip()
852            while len(l) > 2 and l[0:3] != '* @':
853                while l[0] == '*':
854                    l = l[1:]
855                desc = desc + ' ' + l.strip()
856                del lines[0]
857                if len(lines) == 0:
858                    break
859                l = lines[0]
860            i = 0
861            while i < nbargs:
862                if args[i][1] == arg:
863                    args[i] = (args[i][0], arg, desc)
864                    break;
865                i = i + 1
866            if i >= nbargs:
867                if not quiet:
868                    self.warning("Unable to find arg %s from function comment for %s" % (
869                       arg, name))
870        while len(lines) > 0 and lines[0] == '*':
871            del lines[0]
872        desc = ""
873        while len(lines) > 0:
874            l = lines[0]
875            while len(l) > 0 and l[0] == '*':
876                l = l[1:]
877            l = l.strip()
878            if len(l) >= 6 and  l[0:6] == "return" or l[0:6] == "Return":
879                try:
880                    l = l.split(' ', 1)[1]
881                except:
882                    l = ""
883                retdesc = l.strip()
884                del lines[0]
885                while len(lines) > 0:
886                    l = lines[0]
887                    while len(l) > 0 and l[0] == '*':
888                        l = l[1:]
889                    l = l.strip()
890                    retdesc = retdesc + " " + l
891                    del lines[0]
892            else:
893                desc = desc + " " + l
894                del lines[0]
895
896        retdesc = retdesc.strip()
897        desc = desc.strip()
898
899        if quiet == 0:
900             #
901             # report missing comments
902             #
903            i = 0
904            while i < nbargs:
905                if args[i][2] == None and args[i][0] != "void" and \
906                   ((args[i][1] != None) or (args[i][1] == '')):
907                    self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
908                i = i + 1
909            if retdesc == "" and ret[0] != "void":
910                self.warning("Function comment for %s lacks description of return value" % (name))
911            if desc == "" and retdesc == "":
912                self.warning("Function comment for %s lacks description of the function" % (name))
913
914        return(((ret[0], retdesc), args, desc))
915
916    def parsePreproc(self, token):
917        if debug:
918            print("=> preproc ", token, self.lexer.tokens)
919        name = token[1]
920        if name == "#include":
921            token = self.lexer.token()
922            if token == None:
923                return None
924            if token[0] == 'preproc':
925                self.index_add(token[1], self.filename, not self.is_header,
926                                "include")
927                return self.lexer.token()
928            return token
929        if name == "#define":
930            token = self.lexer.token()
931            if token == None:
932                return None
933            if token[0] == 'preproc':
934                 # TODO macros with arguments
935                name = token[1]
936                lst = []
937                token = self.lexer.token()
938                while token != None and token[0] == 'preproc' and \
939                      token[1][0] != '#':
940                    lst.append(token[1])
941                    token = self.lexer.token()
942                try:
943                    name = name.split('(') [0]
944                except:
945                    pass
946                info = self.parseMacroComment(name, True)
947                self.index_add(name, self.filename, not self.is_header,
948                                "macro", info)
949                return token
950
951        #
952        # Processing of conditionals modified by Bill 1/1/05
953        #
954        # We process conditionals (i.e. tokens from #ifdef, #ifndef,
955        # #if, #else and #endif) for headers and mainline code,
956        # store the ones from the header in libxml2-api.xml, and later
957        # (in the routine merge_public) verify that the two (header and
958        # mainline code) agree.
959        #
960        # There is a small problem with processing the headers. Some of
961        # the variables are not concerned with enabling / disabling of
962        # library functions (e.g. '__XML_PARSER_H__'), and we don't want
963        # them to be included in libxml2-api.xml, or involved in
964        # the check between the header and the mainline code.  To
965        # accomplish this, we ignore any conditional which doesn't include
966        # the string 'ENABLED'
967        #
968        if name == "#ifdef":
969            apstr = self.lexer.tokens[0][1]
970            try:
971                self.defines.append(apstr)
972                if apstr.find('ENABLED') != -1:
973                    self.conditionals.append("defined(%s)" % apstr)
974            except:
975                pass
976        elif name == "#ifndef":
977            apstr = self.lexer.tokens[0][1]
978            try:
979                self.defines.append(apstr)
980                if apstr.find('ENABLED') != -1:
981                    self.conditionals.append("!defined(%s)" % apstr)
982            except:
983                pass
984        elif name == "#if":
985            apstr = ""
986            for tok in self.lexer.tokens:
987                if apstr != "":
988                    apstr = apstr + " "
989                apstr = apstr + tok[1]
990            try:
991                self.defines.append(apstr)
992                if apstr.find('ENABLED') != -1:
993                    self.conditionals.append(apstr)
994            except:
995                pass
996        elif name == "#else":
997            if self.conditionals != [] and \
998               self.defines[-1].find('ENABLED') != -1:
999                self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
1000        elif name == "#endif":
1001            if self.conditionals != [] and \
1002               self.defines[-1].find('ENABLED') != -1:
1003                self.conditionals = self.conditionals[:-1]
1004            self.defines = self.defines[:-1]
1005        token = self.lexer.token()
1006        while token != None and token[0] == 'preproc' and \
1007            token[1][0] != '#':
1008            token = self.lexer.token()
1009        return token
1010
1011     #
1012     # token acquisition on top of the lexer, it handle internally
1013     # preprocessor and comments since they are logically not part of
1014     # the program structure.
1015     #
1016    def token(self):
1017        global ignored_words
1018
1019        token = self.lexer.token()
1020        while token != None:
1021            if token[0] == 'comment':
1022                token = self.parseComment(token)
1023                continue
1024            elif token[0] == 'preproc':
1025                token = self.parsePreproc(token)
1026                continue
1027            elif token[0] == "name" and token[1] == "__const":
1028                token = ("name", "const")
1029                return token
1030            elif token[0] == "name" and token[1] == "__attribute":
1031                token = self.lexer.token()
1032                while token != None and token[1] != ";":
1033                    token = self.lexer.token()
1034                return token
1035            elif token[0] == "name" and token[1] in ignored_words:
1036                (n, info) = ignored_words[token[1]]
1037                i = 0
1038                while i < n:
1039                    token = self.lexer.token()
1040                    i = i + 1
1041                token = self.lexer.token()
1042                continue
1043            else:
1044                if debug:
1045                    print("=> ", token)
1046                return token
1047        return None
1048
1049     #
1050     # Parse a typedef, it records the type and its name.
1051     #
1052    def parseTypedef(self, token):
1053        if token == None:
1054            return None
1055        token = self.parseType(token)
1056        if token == None:
1057            self.error("parsing typedef")
1058            return None
1059        base_type = self.type
1060        type = base_type
1061         #self.debug("end typedef type", token)
1062        while token != None:
1063            if token[0] == "name":
1064                name = token[1]
1065                signature = self.signature
1066                if signature != None:
1067                    type = type.split('(')[0]
1068                    d = self.mergeFunctionComment(name,
1069                            ((type, None), signature), 1)
1070                    self.index_add(name, self.filename, not self.is_header,
1071                                    "functype", d)
1072                else:
1073                    if base_type == "struct":
1074                        self.index_add(name, self.filename, not self.is_header,
1075                                        "struct", type)
1076                        base_type = "struct " + name
1077                    else:
1078                        # TODO report missing or misformatted comments
1079                        info = self.parseSimpleComment(name, True)
1080                        self.index_add(name, self.filename, not self.is_header,
1081                                    "typedef", type, info)
1082                token = self.token()
1083            else:
1084                self.error("parsing typedef: expecting a name")
1085                return token
1086             #self.debug("end typedef", token)
1087            if token != None and token[0] == 'sep' and token[1] == ',':
1088                type = base_type
1089                token = self.token()
1090                while token != None and token[0] == "op":
1091                    type = type + token[1]
1092                    token = self.token()
1093            elif token != None and token[0] == 'sep' and token[1] == ';':
1094                break;
1095            elif token != None and token[0] == 'name':
1096                type = base_type
1097                continue;
1098            else:
1099                self.error("parsing typedef: expecting ';'", token)
1100                return token
1101        token = self.token()
1102        return token
1103
1104     #
1105     # Parse a C code block, used for functions it parse till
1106     # the balancing } included
1107     #
1108    def parseBlock(self, token):
1109        while token != None:
1110            if token[0] == "sep" and token[1] == "{":
1111                token = self.token()
1112                token = self.parseBlock(token)
1113            elif token[0] == "sep" and token[1] == "}":
1114                token = self.token()
1115                return token
1116            else:
1117                if self.collect_ref == 1:
1118                    oldtok = token
1119                    token = self.token()
1120                    if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
1121                        if token[0] == "sep" and token[1] == "(":
1122                            self.index_add_ref(oldtok[1], self.filename,
1123                                                0, "function")
1124                            token = self.token()
1125                        elif token[0] == "name":
1126                            token = self.token()
1127                            if token[0] == "sep" and (token[1] == ";" or
1128                               token[1] == "," or token[1] == "="):
1129                                self.index_add_ref(oldtok[1], self.filename,
1130                                                    0, "type")
1131                    elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
1132                        self.index_add_ref(oldtok[1], self.filename,
1133                                            0, "typedef")
1134                    elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
1135                        self.index_add_ref(oldtok[1], self.filename,
1136                                            0, "typedef")
1137
1138                else:
1139                    token = self.token()
1140        return token
1141
1142     #
1143     # Parse a C struct definition till the balancing }
1144     #
1145    def parseStruct(self, token):
1146        fields = []
1147         #self.debug("start parseStruct", token)
1148        while token != None:
1149            if token[0] == "sep" and token[1] == "{":
1150                token = self.token()
1151                token = self.parseTypeBlock(token)
1152            elif token[0] == "sep" and token[1] == "}":
1153                self.struct_fields = fields
1154                 #self.debug("end parseStruct", token)
1155                 #print fields
1156                token = self.token()
1157                return token
1158            else:
1159                base_type = self.type
1160                 #self.debug("before parseType", token)
1161                token = self.parseType(token)
1162                 #self.debug("after parseType", token)
1163                if token != None and token[0] == "name":
1164                    fname = token[1]
1165                    token = self.token()
1166                    if token[0] == "sep" and token[1] == ";":
1167                        token = self.token()
1168                        fields.append((self.type, fname))
1169                    else:
1170                        self.error("parseStruct: expecting ;", token)
1171                elif token != None and token[0] == "sep" and token[1] == "{":
1172                    token = self.token()
1173                    token = self.parseTypeBlock(token)
1174                    if token != None and token[0] == "name":
1175                        token = self.token()
1176                    if token != None and token[0] == "sep" and token[1] == ";":
1177                        token = self.token()
1178                    else:
1179                        self.error("parseStruct: expecting ;", token)
1180                else:
1181                    self.error("parseStruct: name", token)
1182                    token = self.token()
1183                self.type = base_type;
1184        self.struct_fields = fields
1185         #self.debug("end parseStruct", token)
1186         #print fields
1187        return token
1188
1189     #
1190     # Parse a C enum block, parse till the balancing }
1191     #
1192    def parseEnumBlock(self, token):
1193        self.enums = []
1194        name = None
1195        self.comment = None
1196        comment = ""
1197        value = "0"
1198        while token != None:
1199            if token[0] == "sep" and token[1] == "{":
1200                token = self.token()
1201                token = self.parseTypeBlock(token)
1202            elif token[0] == "sep" and token[1] == "}":
1203                if name != None:
1204                    if self.comment != None:
1205                        comment = self.comment
1206                        self.comment = None
1207                    self.enums.append((name, value, comment))
1208                token = self.token()
1209                return token
1210            elif token[0] == "name":
1211                    if name != None:
1212                        if self.comment != None:
1213                            comment = self.comment.strip()
1214                            self.comment = None
1215                        self.enums.append((name, value, comment))
1216                    name = token[1]
1217                    comment = ""
1218                    token = self.token()
1219                    if token[0] == "op" and token[1][0] == "=":
1220                        value = ""
1221                        if len(token[1]) > 1:
1222                            value = token[1][1:]
1223                        token = self.token()
1224                        while token[0] != "sep" or (token[1] != ',' and
1225                              token[1] != '}'):
1226                            value = value + token[1]
1227                            token = self.token()
1228                    else:
1229                        try:
1230                            value = "%d" % (int(value) + 1)
1231                        except:
1232                            self.warning("Failed to compute value of enum %s" % (name))
1233                            value=""
1234                    if token[0] == "sep" and token[1] == ",":
1235                        token = self.token()
1236            else:
1237                token = self.token()
1238        return token
1239
1240     #
1241     # Parse a C definition block, used for structs it parse till
1242     # the balancing }
1243     #
1244    def parseTypeBlock(self, token):
1245        while token != None:
1246            if token[0] == "sep" and token[1] == "{":
1247                token = self.token()
1248                token = self.parseTypeBlock(token)
1249            elif token[0] == "sep" and token[1] == "}":
1250                token = self.token()
1251                return token
1252            else:
1253                token = self.token()
1254        return token
1255
1256     #
1257     # Parse a type: the fact that the type name can either occur after
1258     #    the definition or within the definition makes it a little harder
1259     #    if inside, the name token is pushed back before returning
1260     #
1261    def parseType(self, token):
1262        self.type = ""
1263        self.struct_fields = []
1264        self.signature = None
1265        if token == None:
1266            return token
1267
1268        have_sign = 0
1269        done = 0
1270
1271        while token[0] == "name" and (
1272              token[1] == "const" or \
1273              token[1] == "unsigned" or \
1274              token[1] == "signed"):
1275            if token[1] == "unsigned" or token[1] == "signed":
1276                have_sign = 1
1277            if self.type == "":
1278                self.type = token[1]
1279            else:
1280                self.type = self.type + " " + token[1]
1281            token = self.token()
1282
1283        if token[0] == "name" and token[1] in ("char", "short", "int", "long"):
1284            if self.type == "":
1285                self.type = token[1]
1286            else:
1287                self.type = self.type + " " + token[1]
1288
1289        elif have_sign:
1290            done = 1
1291
1292        elif token[0] == "name" and token[1] == "struct":
1293            if self.type == "":
1294                self.type = token[1]
1295            else:
1296                self.type = self.type + " " + token[1]
1297            token = self.token()
1298            nametok = None
1299            if token[0] == "name":
1300                nametok = token
1301                token = self.token()
1302            if token != None and token[0] == "sep" and token[1] == "{":
1303                token = self.token()
1304                token = self.parseStruct(token)
1305            elif token != None and token[0] == "op" and token[1] == "*":
1306                self.type = self.type + " " + nametok[1] + " *"
1307                token = self.token()
1308                while token != None and token[0] == "op" and token[1] == "*":
1309                    self.type = self.type + " *"
1310                    token = self.token()
1311                if token[0] == "name":
1312                    nametok = token
1313                    token = self.token()
1314                else:
1315                    self.error("struct : expecting name", token)
1316                    return token
1317            elif token != None and token[0] == "name" and nametok != None:
1318                self.type = self.type + " " + nametok[1]
1319                return token
1320
1321            if nametok != None:
1322                self.lexer.push(token)
1323                token = nametok
1324            return token
1325
1326        elif token[0] == "name" and token[1] == "enum":
1327            if self.type == "":
1328                self.type = token[1]
1329            else:
1330                self.type = self.type + " " + token[1]
1331            self.enums = []
1332            token = self.token()
1333            if token != None and token[0] == "sep" and token[1] == "{":
1334                token = self.token()
1335                token = self.parseEnumBlock(token)
1336            else:
1337                self.error("parsing enum: expecting '{'", token)
1338            enum_type = None
1339            if token != None and token[0] != "name":
1340                self.lexer.push(token)
1341                token = ("name", "enum")
1342            else:
1343                enum_type = token[1]
1344            for enum in self.enums:
1345                self.index_add(enum[0], self.filename,
1346                               not self.is_header, "enum",
1347                               (enum[1], enum[2], enum_type))
1348            return token
1349
1350        elif token[0] == "name":
1351            if self.type == "":
1352                self.type = token[1]
1353            else:
1354                self.type = self.type + " " + token[1]
1355        else:
1356            self.error("parsing type %s: expecting a name" % (self.type),
1357                       token)
1358            return token
1359        if not done:
1360            token = self.token()
1361        while token != None and (token[0] == "op" or
1362              token[0] == "name" and token[1] == "const"):
1363            self.type = self.type + " " + token[1]
1364            token = self.token()
1365
1366         #
1367         # if there is a parenthesis here, this means a function type
1368         #
1369        if token != None and token[0] == "sep" and token[1] == '(':
1370            self.type = self.type + token[1]
1371            token = self.token()
1372            while token != None and token[0] == "op" and token[1] == '*':
1373                self.type = self.type + token[1]
1374                token = self.token()
1375            if token == None or token[0] != "name" :
1376                self.error("parsing function type, name expected", token);
1377                return token
1378            self.type = self.type + token[1]
1379            nametok = token
1380            token = self.token()
1381            if token != None and token[0] == "sep" and token[1] == ')':
1382                self.type = self.type + token[1]
1383                token = self.token()
1384                if token != None and token[0] == "sep" and token[1] == '(':
1385                    token = self.token()
1386                    type = self.type;
1387                    token = self.parseSignature(token);
1388                    self.type = type;
1389                else:
1390                    self.error("parsing function type, '(' expected", token);
1391                    return token
1392            else:
1393                self.error("parsing function type, ')' expected", token);
1394                return token
1395            self.lexer.push(token)
1396            token = nametok
1397            return token
1398
1399         #
1400         # do some lookahead for arrays
1401         #
1402        if token != None and token[0] == "name":
1403            nametok = token
1404            token = self.token()
1405            if token != None and token[0] == "sep" and token[1] == '[':
1406                self.type = self.type + nametok[1]
1407                while token != None and token[0] == "sep" and token[1] == '[':
1408                    self.type = self.type + token[1]
1409                    token = self.token()
1410                    while token != None and token[0] != 'sep' and \
1411                          token[1] != ']' and token[1] != ';':
1412                        self.type = self.type + token[1]
1413                        token = self.token()
1414                if token != None and token[0] == 'sep' and token[1] == ']':
1415                    self.type = self.type + token[1]
1416                    token = self.token()
1417                else:
1418                    self.error("parsing array type, ']' expected", token);
1419                    return token
1420            elif token != None and token[0] == "sep" and token[1] == ':':
1421                 # remove :12 in case it's a limited int size
1422                token = self.token()
1423                token = self.token()
1424            self.lexer.push(token)
1425            token = nametok
1426
1427        return token
1428
1429     #
1430     # Parse a signature: '(' has been parsed and we scan the type definition
1431     #    up to the ')' included
1432    def parseSignature(self, token):
1433        signature = []
1434        if token != None and token[0] == "sep" and token[1] == ')':
1435            self.signature = []
1436            token = self.token()
1437            return token
1438        while token != None:
1439            token = self.parseType(token)
1440            if token != None and token[0] == "name":
1441                signature.append((self.type, token[1], None))
1442                token = self.token()
1443            elif token != None and token[0] == "sep" and token[1] == ',':
1444                token = self.token()
1445                continue
1446            elif token != None and token[0] == "sep" and token[1] == ')':
1447                 # only the type was provided
1448                if self.type == "...":
1449                    signature.append((self.type, "...", None))
1450                else:
1451                    signature.append((self.type, None, None))
1452            if token != None and token[0] == "sep":
1453                if token[1] == ',':
1454                    token = self.token()
1455                    continue
1456                elif token[1] == ')':
1457                    token = self.token()
1458                    break
1459        self.signature = signature
1460        return token
1461
1462     #
1463     # Parse a global definition, be it a type, variable or function
1464     # the extern "C" blocks are a bit nasty and require it to recurse.
1465     #
1466    def parseGlobal(self, token):
1467        static = 0
1468        if token[1] == 'extern':
1469            token = self.token()
1470            if token == None:
1471                return token
1472            if token[0] == 'string':
1473                if token[1] == 'C':
1474                    token = self.token()
1475                    if token == None:
1476                        return token
1477                    if token[0] == 'sep' and token[1] == "{":
1478                        token = self.token()
1479#                         print 'Entering extern "C line ', self.lineno()
1480                        while token != None and (token[0] != 'sep' or
1481                              token[1] != "}"):
1482                            if token[0] == 'name':
1483                                token = self.parseGlobal(token)
1484                            else:
1485                                self.error(
1486                                 "token %s %s unexpected at the top level" % (
1487                                        token[0], token[1]))
1488                                token = self.parseGlobal(token)
1489#                         print 'Exiting extern "C" line', self.lineno()
1490                        token = self.token()
1491                        return token
1492                else:
1493                    return token
1494        elif token[1] == 'static':
1495            static = 1
1496            token = self.token()
1497            if token == None or  token[0] != 'name':
1498                return token
1499
1500        if token[1] == 'typedef':
1501            token = self.token()
1502            return self.parseTypedef(token)
1503        else:
1504            token = self.parseType(token)
1505            type_orig = self.type
1506        if token == None or token[0] != "name":
1507            return token
1508        type = type_orig
1509        self.name = token[1]
1510        token = self.token()
1511        while token != None and (token[0] == "sep" or token[0] == "op"):
1512            if token[0] == "sep":
1513                if token[1] == "[":
1514                    type = type + token[1]
1515                    token = self.token()
1516                    while token != None and (token[0] != "sep" or \
1517                          token[1] != ";"):
1518                        type = type + token[1]
1519                        token = self.token()
1520
1521            if token != None and token[0] == "op" and token[1] == "=":
1522                 #
1523                 # Skip the initialization of the variable
1524                 #
1525                token = self.token()
1526                if token[0] == 'sep' and token[1] == '{':
1527                    token = self.token()
1528                    token = self.parseBlock(token)
1529                else:
1530                    while token != None and (token[0] != "sep" or \
1531                          (token[1] != ';' and token[1] != ',')):
1532                            token = self.token()
1533                if token == None or token[0] != "sep" or (token[1] != ';' and
1534                   token[1] != ','):
1535                    self.error("missing ';' or ',' after value")
1536
1537            if token != None and token[0] == "sep":
1538                if token[1] == ";":
1539                    if type == "struct":
1540                        self.index_add(self.name, self.filename,
1541                             not self.is_header, "struct", self.struct_fields)
1542                    else:
1543                        info = self.parseSimpleComment(self.name, True)
1544                        self.index_add(self.name, self.filename,
1545                             not self.is_header, "variable", type, info)
1546                    self.comment = None
1547                    token = self.token()
1548                    break
1549                elif token[1] == "(":
1550                    token = self.token()
1551                    token = self.parseSignature(token)
1552                    if token == None:
1553                        return None
1554                    if token[0] == "sep" and token[1] == ";":
1555                        d = self.mergeFunctionComment(self.name,
1556                                ((type, None), self.signature), 1)
1557                        self.index_add(self.name, self.filename, static,
1558                                        "function", d)
1559                        self.comment = None
1560                        token = self.token()
1561                    elif token[0] == "sep" and token[1] == "{":
1562                        d = self.mergeFunctionComment(self.name,
1563                                ((type, None), self.signature), static)
1564                        self.index_add(self.name, self.filename, static,
1565                                        "function", d)
1566                        self.comment = None
1567                        token = self.token()
1568                        token = self.parseBlock(token);
1569                elif token[1] == ',':
1570                    self.index_add(self.name, self.filename, static,
1571                                    "variable", type)
1572                    self.comment = None
1573                    type = type_orig
1574                    token = self.token()
1575                    while token != None and token[0] == "sep":
1576                        type = type + token[1]
1577                        token = self.token()
1578                    if token != None and token[0] == "name":
1579                        self.name = token[1]
1580                        token = self.token()
1581                else:
1582                    break
1583
1584        return token
1585
1586    def parse(self):
1587        self.warning("Parsing %s" % (self.filename))
1588        token = self.token()
1589        while token != None:
1590            if token[0] == 'name':
1591                token = self.parseGlobal(token)
1592            else:
1593                self.error("token %s %s unexpected at the top level" % (
1594                       token[0], token[1]))
1595                token = self.parseGlobal(token)
1596                return
1597        self.parseTopComment(self.top_comment)
1598        return self.index
1599
1600
class docBuilder:
    """A documentation builder.

    Collects the C modules (*.c) and headers (*.h) of a project, parses each
    of them with CParser, merges the resulting indexes into one global index
    and serializes that index as "<name>-api.xml".
    """
    def __init__(self, name, directories=None, excludes=None):
        """Create a builder for project `name`.

        directories: directories scanned for sources (defaults to ['.']).
        excludes: extra path fragments to skip; the keys of the module level
                  ignored_files table are always appended.
        """
        if directories is None:
            directories = ['.']
        if excludes is None:
            excludes = []
        self.name = name
        self.directories = directories
        self.excludes = list(excludes) + list(ignored_files.keys())
        self.modules = {}
        self.headers = {}
        self.idx = index()
        self.index = {}
        if name == 'libxml2':
            self.basename = 'libxml'
        else:
            self.basename = name

    def analyze(self):
        """Print the header/module counts and run the global index analysis."""
        print("Project %s : %d headers, %d modules" % (
              self.name, len(self.headers), len(self.modules)))
        self.idx.analyze()

    def scanHeaders(self):
        """Parse every registered header and merge its index into self.idx."""
        # Iterate over a snapshot of the keys since values are reassigned.
        for header in list(self.headers):
            idx = CParser(header).parse()
            self.headers[header] = idx
            self.idx.merge(idx)

    def scanModules(self):
        """Parse every registered module and merge it via merge_public()."""
        for module in list(self.modules):
            idx = CParser(module).parse()
            # idx.analyze()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def _collect_files(self, pattern, registry):
        """Register in `registry` each path matching `pattern` that contains
        none of the exclude fragments; excluded paths are reported."""
        for path in glob.glob(pattern):
            for excl in self.excludes:
                # Plain substring match against the whole path.
                if excl in path:
                    print("Skipping %s" % path)
                    break
            else:
                registry[path] = None

    def scan(self):
        """Collect the *.c and *.h files of every directory, then parse the
        headers followed by the modules."""
        for directory in self.directories:
            self._collect_files(directory + "/*.c", self.modules)
            self._collect_files(directory + "/*.h", self.headers)
        self.scanHeaders()
        self.scanModules()

    def modulename_file(self, file):
        """Return the base name of `file` with a trailing '.h' or '.c'
        removed."""
        module = os.path.basename(file)
        if module[-2:] in ('.h', '.c'):
            module = module[:-2]
        return module

    def serialize_enum(self, output, name):
        """Write the <enum/> element describing enum value `name`.

        The entry's info is indexed as (value expression, description, type).
        """
        entry = self.idx.enums[name]
        output.write("    <enum name='%s' file='%s'" % (name,
                     self.modulename_file(entry.header)))
        if entry.info is not None:
            info = entry.info
            if info[0] is not None and info[0] != '':
                # NOTE(security): eval() runs text taken from the parsed C
                # sources; only run this tool on trusted source trees.
                try:
                    val = eval(info[0])
                except Exception:
                    # Not a Python-evaluable expression: keep the raw text.
                    val = info[0]
                # (val,) so that a tuple result is formatted as one value.
                output.write(" value='%s'" % (val,))
            if info[2] is not None and info[2] != '':
                output.write(" type='%s'" % (info[2],))
            if info[1] is not None and info[1] != '':
                output.write(" info='%s'" % escape(info[1]))
        output.write("/>\n")

    def serialize_macro(self, output, name):
        """Write the <macro> element for `name` with its description and
        arguments; the entry's info is unpacked as (args, description)."""
        entry = self.idx.macros[name]
        output.write("    <macro name='%s' file='%s'>\n" % (name,
                     self.modulename_file(entry.header)))
        if entry.info is not None:
            try:
                (args, desc) = entry.info
                if desc is not None and desc != "":
                    output.write("      <info>%s</info>\n" % (escape(desc)))
                for (arg_name, arg_desc) in args:
                    if arg_desc is not None and arg_desc != "":
                        output.write("      <arg name='%s' info='%s'/>\n" % (
                                     arg_name, escape(arg_desc)))
                    else:
                        output.write("      <arg name='%s'/>\n" % (arg_name))
            except Exception:
                # Best effort: the element is still closed below, but the
                # failure is reported like the other serializers do.
                print("Failed to serialize macro %s" % (name))
        output.write("    </macro>\n")

    def serialize_typedef(self, output, name):
        """Write the element for typedef `name`.

        A typedef whose type text starts with 'struct ' is emitted as a
        <struct> element, listing the fields when the struct is known to the
        global index; anything else is emitted as a <typedef> element.
        """
        entry = self.idx.typedefs[name]
        if entry.info[0:7] == 'struct ':
            output.write("    <struct name='%s' file='%s' type='%s'" % (
                     name, self.modulename_file(entry.header), entry.info))
            struct_name = entry.info[7:]
            if struct_name in self.idx.structs and \
               isinstance(self.idx.structs[struct_name].info, (tuple, list)):
                output.write(">\n")
                try:
                    for field in self.idx.structs[struct_name].info:
                        output.write("      <field name='%s' type='%s'/>\n" % (field[1], field[0]))
                except Exception:
                    print("Failed to serialize struct %s" % (struct_name))
                output.write("    </struct>\n")
            else:
                output.write("/>\n")
        else:
            output.write("    <typedef name='%s' file='%s' type='%s'" % (
                         name, self.modulename_file(entry.header), entry.info))
            try:
                desc = entry.extra
                if desc is not None and desc != "":
                    output.write(">\n      <info>%s</info>\n" % (escape(desc)))
                    output.write("    </typedef>\n")
                else:
                    output.write("/>\n")
            except Exception:
                # No usable description: close the element as empty.
                output.write("/>\n")

    def serialize_variable(self, output, name):
        """Write the <variable> element for `name`, with its type when known
        and an <info> child when a description is available."""
        entry = self.idx.variables[name]
        if entry.info is not None:
            output.write("    <variable name='%s' file='%s' type='%s'" % (
                    name, self.modulename_file(entry.header), entry.info))
        else:
            output.write("    <variable name='%s' file='%s'" % (
                    name, self.modulename_file(entry.header)))
        desc = entry.extra
        if desc is not None and desc != "":
            output.write(">\n      <info>%s</info>\n" % (escape(desc)))
            output.write("    </variable>\n")
        else:
            output.write("/>\n")

    def serialize_function(self, output, name):
        """Write the element for function `name` (the tag is the entry's
        type), including conditionals, description, return and arguments.

        The entry's info is unpacked as (return, params, description).
        """
        entry = self.idx.functions[name]
        if name == debugsym:
            print("=>", entry)

        output.write("    <%s name='%s' file='%s' module='%s'>\n" % (entry.type,
                     name, self.modulename_file(entry.header),
                     self.modulename_file(entry.module)))
        #
        # Processing of conditionals modified by Bill 1/1/05
        #
        if entry.conditionals is not None:
            apstr = ""
            for cond in entry.conditionals:
                if apstr != "":
                    # The separator is an already escaped "&&".
                    apstr = apstr + " &amp;&amp; "
                apstr = apstr + cond
            output.write("      <cond>%s</cond>\n" % (apstr))
        try:
            (ret, params, desc) = entry.info
            if (desc is None or desc == '') and \
               name[0:9] != "xmlThrDef" and name != "xmlDllMain" and \
               ret[1] == '':
                print("%s %s from %s has no description" % (entry.type, name,
                       self.modulename_file(entry.module)))

            output.write("      <info>%s</info>\n" % (escape(desc)))
            if ret[0] is not None:
                if ret[0] == "void":
                    output.write("      <return type='void'/>\n")
                else:
                    output.write("      <return type='%s' info='%s'/>\n" % (
                             ret[0], escape(ret[1])))
            for param in params:
                if param[0] == 'void':
                    continue
                if param[2] is None:
                    output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
                else:
                    output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
        except Exception:
            print("Failed to save function %s info: " % name, repr(entry.info))
        output.write("    </%s>\n" % (entry.type))

    def serialize_exports(self, output, file):
        """Write the <file> element listing every symbol exported by the
        header `file`, grouped by kind and sorted by name."""
        module = self.modulename_file(file)
        output.write("    <file name='%s'>\n" % (module))
        hdr = self.headers[file]
        if hdr.info is not None:
            for data in ('Summary', 'Description', 'Author'):
                try:
                    output.write("     <%s>%s</%s>\n" % (
                                 data.lower(),
                                 escape(hdr.info[data]),
                                 data.lower()))
                except Exception:
                    # A missing author is tolerated silently.
                    if data != 'Author':
                        print("Header %s lacks a %s description" % (module, data))
            if 'Description' in hdr.info:
                if "DEPRECATED" in hdr.info['Description']:
                    output.write("     <deprecated/>\n")

        for symbol in sorted(hdr.macros.keys()):
            # Macros are sometime used to masquerade other types.
            if symbol in hdr.functions or symbol in hdr.variables or \
               symbol in hdr.typedefs or symbol in hdr.structs or \
               symbol in hdr.enums:
                continue
            output.write("     <exports symbol='%s' type='macro'/>\n" % (symbol))
        for (kind, table) in (('enum', hdr.enums),
                              ('typedef', hdr.typedefs),
                              ('struct', hdr.structs),
                              ('variable', hdr.variables),
                              ('function', hdr.functions)):
            for symbol in sorted(table.keys()):
                output.write("     <exports symbol='%s' type='%s'/>\n" % (
                             symbol, kind))
        output.write("    </file>\n")

    def serialize(self):
        """Write the complete API description to "<name>-api.xml" in the
        current directory."""
        filename = "%s-api.xml" % self.name
        print("Saving XML description %s" % (filename))
        # The file must really be encoded as announced in the XML
        # declaration; characters outside ISO-8859-1 are written as numeric
        # character references.  The with block closes the file even when a
        # serializer raises.
        with open(filename, "w", encoding="ISO-8859-1",
                  errors="xmlcharrefreplace") as output:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<api name='%s'>\n" % self.name)
            output.write("  <files>\n")
            for file in sorted(self.headers.keys()):
                self.serialize_exports(output, file)
            output.write("  </files>\n")
            output.write("  <symbols>\n")
            for macro in sorted(self.idx.macros.keys()):
                self.serialize_macro(output, macro)
            for enum in sorted(self.idx.enums.keys()):
                self.serialize_enum(output, enum)
            for typedef in sorted(self.idx.typedefs.keys()):
                self.serialize_typedef(output, typedef)
            for variable in sorted(self.idx.variables.keys()):
                self.serialize_variable(output, variable)
            for function in sorted(self.idx.functions.keys()):
                self.serialize_function(output, function)
            output.write("  </symbols>\n")
            output.write("</api>\n")
1888
1889
def rebuild():
    """Guess which project the current directory belongs to and rebuild
    its API description.

    The project is recognised by probing for a marker source file.  When a
    layout matches, a docBuilder is run (scan, analyze, serialize) and, if
    "../libexslt/exslt.c" also exists, a second builder is run for libexslt.
    Returns the main builder, or None when no layout matches.
    """
    # (marker file, banner, project name, directories, excludes)
    layouts = (
        ("parser.c",
         "Rebuilding API description for libxml2",
         "libxml2", [".", "."], ["tst.c"]),
        ("../parser.c",
         "Rebuilding API description for libxml2",
         "libxml2", ["..", "../include/libxml"], ["tst.c"]),
        ("../libxslt/transform.c",
         "Rebuilding API description for libxslt",
         "libxslt", ["../libxslt"],
         ["win32config.h", "libxslt.h", "tst.c"]),
    )
    for (marker, banner, project, directories, excludes) in layouts:
        if glob.glob(marker):
            break
    else:
        print("rebuild() failed, unable to guess the module")
        return None

    print(banner)
    builder = docBuilder(project, directories, excludes)
    builder.scan()
    builder.analyze()
    builder.serialize()

    if glob.glob("../libexslt/exslt.c"):
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder
1916
1917#
1918# for debugging the parser
1919#
def parse(filename):
    """Parse the single file `filename` with a fresh CParser and return
    whatever its parse() method returns."""
    return CParser(filename).parse()
1924
if __name__ == "__main__":
    # Without arguments the whole API description is rebuilt; with a file
    # argument only that file is parsed, with debugging switched on.
    if len(sys.argv) <= 1:
        rebuild()
    else:
        debug = 1
        parse(sys.argv[1])
1931