• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
# This is the API builder: it parses the C sources and builds the
# formal API description in XML.
5#
6# See Copyright for the status of this software.
7#
8# daniel@veillard.com
9#
10import os, sys
11import string
12import glob
13
# Debugging switch; it is not read in the part of the file shown here
# NOTE(review): presumably a verbosity flag - confirm against later code
debug=0
#debugsym='ignorableWhitespaceSAXFunc'
# Name of a symbol to trace: the identifier and index classes print a
# message whenever a symbol with this name is defined, updated or
# registered. None disables the tracing.
debugsym=None
17
18#
19# C parser analysis code
20#
# File name -> reason why the file is left out.
# NOTE(review): the code consulting this table is not in the visible part
# of the file; presumably the source scanner skips these names - confirm.
ignored_files = {
  "config.h": "generated portability layer",
  "libxml.h": "internal only",
  "rngparser.c": "not yet integrated",
  "testModule.c": "test tool",
  "testThreads.c": "test tool",
  "testapi.c": "generated regression tests",
  "runtest.c": "regression tests program",
  "runsuite.c": "regression tests program",
  "tst.c": "not part of the library",
  "test.c": "not part of the library",
  "testdso.c": "test for dynamid shared libraries",
  "testrecurse.c": "test for entities recursions",
  "timsort.h": "Internal header only for xpath.c 2.9.0",
}
36
# Word -> (count, reason) for words that carry no API information.
# NOTE(review): the consumer of this table is not visible here; the count
# is presumably the number of following tokens to discard together with
# the word (e.g. the parenthesised arguments of a macro) - confirm.
ignored_words = {
  "WINAPI": (0, "Windows keyword"),
  "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
  "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
  "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XSLTCALL": (0, "Special macro for win32 calls"),
  "EXSLTCALL": (0, "Special macro for win32 calls"),
  "__declspec": (3, "Windows keyword"),
  "__stdcall": (0, "Windows keyword"),
  "ATTRIBUTE_UNUSED": (0, "macro keyword"),
  "ATTRIBUTE_DESTRUCTOR": (0, "macro keyword"),
  "LIBEXSLT_PUBLIC": (0, "macro keyword"),
  "X_IN_Y": (5, "macro function builder"),
  "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"),
  "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"),
  "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
  "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"),
  "ATTRIBUTE_NO_SANITIZE": (3, "macro keyword"),
  "ATTRIBUTE_NO_SANITIZE_INTEGER": (0, "macro keyword"),
  "XML_DEPRECATED": (0, "macro keyword"),
  "XML_GLOBALS_ALLOC": (0, "macro keyword"),
  "XML_GLOBALS_ERROR": (0, "macro keyword"),
  "XML_GLOBALS_IO": (0, "macro keyword"),
  "XML_GLOBALS_PARSER": (0, "macro keyword"),
  "XML_GLOBALS_TREE": (0, "macro keyword"),
  "XML_THREAD_LOCAL": (0, "macro keyword"),
}
68
def escape(raw):
    """Return raw with the five XML special characters replaced by their
       predefined entities (&amp; &lt; &gt; &apos; &quot;).

       The ampersand is replaced first so that the '&' introduced by the
       other replacements is not escaped a second time."""
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
76
class identifier:
    """Description of one symbol: its name, the header and module it was
       found in, its kind, source line, free-form info/extra payloads,
       a static flag and the list of preprocessor conditionals."""
    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
                 info=None, extra=None, conditionals = None):
        self.name = name
        self.header = header
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        # Stores a private copy, or None when there is no conditional
        self.set_conditionals(conditionals)
        if self.name == debugsym:
            print("=> define %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % (self.module)
        if self.info is not None:
            r = r + " " +  repr(self.info)
        if self.extra is not None:
            r = r + " " + repr(self.extra)
        if self.conditionals is not None:
            r = r + " " + repr(self.conditionals)
        return r


    def set_header(self, header):
        self.header = header
    def set_module(self, module):
        self.module = module
    def set_type(self, type):
        self.type = type
    def set_info(self, info):
        self.info = info
    def set_extra(self, extra):
        self.extra = extra
    def set_lineno(self, lineno):
        self.lineno = lineno
    def set_static(self, static):
        self.static = static
    def set_conditionals(self, conditionals):
        """Store a copy of conditionals; None and empty sequences are
           both recorded as None."""
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name
    def get_header(self):
        # Used to return self.module by mistake
        return self.header
    def get_module(self):
        return self.module
    def get_type(self):
        return self.type
    def get_info(self):
        return self.info
    def get_lineno(self):
        return self.lineno
    def get_extra(self):
        return self.extra
    def get_static(self):
        return self.static
    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type = None, info = None, extra=None,
               conditionals=None):
        """Merge new information into the symbol.

           header and type are only filled in when still unknown; module
           is set when unknown or when it currently equals the header;
           info, extra and conditionals are overwritten by any value
           which is not None."""
        if self.name == debugsym:
            print("=> update %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))
        if header is not None and self.header is None:
            # Used to store the module name in the header field
            self.set_header(header)
        if module is not None and (self.module is None or
                                   self.header == self.module):
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
        if conditionals is not None:
            self.set_conditionals(conditionals)
167
class index:
    """Symbol tables: one dictionary per kind of symbol, a dictionary of
       every identifier seen, one of references, and an info dictionary."""
    def __init__(self, name = "noname"):
        self.name = name
        self.info = {}
        self.identifiers = {}
        self.references = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}

    def _record(self, name, header, module, static, type, lineno, info,
                extra, conditionals):
        """Return the identifier called name, creating it when unknown and
           updating it otherwise; static == 1 marks it as static."""
        if name not in self.identifiers:
            entry = identifier(name, header, module, type, lineno, info,
                               extra, conditionals)
            self.identifiers[name] = entry
        else:
            entry = self.identifiers[name]
            entry.update(header, module, type, info, extra, conditionals)
        if static == 1:
            entry.set_static(1)
        return entry

    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a reference to name; names starting with '__' are
           ignored and yield None, otherwise the identifier is returned."""
        if name[0:2] == '__':
            return None
        entry = self._record(name, header, module, static, type, lineno,
                             info, extra, conditionals)

        if name is not None and type is not None:
            self.references[name] = entry

        if name == debugsym:
            print("New ref: %s" % (entry))

        return entry

    def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record the definition of name in the table matching type; names
           starting with '__' are ignored and yield None, otherwise the
           identifier is returned."""
        if name[0:2] == '__':
            return None
        entry = self._record(name, header, module, static, type, lineno,
                             info, extra, conditionals)

        if name is not None and type is not None:
            # "functype" entries share the functions table
            for kind, table in (("function", self.functions),
                                ("functype", self.functions),
                                ("variable", self.variables),
                                ("include", self.includes),
                                ("struct", self.structs),
                                ("enum", self.enums),
                                ("typedef", self.typedefs),
                                ("macro", self.macros)):
                if type == kind:
                    table[name] = entry
                    break
            else:
                print("Unable to register type ", type)

        if name == debugsym:
            print("New symbol: %s" % (entry))

        return entry

    def _merge_table(self, label, mine, theirs, overrides_macro = False):
        """Copy the entries of theirs missing from mine (and into the
           identifiers table), reporting the ones already present."""
        for key, entry in list(theirs.items()):
            #
            # macro might be used to override functions or variables
            # definitions
            #
            if overrides_macro and key in self.macros:
                del self.macros[key]
            if key in mine:
                print("%s %s from %s redeclared in %s" % (
                   label, key, mine[key].header, entry.header))
            else:
                mine[key] = entry
                self.identifiers[key] = entry

    def merge(self, idx):
        """Merge the functions, variables, structs, typedefs, macros and
           enums of idx into this index, in that order."""
        self._merge_table("function", self.functions, idx.functions, True)
        self._merge_table("variable", self.variables, idx.variables, True)
        self._merge_table("struct", self.structs, idx.structs)
        self._merge_table("typedef", self.typedefs, idx.typedefs)
        for key, entry in list(idx.macros.items()):
            #
            # macro might be used to override functions or variables
            # definitions
            #
            if (key in self.variables or key in self.functions
                    or key in self.enums):
                continue
            # XML_OP is allowed to be redefined silently
            if key in self.macros and key != 'XML_OP':
                print("macro %s from %s redeclared in %s" % (
                   key, self.macros[key].header, entry.header))
            else:
                self.macros[key] = entry
                self.identifiers[key] = entry
        self._merge_table("enum", self.enums, idx.enums)

    def merge_public(self, idx):
        """Update the functions and variables already known here with the
           module, type, info and extra found in idx; symbols unknown to
           this index are ignored."""
        for key, found in list(idx.functions.items()):
            if key not in self.functions:
                continue
            known = self.functions[key]
            # check that function condition agrees with header
            if found.conditionals != known.conditionals:
                print("Header condition differs from Function for %s:"
                      % key)
                print("  H: %s" % known.conditionals)
                print("  C: %s" % found.conditionals)
            known.update(None, found.module, found.type, found.info,
                         found.extra)

        # The equivalent conditionals check is not done for variables
        # TODO: produces many false positives
        for key, found in list(idx.variables.items()):
            if key not in self.variables:
                continue
            self.variables[key].update(None, found.module, found.type,
                                       found.info, found.extra)

    def analyze_dict(self, type, dict):
        """Print how many entries dict holds and how many are not static;
           nothing is printed for an empty dictionary."""
        count = len(dict)
        public = sum(1 for entry in dict.values() if entry.static == 0)
        if count != public:
            print("  %d %s , %d public" % (count, type, public))
        elif count != 0:
            print("  %d public %s" % (count, type))


    def analyze(self):
        """Print the statistics for each table except includes and enums."""
        for label, table in (("functions", self.functions),
                             ("variables", self.variables),
                             ("structs", self.structs),
                             ("typedefs", self.typedefs),
                             ("macros", self.macros)):
            self.analyze_dict(label, table)
358
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line"""

    # Characters ending a 'name' token
    _NAME_STOPS = " \t(){}:;,+-*/%&!|[]=><"
    # Single character separators
    _SEPARATORS = "(){}:;,[]"
    # Characters starting an operator, and those allowed as second character
    _OP_STARTS = "+-*><=/%&!|."
    _OP_TAILS = "+-*><=/%&!|"

    def __init__(self, input):
        self.input = input
        self.tokens = []
        self.line = ""
        self.lineno = 0
        # Set here so that debug() can be called before the first token
        self.last = None

    def getline(self):
        """Return the next non blank line stripped of its surrounding
           whitespace, or None at the end of the input. A line ending with
           a backslash is joined with the line(s) following it."""
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.strip()
                if not n:
                    break
                else:
                    line = line + n
        return line

    def getlineno(self):
        """Return the number of physical lines read so far."""
        return self.lineno

    def push(self, token):
        """Put token back in front of the queue, it will be the next one
           returned by token()."""
        self.tokens.insert(0, token)

    def debug(self):
        """Print the last token returned, the queued tokens and the
           unparsed remainder of the current line."""
        print("Last token: ", self.last)
        print("Token queue: ", self.tokens)
        print("Line %d end: " % (self.lineno), self.line)

    def token(self):
        """Return the next token as a (kind, text) tuple or None at the
           end of the input. kind is one of 'preproc', 'string',
           'comment', 'name', 'sep' or 'op'. None is also returned when
           the input ends inside a string or a comment."""
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                # Resume with what was left over by the previous call
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                # Preprocessor line: one token per blank separated word
                self.tokens = [('preproc', word) for word in line.split()]
                break
            l = len(line)
            if line[0] == '"' or line[0] == "'":
                end = line[0]
                line = line[1:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    l = len(line)
                    while i < l:
                        if line[i] == end:
                            self.line = line[i+1:]
                            line = line[:i]
                            l = i
                            found = 1
                            break
                        if line[i] == '\\':
                            # Skip the escaped character
                            i = i + 1
                        i = i + 1
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line is None:
                            return None
                self.last = ('string', tok)
                return self.last

            if l >= 2 and line[0] == '/' and line[1] == '*':
                line = line[2:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    l = len(line)
                    while i < l:
                        if line[i] == '*' and i+1 < l and line[i+1] == '/':
                            self.line = line[i+2:]
                            # NOTE: this also drops the character preceding
                            # the '*/' (usually a blank); when '*/' starts
                            # the line the slice is line[:-1], so a closing
                            # line made of '*/' alone yields '*'. Kept as
                            # is, the comment parsers see that output.
                            line = line[:i-1]
                            l = i
                            found = 1
                            break
                        i = i + 1
                    if tok != "":
                        tok = tok + "\n"
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line is None:
                            return None
                self.last = ('comment', tok)
                return self.last
            if l >= 2 and line[0] == '/' and line[1] == '/':
                line = line[2:]
                self.last = ('comment', line)
                return self.last
            # Cut the line at the first comment or string, the remainder
            # is kept in self.line for the next round
            i = 0
            while i < l:
                if line[i] == '/' and i+1 < l and line[i+1] == '/':
                    self.line = line[i:]
                    line = line[:i]
                    break
                if line[i] == '/' and i+1 < l and line[i+1] == '*':
                    self.line = line[i:]
                    line = line[:i]
                    break
                if line[i] == '"' or line[i] == "'":
                    self.line = line[i:]
                    line = line[:i]
                    break
                i = i + 1
            l = len(line)
            i = 0
            while i < l:
                if line[i] == ' ' or line[i] == '\t':
                    i = i + 1
                    continue
                o = ord(line[i])
                if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
                   (o >= 48 and o <= 57):
                    # ASCII letter or digit: a name runs up to the next
                    # stop character
                    s = i
                    while i < l and line[i] not in self._NAME_STOPS:
                        i = i + 1
                    self.tokens.append(('name', line[s:i]))
                    continue
                if line[i] in self._SEPARATORS:
                    self.tokens.append(('sep', line[i]))
                    i = i + 1
                    continue
                if line[i] in self._OP_STARTS:
                    if line[i] == '.' and  i + 2 < l and \
                       line[i+1] == '.' and line[i+2] == '.':
                        self.tokens.append(('name', '...'))
                        i = i + 3
                        continue

                    j = i + 1
                    if j < l and line[j] in self._OP_TAILS:
                        # Two characters operator
                        self.tokens.append(('op', line[i:j+1]))
                        i = j + 1
                    else:
                        self.tokens.append(('op', line[i]))
                        i = i + 1
                    continue
                # Anything else starts a name too; the first character is
                # never a stop character here, so the scan always advances
                s = i
                while i < l and line[i] not in self._NAME_STOPS:
                    i = i + 1
                self.tokens.append(('name', line[s:i]))

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
564
565class CParser:
566    """The C module parser"""
567    def __init__(self, filename, idx = None):
568        self.filename = filename
569        if len(filename) > 2 and filename[-2:] == '.h':
570            self.is_header = 1
571        else:
572            self.is_header = 0
573        self.input = open(filename)
574        self.lexer = CLexer(self.input)
575        if idx == None:
576            self.index = index()
577        else:
578            self.index = idx
579        self.top_comment = ""
580        self.last_comment = ""
581        self.comment = None
582        self.collect_ref = 0
583        self.doc_disable = 0
584        self.conditionals = []
585        self.defines = []
586
    def collect_references(self):
        """Set the collect_ref flag to 1."""
        self.collect_ref = 1
589
    def disable(self):
        """Set the doc_disable flag: index_add(), warning() and error()
           then return without doing anything."""
        self.doc_disable = 1
592
    def enable(self):
        """Clear the doc_disable flag set by disable()."""
        self.doc_disable = 0
595
    def lineno(self):
        """Return the current line number as reported by the lexer."""
        return self.lexer.getlineno()
598
599    def index_add(self, name, module, static, type, info=None, extra = None):
600        if self.doc_disable:
601            return
602        if self.is_header == 1:
603            self.index.add(name, module, module, static, type, self.lineno(),
604                           info, extra, self.conditionals)
605        else:
606            self.index.add(name, None, module, static, type, self.lineno(),
607                           info, extra, self.conditionals)
608
609    def index_add_ref(self, name, module, static, type, info=None,
610                      extra = None):
611        if self.is_header == 1:
612            self.index.add_ref(name, module, module, static, type,
613                               self.lineno(), info, extra, self.conditionals)
614        else:
615            self.index.add_ref(name, None, module, static, type, self.lineno(),
616                               info, extra, self.conditionals)
617
618    def warning(self, msg):
619        if self.doc_disable:
620            return
621        print(msg)
622
623    def error(self, msg, token=-1):
624        if self.doc_disable:
625            return
626
627        print("Parse Error: " + msg)
628        if token != -1:
629            print("Got token ", token)
630        self.lexer.debug()
631        sys.exit(1)
632
633    def debug(self, msg, token=-1):
634        print("Debug: " + msg)
635        if token != -1:
636            print("Got token ", token)
637        self.lexer.debug()
638
639    def parseTopComment(self, comment):
640        res = {}
641        lines = comment.split("\n")
642        item = None
643        for line in lines:
644            while line != "" and (line[0] == ' ' or line[0] == '\t'):
645                line = line[1:]
646            while line != "" and line[0] == '*':
647                line = line[1:]
648            while line != "" and (line[0] == ' ' or line[0] == '\t'):
649                line = line[1:]
650            try:
651                (it, line) = line.split(":", 1)
652                item = it
653                while line != "" and (line[0] == ' ' or line[0] == '\t'):
654                    line = line[1:]
655                if item in res:
656                    res[item] = res[item] + " " + line
657                else:
658                    res[item] = line
659            except:
660                if item != None:
661                    if item in res:
662                        res[item] = res[item] + " " + line
663                    else:
664                        res[item] = line
665        self.index.info = res
666
667    def parseComment(self, token):
668        if self.top_comment == "":
669            self.top_comment = token[1]
670        if self.comment == None or token[1][0] == '*':
671            self.comment = token[1];
672        else:
673            self.comment = self.comment + token[1]
674        token = self.lexer.token()
675
676        if self.comment.find("DOC_DISABLE") != -1:
677            self.disable()
678
679        if self.comment.find("DOC_ENABLE") != -1:
680            self.enable()
681
682        return token
683
684    #
685    # Parse a simple comment block for typedefs or global variables
686    #
687    def parseSimpleComment(self, name, quiet = False):
688        if name[0:2] == '__':
689            quiet = 1
690
691        args = []
692        desc = ""
693
694        if self.comment == None:
695            if not quiet:
696                self.warning("Missing comment for %s" % (name))
697            return(None)
698        if self.comment[0] != '*':
699            if not quiet:
700                self.warning("Missing * in comment for %s" % (name))
701            return(None)
702        lines = self.comment.split('\n')
703        if lines[0] == '*':
704            del lines[0]
705        if lines[0] != "* %s:" % (name):
706            if not quiet:
707                self.warning("Misformatted comment for %s" % (name))
708                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
709            return(None)
710        del lines[0]
711        while len(lines) > 0 and lines[0] == '*':
712            del lines[0]
713        desc = ""
714        while len(lines) > 0:
715            l = lines[0]
716            while len(l) > 0 and l[0] == '*':
717                l = l[1:]
718            l = l.strip()
719            desc = desc + " " + l
720            del lines[0]
721
722        desc = desc.strip()
723
724        if quiet == 0:
725            if desc == "":
726                self.warning("Comment for %s lacks description" % (name))
727
728        return(desc)
729    #
    # Parse a comment block associated with a macro
731    #
732    def parseMacroComment(self, name, quiet = 0):
733        if name[0:2] == '__':
734            quiet = 1
735
736        args = []
737        desc = ""
738
739        if self.comment == None:
740            if not quiet:
741                self.warning("Missing comment for macro %s" % (name))
742            return((args, desc))
743        if self.comment[0] != '*':
744            if not quiet:
745                self.warning("Missing * in macro comment for %s" % (name))
746            return((args, desc))
747        lines = self.comment.split('\n')
748        if lines[0] == '*':
749            del lines[0]
750        if lines[0] != "* %s:" % (name):
751            if not quiet:
752                self.warning("Misformatted macro comment for %s" % (name))
753                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
754            return((args, desc))
755        del lines[0]
756        while lines[0] == '*':
757            del lines[0]
758        while len(lines) > 0 and lines[0][0:3] == '* @':
759            l = lines[0][3:]
760            try:
761                (arg, desc) = l.split(':', 1)
762                desc=desc.strip()
763                arg=arg.strip()
764            except:
765                if not quiet:
766                    self.warning("Misformatted macro comment for %s" % (name))
767                    self.warning("  problem with '%s'" % (lines[0]))
768                del lines[0]
769                continue
770            del lines[0]
771            l = lines[0].strip()
772            while len(l) > 2 and l[0:3] != '* @':
773                while l[0] == '*':
774                    l = l[1:]
775                desc = desc + ' ' + l.strip()
776                del lines[0]
777                if len(lines) == 0:
778                    break
779                l = lines[0]
780            args.append((arg, desc))
781        while len(lines) > 0 and lines[0] == '*':
782            del lines[0]
783        desc = ""
784        while len(lines) > 0:
785            l = lines[0]
786            while len(l) > 0 and l[0] == '*':
787                l = l[1:]
788            l = l.strip()
789            desc = desc + " " + l
790            del lines[0]
791
792        desc = desc.strip()
793
794        if quiet == 0:
795            if desc == "":
796                self.warning("Macro comment for %s lack description of the macro" % (name))
797
798        return((args, desc))
799
800     #
801     # Parse a comment block and merge the information found in the
802     # parameters descriptions, finally returns a block as complete
803     # as possible
804     #
805    def mergeFunctionComment(self, name, description, quiet = 0):
806        if name == 'main':
807            quiet = 1
808        if name[0:2] == '__':
809            quiet = 1
810
811        (ret, args) = description
812        desc = ""
813        retdesc = ""
814
815        if self.comment == None:
816            if not quiet:
817                self.warning("Missing comment for function %s" % (name))
818            return(((ret[0], retdesc), args, desc))
819        if self.comment[0] != '*':
820            if not quiet:
821                self.warning("Missing * in function comment for %s" % (name))
822            return(((ret[0], retdesc), args, desc))
823        lines = self.comment.split('\n')
824        if lines[0] == '*':
825            del lines[0]
826        if lines[0] != "* %s:" % (name):
827            if not quiet:
828                self.warning("Misformatted function comment for %s" % (name))
829                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
830            return(((ret[0], retdesc), args, desc))
831        del lines[0]
832        while lines[0] == '*':
833            del lines[0]
834        nbargs = len(args)
835        while len(lines) > 0 and lines[0][0:3] == '* @':
836            l = lines[0][3:]
837            try:
838                (arg, desc) = l.split(':', 1)
839                desc=desc.strip()
840                arg=arg.strip()
841            except:
842                if not quiet:
843                    self.warning("Misformatted function comment for %s" % (name))
844                    self.warning("  problem with '%s'" % (lines[0]))
845                del lines[0]
846                continue
847            del lines[0]
848            l = lines[0].strip()
849            while len(l) > 2 and l[0:3] != '* @':
850                while l[0] == '*':
851                    l = l[1:]
852                desc = desc + ' ' + l.strip()
853                del lines[0]
854                if len(lines) == 0:
855                    break
856                l = lines[0]
857            i = 0
858            while i < nbargs:
859                if args[i][1] == arg:
860                    args[i] = (args[i][0], arg, desc)
861                    break;
862                i = i + 1
863            if i >= nbargs:
864                if not quiet:
865                    self.warning("Unable to find arg %s from function comment for %s" % (
866                       arg, name))
867        while len(lines) > 0 and lines[0] == '*':
868            del lines[0]
869        desc = ""
870        while len(lines) > 0:
871            l = lines[0]
872            while len(l) > 0 and l[0] == '*':
873                l = l[1:]
874            l = l.strip()
875            if len(l) >= 6 and  l[0:6] == "return" or l[0:6] == "Return":
876                try:
877                    l = l.split(' ', 1)[1]
878                except:
879                    l = ""
880                retdesc = l.strip()
881                del lines[0]
882                while len(lines) > 0:
883                    l = lines[0]
884                    while len(l) > 0 and l[0] == '*':
885                        l = l[1:]
886                    l = l.strip()
887                    retdesc = retdesc + " " + l
888                    del lines[0]
889            else:
890                desc = desc + " " + l
891                del lines[0]
892
893        retdesc = retdesc.strip()
894        desc = desc.strip()
895
896        if quiet == 0:
897             #
898             # report missing comments
899             #
900            i = 0
901            while i < nbargs:
902                if args[i][2] == None and args[i][0] != "void" and \
903                   ((args[i][1] != None) or (args[i][1] == '')):
904                    self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
905                i = i + 1
906            if retdesc == "" and ret[0] != "void":
907                self.warning("Function comment for %s lacks description of return value" % (name))
908            if desc == "" and retdesc == "":
909                self.warning("Function comment for %s lacks description of the function" % (name))
910
911        return(((ret[0], retdesc), args, desc))
912
913    def parsePreproc(self, token):
914        if debug:
915            print("=> preproc ", token, self.lexer.tokens)
916        name = token[1]
917        if name == "#include":
918            token = self.lexer.token()
919            if token == None:
920                return None
921            if token[0] == 'preproc':
922                self.index_add(token[1], self.filename, not self.is_header,
923                                "include")
924                return self.lexer.token()
925            return token
926        if name == "#define":
927            token = self.lexer.token()
928            if token == None:
929                return None
930            if token[0] == 'preproc':
931                 # TODO macros with arguments
932                name = token[1]
933                lst = []
934                token = self.lexer.token()
935                while token != None and token[0] == 'preproc' and \
936                      token[1][0] != '#':
937                    lst.append(token[1])
938                    token = self.lexer.token()
939                try:
940                    name = name.split('(') [0]
941                except:
942                    pass
943                info = self.parseMacroComment(name, True)
944                self.index_add(name, self.filename, not self.is_header,
945                                "macro", info)
946                return token
947
948        #
949        # Processing of conditionals modified by Bill 1/1/05
950        #
951        # We process conditionals (i.e. tokens from #ifdef, #ifndef,
952        # #if, #else and #endif) for headers and mainline code,
953        # store the ones from the header in libxml2-api.xml, and later
954        # (in the routine merge_public) verify that the two (header and
955        # mainline code) agree.
956        #
957        # There is a small problem with processing the headers. Some of
958        # the variables are not concerned with enabling / disabling of
959        # library functions (e.g. '__XML_PARSER_H__'), and we don't want
960        # them to be included in libxml2-api.xml, or involved in
961        # the check between the header and the mainline code.  To
962        # accomplish this, we ignore any conditional which doesn't include
963        # the string 'ENABLED'
964        #
965        if name == "#ifdef":
966            apstr = self.lexer.tokens[0][1]
967            try:
968                self.defines.append(apstr)
969                if apstr.find('ENABLED') != -1:
970                    self.conditionals.append("defined(%s)" % apstr)
971            except:
972                pass
973        elif name == "#ifndef":
974            apstr = self.lexer.tokens[0][1]
975            try:
976                self.defines.append(apstr)
977                if apstr.find('ENABLED') != -1:
978                    self.conditionals.append("!defined(%s)" % apstr)
979            except:
980                pass
981        elif name == "#if":
982            apstr = ""
983            for tok in self.lexer.tokens:
984                if apstr != "":
985                    apstr = apstr + " "
986                apstr = apstr + tok[1]
987            try:
988                self.defines.append(apstr)
989                if apstr.find('ENABLED') != -1:
990                    self.conditionals.append(apstr)
991            except:
992                pass
993        elif name == "#else":
994            if self.conditionals != [] and \
995               self.defines[-1].find('ENABLED') != -1:
996                self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
997        elif name == "#endif":
998            if self.conditionals != [] and \
999               self.defines[-1].find('ENABLED') != -1:
1000                self.conditionals = self.conditionals[:-1]
1001            self.defines = self.defines[:-1]
1002        token = self.lexer.token()
1003        while token != None and token[0] == 'preproc' and \
1004            token[1][0] != '#':
1005            token = self.lexer.token()
1006        return token
1007
    #
    # Token acquisition on top of the lexer: it handles preprocessor
    # directives and comments internally, since they are logically not
    # part of the program structure.
    #
1013    def token(self):
1014        global ignored_words
1015
1016        token = self.lexer.token()
1017        while token != None:
1018            if token[0] == 'comment':
1019                token = self.parseComment(token)
1020                continue
1021            elif token[0] == 'preproc':
1022                token = self.parsePreproc(token)
1023                continue
1024            elif token[0] == "name" and token[1] == "__const":
1025                token = ("name", "const")
1026                return token
1027            elif token[0] == "name" and token[1] == "__attribute":
1028                token = self.lexer.token()
1029                while token != None and token[1] != ";":
1030                    token = self.lexer.token()
1031                return token
1032            elif token[0] == "name" and token[1] in ignored_words:
1033                (n, info) = ignored_words[token[1]]
1034                i = 0
1035                while i < n:
1036                    token = self.lexer.token()
1037                    i = i + 1
1038                token = self.lexer.token()
1039                continue
1040            else:
1041                if debug:
1042                    print("=> ", token)
1043                return token
1044        return None
1045
1046     #
1047     # Parse a typedef, it records the type and its name.
1048     #
1049    def parseTypedef(self, token):
1050        if token == None:
1051            return None
1052        token = self.parseType(token)
1053        if token == None:
1054            self.error("parsing typedef")
1055            return None
1056        base_type = self.type
1057        type = base_type
1058         #self.debug("end typedef type", token)
1059        while token != None:
1060            if token[0] == "name":
1061                name = token[1]
1062                signature = self.signature
1063                if signature != None:
1064                    type = type.split('(')[0]
1065                    d = self.mergeFunctionComment(name,
1066                            ((type, None), signature), 1)
1067                    self.index_add(name, self.filename, not self.is_header,
1068                                    "functype", d)
1069                else:
1070                    if base_type == "struct":
1071                        self.index_add(name, self.filename, not self.is_header,
1072                                        "struct", type)
1073                        base_type = "struct " + name
1074                    else:
1075                        # TODO report missing or misformatted comments
1076                        info = self.parseSimpleComment(name, True)
1077                        self.index_add(name, self.filename, not self.is_header,
1078                                    "typedef", type, info)
1079                token = self.token()
1080            else:
1081                self.error("parsing typedef: expecting a name")
1082                return token
1083             #self.debug("end typedef", token)
1084            if token != None and token[0] == 'sep' and token[1] == ',':
1085                type = base_type
1086                token = self.token()
1087                while token != None and token[0] == "op":
1088                    type = type + token[1]
1089                    token = self.token()
1090            elif token != None and token[0] == 'sep' and token[1] == ';':
1091                break;
1092            elif token != None and token[0] == 'name':
1093                type = base_type
1094                continue;
1095            else:
1096                self.error("parsing typedef: expecting ';'", token)
1097                return token
1098        token = self.token()
1099        return token
1100
    #
    # Parse a C code block (used for function bodies): it parses up to
    # and including the balancing }
    #
1105    def parseBlock(self, token):
1106        while token != None:
1107            if token[0] == "sep" and token[1] == "{":
1108                token = self.token()
1109                token = self.parseBlock(token)
1110            elif token[0] == "sep" and token[1] == "}":
1111                token = self.token()
1112                return token
1113            else:
1114                if self.collect_ref == 1:
1115                    oldtok = token
1116                    token = self.token()
1117                    if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
1118                        if token[0] == "sep" and token[1] == "(":
1119                            self.index_add_ref(oldtok[1], self.filename,
1120                                                0, "function")
1121                            token = self.token()
1122                        elif token[0] == "name":
1123                            token = self.token()
1124                            if token[0] == "sep" and (token[1] == ";" or
1125                               token[1] == "," or token[1] == "="):
1126                                self.index_add_ref(oldtok[1], self.filename,
1127                                                    0, "type")
1128                    elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
1129                        self.index_add_ref(oldtok[1], self.filename,
1130                                            0, "typedef")
1131                    elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
1132                        self.index_add_ref(oldtok[1], self.filename,
1133                                            0, "typedef")
1134
1135                else:
1136                    token = self.token()
1137        return token
1138
1139     #
1140     # Parse a C struct definition till the balancing }
1141     #
1142    def parseStruct(self, token):
1143        fields = []
1144         #self.debug("start parseStruct", token)
1145        while token != None:
1146            if token[0] == "sep" and token[1] == "{":
1147                token = self.token()
1148                token = self.parseTypeBlock(token)
1149            elif token[0] == "sep" and token[1] == "}":
1150                self.struct_fields = fields
1151                 #self.debug("end parseStruct", token)
1152                 #print fields
1153                token = self.token()
1154                return token
1155            else:
1156                base_type = self.type
1157                 #self.debug("before parseType", token)
1158                token = self.parseType(token)
1159                 #self.debug("after parseType", token)
1160                if token != None and token[0] == "name":
1161                    fname = token[1]
1162                    token = self.token()
1163                    if token[0] == "sep" and token[1] == ";":
1164                        self.comment = None
1165                        token = self.token()
1166                        fields.append((self.type, fname, self.comment))
1167                        self.comment = None
1168                    else:
1169                        self.error("parseStruct: expecting ;", token)
1170                elif token != None and token[0] == "sep" and token[1] == "{":
1171                    token = self.token()
1172                    token = self.parseTypeBlock(token)
1173                    if token != None and token[0] == "name":
1174                        token = self.token()
1175                    if token != None and token[0] == "sep" and token[1] == ";":
1176                        token = self.token()
1177                    else:
1178                        self.error("parseStruct: expecting ;", token)
1179                else:
1180                    self.error("parseStruct: name", token)
1181                    token = self.token()
1182                self.type = base_type;
1183        self.struct_fields = fields
1184         #self.debug("end parseStruct", token)
1185         #print fields
1186        return token
1187
1188     #
1189     # Parse a C enum block, parse till the balancing }
1190     #
1191    def parseEnumBlock(self, token):
1192        self.enums = []
1193        name = None
1194        self.comment = None
1195        comment = ""
1196        value = "0"
1197        while token != None:
1198            if token[0] == "sep" and token[1] == "{":
1199                token = self.token()
1200                token = self.parseTypeBlock(token)
1201            elif token[0] == "sep" and token[1] == "}":
1202                if name != None:
1203                    if self.comment != None:
1204                        comment = self.comment
1205                        self.comment = None
1206                    self.enums.append((name, value, comment))
1207                token = self.token()
1208                return token
1209            elif token[0] == "name":
1210                    if name != None:
1211                        if self.comment != None:
1212                            comment = self.comment.strip()
1213                            self.comment = None
1214                        self.enums.append((name, value, comment))
1215                    name = token[1]
1216                    comment = ""
1217                    token = self.token()
1218                    if token[0] == "op" and token[1][0] == "=":
1219                        value = ""
1220                        if len(token[1]) > 1:
1221                            value = token[1][1:]
1222                        token = self.token()
1223                        while token[0] != "sep" or (token[1] != ',' and
1224                              token[1] != '}'):
1225                            value = value + token[1]
1226                            token = self.token()
1227                    else:
1228                        try:
1229                            value = "%d" % (int(value) + 1)
1230                        except:
1231                            self.warning("Failed to compute value of enum %s" % (name))
1232                            value=""
1233                    if token[0] == "sep" and token[1] == ",":
1234                        token = self.token()
1235            else:
1236                token = self.token()
1237        return token
1238
    #
    # Parse a C definition block (used for structs): it parses up to
    # the balancing }
    #
1243    def parseTypeBlock(self, token):
1244        while token != None:
1245            if token[0] == "sep" and token[1] == "{":
1246                token = self.token()
1247                token = self.parseTypeBlock(token)
1248            elif token[0] == "sep" and token[1] == "}":
1249                token = self.token()
1250                return token
1251            else:
1252                token = self.token()
1253        return token
1254
1255     #
1256     # Parse a type: the fact that the type name can either occur after
1257     #    the definition or within the definition makes it a little harder
1258     #    if inside, the name token is pushed back before returning
1259     #
1260    def parseType(self, token):
1261        self.type = ""
1262        self.struct_fields = []
1263        self.signature = None
1264        if token == None:
1265            return token
1266
1267        have_sign = 0
1268        done = 0
1269
1270        while token[0] == "name" and (
1271              token[1] == "const" or \
1272              token[1] == "unsigned" or \
1273              token[1] == "signed"):
1274            if token[1] == "unsigned" or token[1] == "signed":
1275                have_sign = 1
1276            if self.type == "":
1277                self.type = token[1]
1278            else:
1279                self.type = self.type + " " + token[1]
1280            token = self.token()
1281
1282        if token[0] == "name" and token[1] in ("char", "short", "int", "long"):
1283            if self.type == "":
1284                self.type = token[1]
1285            else:
1286                self.type = self.type + " " + token[1]
1287
1288        elif have_sign:
1289            done = 1
1290
1291        elif token[0] == "name" and token[1] == "struct":
1292            if self.type == "":
1293                self.type = token[1]
1294            else:
1295                self.type = self.type + " " + token[1]
1296            token = self.token()
1297            nametok = None
1298            if token[0] == "name":
1299                nametok = token
1300                token = self.token()
1301            if token != None and token[0] == "sep" and token[1] == "{":
1302                token = self.token()
1303                token = self.parseStruct(token)
1304            elif token != None and token[0] == "op" and token[1] == "*":
1305                self.type = self.type + " " + nametok[1] + " *"
1306                token = self.token()
1307                while token != None and token[0] == "op" and token[1] == "*":
1308                    self.type = self.type + " *"
1309                    token = self.token()
1310                if token[0] == "name":
1311                    nametok = token
1312                    token = self.token()
1313                else:
1314                    self.error("struct : expecting name", token)
1315                    return token
1316            elif token != None and token[0] == "name" and nametok != None:
1317                self.type = self.type + " " + nametok[1]
1318                return token
1319
1320            if nametok != None:
1321                self.lexer.push(token)
1322                token = nametok
1323            return token
1324
1325        elif token[0] == "name" and token[1] == "enum":
1326            if self.type == "":
1327                self.type = token[1]
1328            else:
1329                self.type = self.type + " " + token[1]
1330            self.enums = []
1331            token = self.token()
1332            if token != None and token[0] == "sep" and token[1] == "{":
1333                token = self.token()
1334                token = self.parseEnumBlock(token)
1335            else:
1336                self.error("parsing enum: expecting '{'", token)
1337            enum_type = None
1338            if token != None and token[0] != "name":
1339                self.lexer.push(token)
1340                token = ("name", "enum")
1341            else:
1342                enum_type = token[1]
1343            for enum in self.enums:
1344                self.index_add(enum[0], self.filename,
1345                               not self.is_header, "enum",
1346                               (enum[1], enum[2], enum_type))
1347            return token
1348
1349        elif token[0] == "name":
1350            if self.type == "":
1351                self.type = token[1]
1352            else:
1353                self.type = self.type + " " + token[1]
1354        else:
1355            self.error("parsing type %s: expecting a name" % (self.type),
1356                       token)
1357            return token
1358        if not done:
1359            token = self.token()
1360        while token != None and (token[0] == "op" or
1361              token[0] == "name" and token[1] == "const"):
1362            self.type = self.type + " " + token[1]
1363            token = self.token()
1364
1365         #
1366         # if there is a parenthesis here, this means a function type
1367         #
1368        if token != None and token[0] == "sep" and token[1] == '(':
1369            self.type = self.type + token[1]
1370            token = self.token()
1371            while token != None and token[0] == "op" and token[1] == '*':
1372                self.type = self.type + token[1]
1373                token = self.token()
1374            if token == None or token[0] != "name" :
1375                self.error("parsing function type, name expected", token);
1376                return token
1377            self.type = self.type + token[1]
1378            nametok = token
1379            token = self.token()
1380            if token != None and token[0] == "sep" and token[1] == ')':
1381                self.type = self.type + token[1]
1382                token = self.token()
1383                if token != None and token[0] == "sep" and token[1] == '(':
1384                    token = self.token()
1385                    type = self.type;
1386                    token = self.parseSignature(token);
1387                    self.type = type;
1388                else:
1389                    self.error("parsing function type, '(' expected", token);
1390                    return token
1391            else:
1392                self.error("parsing function type, ')' expected", token);
1393                return token
1394            self.lexer.push(token)
1395            token = nametok
1396            return token
1397
1398         #
1399         # do some lookahead for arrays
1400         #
1401        if token != None and token[0] == "name":
1402            nametok = token
1403            token = self.token()
1404            if token != None and token[0] == "sep" and token[1] == '[':
1405                self.type = self.type + nametok[1]
1406                while token != None and token[0] == "sep" and token[1] == '[':
1407                    self.type = self.type + token[1]
1408                    token = self.token()
1409                    while token != None and token[0] != 'sep' and \
1410                          token[1] != ']' and token[1] != ';':
1411                        self.type = self.type + token[1]
1412                        token = self.token()
1413                if token != None and token[0] == 'sep' and token[1] == ']':
1414                    self.type = self.type + token[1]
1415                    token = self.token()
1416                else:
1417                    self.error("parsing array type, ']' expected", token);
1418                    return token
1419            elif token != None and token[0] == "sep" and token[1] == ':':
1420                 # remove :12 in case it's a limited int size
1421                token = self.token()
1422                token = self.token()
1423            self.lexer.push(token)
1424            token = nametok
1425
1426        return token
1427
1428     #
1429     # Parse a signature: '(' has been parsed and we scan the type definition
1430     #    up to the ')' included
1431    def parseSignature(self, token):
1432        signature = []
1433        if token != None and token[0] == "sep" and token[1] == ')':
1434            self.signature = []
1435            token = self.token()
1436            return token
1437        while token != None:
1438            token = self.parseType(token)
1439            if token != None and token[0] == "name":
1440                signature.append((self.type, token[1], None))
1441                token = self.token()
1442            elif token != None and token[0] == "sep" and token[1] == ',':
1443                token = self.token()
1444                continue
1445            elif token != None and token[0] == "sep" and token[1] == ')':
1446                 # only the type was provided
1447                if self.type == "...":
1448                    signature.append((self.type, "...", None))
1449                else:
1450                    signature.append((self.type, None, None))
1451            if token != None and token[0] == "sep":
1452                if token[1] == ',':
1453                    token = self.token()
1454                    continue
1455                elif token[1] == ')':
1456                    token = self.token()
1457                    break
1458        self.signature = signature
1459        return token
1460
1461     #
1462     # Parse a global definition, be it a type, variable or function
1463     # the extern "C" blocks are a bit nasty and require it to recurse.
1464     #
1465    def parseGlobal(self, token):
1466        static = 0
1467        if token[1] == 'extern':
1468            token = self.token()
1469            if token == None:
1470                return token
1471            if token[0] == 'string':
1472                if token[1] == 'C':
1473                    token = self.token()
1474                    if token == None:
1475                        return token
1476                    if token[0] == 'sep' and token[1] == "{":
1477                        token = self.token()
1478#                         print 'Entering extern "C line ', self.lineno()
1479                        while token != None and (token[0] != 'sep' or
1480                              token[1] != "}"):
1481                            if token[0] == 'name':
1482                                token = self.parseGlobal(token)
1483                            else:
1484                                self.error(
1485                                 "token %s %s unexpected at the top level" % (
1486                                        token[0], token[1]))
1487                                token = self.parseGlobal(token)
1488#                         print 'Exiting extern "C" line', self.lineno()
1489                        token = self.token()
1490                        return token
1491                else:
1492                    return token
1493        elif token[1] == 'static':
1494            static = 1
1495            token = self.token()
1496            if token == None or  token[0] != 'name':
1497                return token
1498
1499        if token[1] == 'typedef':
1500            token = self.token()
1501            return self.parseTypedef(token)
1502        else:
1503            token = self.parseType(token)
1504            type_orig = self.type
1505        if token == None or token[0] != "name":
1506            return token
1507        type = type_orig
1508        self.name = token[1]
1509        token = self.token()
1510        while token != None and (token[0] == "sep" or token[0] == "op"):
1511            if token[0] == "sep":
1512                if token[1] == "[":
1513                    type = type + token[1]
1514                    token = self.token()
1515                    while token != None and (token[0] != "sep" or \
1516                          token[1] != ";"):
1517                        type = type + token[1]
1518                        token = self.token()
1519
1520            if token != None and token[0] == "op" and token[1] == "=":
1521                 #
1522                 # Skip the initialization of the variable
1523                 #
1524                token = self.token()
1525                if token[0] == 'sep' and token[1] == '{':
1526                    token = self.token()
1527                    token = self.parseBlock(token)
1528                else:
1529                    while token != None and (token[0] != "sep" or \
1530                          (token[1] != ';' and token[1] != ',')):
1531                            token = self.token()
1532                if token == None or token[0] != "sep" or (token[1] != ';' and
1533                   token[1] != ','):
1534                    self.error("missing ';' or ',' after value")
1535
1536            if token != None and token[0] == "sep":
1537                if token[1] == ";":
1538                    if type == "struct":
1539                        self.index_add(self.name, self.filename,
1540                             not self.is_header, "struct", self.struct_fields)
1541                    else:
1542                        info = self.parseSimpleComment(self.name, True)
1543                        self.index_add(self.name, self.filename,
1544                             not self.is_header, "variable", type, info)
1545                    self.comment = None
1546                    token = self.token()
1547                    break
1548                elif token[1] == "(":
1549                    token = self.token()
1550                    token = self.parseSignature(token)
1551                    if token == None:
1552                        return None
1553                    if token[0] == "sep" and token[1] == ";":
1554                        d = self.mergeFunctionComment(self.name,
1555                                ((type, None), self.signature), 1)
1556                        self.index_add(self.name, self.filename, static,
1557                                        "function", d)
1558                        self.comment = None
1559                        token = self.token()
1560                    elif token[0] == "sep" and token[1] == "{":
1561                        d = self.mergeFunctionComment(self.name,
1562                                ((type, None), self.signature), static)
1563                        self.index_add(self.name, self.filename, static,
1564                                        "function", d)
1565                        self.comment = None
1566                        token = self.token()
1567                        token = self.parseBlock(token);
1568                elif token[1] == ',':
1569                    self.index_add(self.name, self.filename, static,
1570                                    "variable", type)
1571                    self.comment = None
1572                    type = type_orig
1573                    token = self.token()
1574                    while token != None and token[0] == "sep":
1575                        type = type + token[1]
1576                        token = self.token()
1577                    if token != None and token[0] == "name":
1578                        self.name = token[1]
1579                        token = self.token()
1580                else:
1581                    break
1582
1583        return token
1584
1585    def parse(self):
1586        self.warning("Parsing %s" % (self.filename))
1587        token = self.token()
1588        while token != None:
1589            if token[0] == 'name':
1590                token = self.parseGlobal(token)
1591            else:
1592                self.error("token %s %s unexpected at the top level" % (
1593                       token[0], token[1]))
1594                token = self.parseGlobal(token)
1595                return
1596        self.parseTopComment(self.top_comment)
1597        return self.index
1598
1599
class docBuilder:
    """A documentation builder.

    Collects the ``*.c`` and ``*.h`` files of a set of directories, runs the
    C parser over each of them, merges the per-file indexes into one global
    index and writes the result as ``<name>-api.xml``.

    Relies on ``index``, ``CParser``, ``escape``, ``ignored_files`` and
    ``debugsym``, which are defined elsewhere in this file.
    """

    def __init__(self, name, directories=None, excludes=None):
        """Create a builder for project *name*.

        name        -- project name; also used to build the output file name.
        directories -- directories to scan, defaults to ``['.']``.
        excludes    -- extra substrings; any file whose path contains one of
                       them is skipped.  The keys of ``ignored_files`` are
                       always appended.
        """
        self.name = name
        # None sentinels instead of mutable defaults: the default list must
        # not be shared between instances.
        self.directories = ['.'] if directories is None else directories
        extra_excludes = [] if excludes is None else list(excludes)
        self.excludes = extra_excludes + list(ignored_files.keys())
        self.modules = {}
        self.headers = {}
        self.idx = index()
        self.index = {}
        if name == 'libxml2':
            self.basename = 'libxml'
        else:
            self.basename = name

    def analyze(self):
        """Print a one-line summary and run the global index analysis."""
        print("Project %s : %d headers, %d modules" % (
              self.name, len(self.headers), len(self.modules)))
        self.idx.analyze()

    def scanHeaders(self):
        """Parse every registered header and merge it into the global index."""
        for header in list(self.headers.keys()):
            idx = CParser(header).parse()
            self.headers[header] = idx
            self.idx.merge(idx)

    def scanModules(self):
        """Parse every registered module and merge it with merge_public()."""
        for module in list(self.modules.keys()):
            idx = CParser(module).parse()
            # idx.analyze()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def _collect(self, pattern, registry):
        """Register in *registry* each file matching *pattern* not excluded."""
        for path in glob.glob(pattern):
            if any(excl in path for excl in self.excludes):
                print("Skipping %s" % path)
            else:
                registry[path] = None

    def scan(self):
        """Find the C modules and headers of all directories and parse them.

        Headers are parsed before modules.
        """
        for directory in self.directories:
            self._collect(directory + "/*.c", self.modules)
            self._collect(directory + "/*.h", self.headers)
        self.scanHeaders()
        self.scanModules()

    def modulename_file(self, file):
        """Return the base name of *file* without a ``.h`` or ``.c`` suffix."""
        module = os.path.basename(file)
        if module[-2:] in ('.h', '.c'):
            module = module[:-2]
        return module

    def serialize_enum(self, output, name):
        """Write the ``<enum>`` element for enum value *name* to *output*.

        The entry's info is indexed as (value, description, type).
        """
        entry = self.idx.enums[name]
        output.write("    <enum name='%s' file='%s'" % (
                     name, self.modulename_file(entry.header)))
        info = entry.info
        if info is not None:
            if info[0] is not None and info[0] != '':
                # SECURITY NOTE(review): eval() runs text extracted from the
                # scanned C sources; only run this tool on trusted trees.
                try:
                    val = eval(info[0])
                except Exception:
                    # Not a Python-evaluable expression: keep the raw text.
                    val = info[0]
                # One-element tuple so a tuple-valued result cannot break
                # the % formatting.
                output.write(" value='%s'" % (val,))
            if info[2] is not None and info[2] != '':
                output.write(" type='%s'" % info[2])
            if info[1] is not None and info[1] != '':
                output.write(" info='%s'" % escape(info[1]))
        output.write("/>\n")

    def serialize_macro(self, output, name):
        """Write the ``<macro>`` element for macro *name* to *output*.

        The entry's info is expected to be (args, description) where args is
        a sequence of (name, description) pairs.
        """
        entry = self.idx.macros[name]
        output.write("    <macro name='%s' file='%s'>\n" % (
                     name, self.modulename_file(entry.header)))
        if entry.info is not None:
            try:
                (args, desc) = entry.info
                if desc is not None and desc != "":
                    output.write("      <info>%s</info>\n" % (escape(desc)))
                for (arg_name, arg_desc) in args:
                    if arg_desc is not None and arg_desc != "":
                        output.write("      <arg name='%s' info='%s'/>\n" % (
                                     arg_name, escape(arg_desc)))
                    else:
                        output.write("      <arg name='%s'/>\n" % (arg_name))
            except Exception:
                # Deliberately best effort: info of an unexpected shape only
                # costs the details, the element is still closed below.
                pass
        output.write("    </macro>\n")

    def serialize_typedef(self, output, name):
        """Write the element describing typedef *name* to *output*.

        A typedef whose type starts with ``struct `` becomes a ``<struct>``
        element, with one ``<field>`` child per known field when the struct
        is indexed with a tuple or list of fields.  Anything else becomes a
        ``<typedef>`` element, with an ``<info>`` child if the entry has a
        non-empty ``extra`` description.
        """
        entry = self.idx.typedefs[name]
        if entry.info[0:7] == 'struct ':
            output.write("    <struct name='%s' file='%s' type='%s'" % (
                     name, self.modulename_file(entry.header), entry.info))
            struct_name = entry.info[7:]
            if struct_name in self.idx.structs and isinstance(
                    self.idx.structs[struct_name].info, (tuple, list)):
                output.write(">\n")
                try:
                    for field in self.idx.structs[struct_name].info:
                        desc = field[2]
                        if desc is None:
                            desc = ''
                        else:
                            desc = escape(desc)
                        output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1], field[0], desc))
                except Exception:
                    print("Failed to serialize struct %s" % (struct_name))
                output.write("    </struct>\n")
            else:
                output.write("/>\n")
        else:
            output.write("    <typedef name='%s' file='%s' type='%s'" % (
                         name, self.modulename_file(entry.header), entry.info))
            try:
                desc = entry.extra
                if desc is not None and desc != "":
                    output.write(">\n      <info>%s</info>\n" % (escape(desc)))
                    output.write("    </typedef>\n")
                else:
                    output.write("/>\n")
            except Exception:
                # No usable description: emit an empty element.
                output.write("/>\n")

    def serialize_variable(self, output, name):
        """Write the ``<variable>`` element for variable *name* to *output*."""
        entry = self.idx.variables[name]
        if entry.info is not None:
            output.write("    <variable name='%s' file='%s' type='%s'" % (
                    name, self.modulename_file(entry.header), entry.info))
        else:
            output.write("    <variable name='%s' file='%s'" % (
                    name, self.modulename_file(entry.header)))
        desc = entry.extra
        if desc is not None and desc != "":
            output.write(">\n      <info>%s</info>\n" % (escape(desc)))
            output.write("    </variable>\n")
        else:
            output.write("/>\n")

    def serialize_function(self, output, name):
        """Write the element describing function *name* to *output*.

        The element tag is the entry's ``type``.  The entry's info is
        expected to be (ret, params, desc) with ret = (type, description)
        and params a sequence of (type, name, description).  If the info
        cannot be serialized a message is printed and the element is closed
        with whatever was written so far.
        """
        entry = self.idx.functions[name]
        if name == debugsym:
            print("=>", entry)

        output.write("    <%s name='%s' file='%s' module='%s'>\n" % (
                     entry.type, name, self.modulename_file(entry.header),
                     self.modulename_file(entry.module)))
        #
        # Processing of conditionals modified by Bill 1/1/05
        #
        if entry.conditionals is not None:
            apstr = ""
            for cond in entry.conditionals:
                if apstr != "":
                    apstr = apstr + " &amp;&amp; "
                apstr = apstr + cond
            output.write("      <cond>%s</cond>\n" % (apstr))
        try:
            (ret, params, desc) = entry.info
            if (desc is None or desc == '') and \
               name[0:9] != "xmlThrDef" and name != "xmlDllMain" and \
               ret[1] == '':
                print("%s %s from %s has no description" % (entry.type, name,
                       self.modulename_file(entry.module)))

            output.write("      <info>%s</info>\n" % (escape(desc)))
            if ret[0] is not None:
                if ret[0] == "void":
                    output.write("      <return type='void'/>\n")
                else:
                    output.write("      <return type='%s' info='%s'/>\n" % (
                             ret[0], escape(ret[1])))
            for param in params:
                if param[0] == 'void':
                    continue
                if param[2] is None:
                    output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
                else:
                    output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
        except Exception:
            print("Failed to save function %s info: " % name, repr(entry.info))
        output.write("    </%s>\n" % (entry.type))

    def serialize_exports(self, output, file):
        """Write the ``<file>`` element listing what header *file* exports.

        Emits the Summary/Description/Author of the header when available,
        a ``<deprecated/>`` marker when the description mentions DEPRECATED,
        then one ``<exports>`` element per symbol, sorted by name within
        each kind.
        """
        module = self.modulename_file(file)
        output.write("    <file name='%s'>\n" % (module))
        exports = self.headers[file]
        if exports.info is not None:
            for data in ('Summary', 'Description', 'Author'):
                try:
                    output.write("     <%s>%s</%s>\n" % (
                                 data.lower(),
                                 escape(exports.info[data]),
                                 data.lower()))
                except Exception:
                    if data != 'Author':
                        print("Header %s lacks a %s description" % (module, data))
            if 'Description' in exports.info:
                desc = exports.info['Description']
                if desc.find("DEPRECATED") != -1:
                    output.write("     <deprecated/>\n")

        for symbol in sorted(exports.macros.keys()):
            # Macros are sometime used to masquerade other types.
            if symbol in exports.functions:
                continue
            if symbol in exports.variables:
                continue
            if symbol in exports.typedefs:
                continue
            if symbol in exports.structs:
                continue
            if symbol in exports.enums:
                continue
            output.write("     <exports symbol='%s' type='macro'/>\n" % (symbol))
        for (kind, table) in (('enum', exports.enums),
                              ('typedef', exports.typedefs),
                              ('struct', exports.structs),
                              ('variable', exports.variables),
                              ('function', exports.functions)):
            for symbol in sorted(table.keys()):
                output.write("     <exports symbol='%s' type='%s'/>\n" % (
                             symbol, kind))
        output.write("    </file>\n")

    def serialize(self):
        """Write the whole API description to ``<name>-api.xml``.

        The file is written in the current directory.  It is encoded as
        ISO-8859-1 to match the XML declaration; characters outside that
        charset are written as numeric character references.
        """
        filename = "%s-api.xml" % self.name
        print("Saving XML description %s" % (filename))
        # The encoding must agree with the XML declaration below; the
        # context manager closes the file even if a serializer raises.
        with open(filename, "w", encoding="ISO-8859-1",
                  errors="xmlcharrefreplace") as output:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<api name='%s'>\n" % self.name)
            output.write("  <files>\n")
            for file in sorted(self.headers.keys()):
                self.serialize_exports(output, file)
            output.write("  </files>\n")
            output.write("  <symbols>\n")
            for macro in sorted(self.idx.macros.keys()):
                self.serialize_macro(output, macro)
            for enum in sorted(self.idx.enums.keys()):
                self.serialize_enum(output, enum)
            for typedef in sorted(self.idx.typedefs.keys()):
                self.serialize_typedef(output, typedef)
            for variable in sorted(self.idx.variables.keys()):
                self.serialize_variable(output, variable)
            for function in sorted(self.idx.functions.keys()):
                self.serialize_function(output, function)
            output.write("  </symbols>\n")
            output.write("</api>\n")
1892
1893
def rebuild():
    """Guess which project the current directory belongs to and rebuild it.

    The project is detected by probing for well-known source files relative
    to the current directory.  The first layout that matches is scanned,
    analyzed and serialized.  If ``../libexslt/exslt.c`` exists, a second
    builder for libexslt is run afterwards.

    Returns the main docBuilder, or None when no known layout is found.
    """
    # (marker file, progress message, project, directories, excludes),
    # probed in this order.
    layouts = (
        ("parser.c", "Rebuilding API description for libxml2",
         "libxml2", [".", "."], ["tst.c"]),
        ("../parser.c", "Rebuilding API description for libxml2",
         "libxml2", ["..", "../include/libxml"], ["tst.c"]),
        ("../libxslt/transform.c", "Rebuilding API description for libxslt",
         "libxslt", ["../libxslt"], ["win32config.h", "libxslt.h", "tst.c"]),
    )
    builder = None
    for (marker, banner, project, directories, excludes) in layouts:
        if glob.glob(marker):
            print(banner)
            builder = docBuilder(project, directories, excludes)
            break
    if builder is None:
        print("rebuild() failed, unable to guess the module")
        return None

    builder.scan()
    builder.analyze()
    builder.serialize()

    if glob.glob("../libexslt/exslt.c"):
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder
1920
1921#
1922# for debugging the parser
1923#
def parse(filename):
    """Run the C parser on a single file and return what its parse() returns."""
    return CParser(filename).parse()
1928
if __name__ == "__main__":
    # Without arguments: rebuild the API description of the project found
    # around the current directory.  With an argument: debug-parse that file.
    if len(sys.argv) <= 1:
        rebuild()
    else:
        debug = 1
        parse(sys.argv[1])
1935