• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
3# This is the API builder, it parses the C sources and build the
4# API formal description in XML.
5#
6# See Copyright for the status of this software.
7#
8# daniel@veillard.com
9#
10import os, sys
11import string
12import glob
13
# Global debug switch. NOTE(review): not referenced in the part of the file
# reviewed here; presumably read by code further down - confirm.
debug=0
# Name of one symbol to trace: whenever an identifier with this exact name is
# created, updated or registered, a trace line is printed. None disables it.
#debugsym='ignorableWhitespaceSAXFunc'
debugsym=None
17
18#
19# C parser analysis code
20#
# Maps a source file name to a short human-readable reason for leaving it out.
# NOTE(review): the code consuming this table is outside the reviewed section;
# presumably files listed here are skipped by the scanner - confirm.
ignored_files = {
  "trio": "too many non standard macros",
  "trio.c": "too many non standard macros",
  "trionan.c": "too many non standard macros",
  "triostr.c": "too many non standard macros",
  "config.h": "generated portability layer",
  "libxml.h": "internal only",
  "testOOM.c": "out of memory tester",
  "testOOMlib.h": "out of memory tester",
  "testOOMlib.c": "out of memory tester",
  "rngparser.c": "not yet integrated",
  "testModule.c": "test tool",
  "testThreads.c": "test tool",
  "testapi.c": "generated regression tests",
  "runtest.c": "regression tests program",
  "runsuite.c": "regression tests program",
  "tst.c": "not part of the library",
  "test.c": "not part of the library",
  "testdso.c": "test for dynamid shared libraries",
  "testrecurse.c": "test for entities recursions",
  "timsort.h": "Internal header only for xpath.c 2.9.0",
}
43
# Maps a C keyword or macro name to a (count, reason) pair.
# NOTE(review): the consumer of this table is outside the reviewed section;
# the integer is presumably the number of following tokens to discard together
# with the word (0 = drop only the word itself) - confirm against the parser.
ignored_words = {
  "WINAPI": (0, "Windows keyword"),
  "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
  "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
  "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XSLTCALL": (0, "Special macro for win32 calls"),
  "EXSLTCALL": (0, "Special macro for win32 calls"),
  "__declspec": (3, "Windows keyword"),
  "__stdcall": (0, "Windows keyword"),
  "ATTRIBUTE_UNUSED": (0, "macro keyword"),
  "ATTRIBUTE_DESTRUCTOR": (0, "macro keyword"),
  "LIBEXSLT_PUBLIC": (0, "macro keyword"),
  "X_IN_Y": (5, "macro function builder"),
  "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"),
  "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"),
  "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
  "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"),
  "ATTRIBUTE_NO_SANITIZE": (3, "macro keyword"),
  "XML_DEPRECATED": (0, "macro keyword"),
}
68
def escape(raw):
    """Return *raw* with the five XML special characters turned into entities.

    The ampersand is replaced first so that the '&' characters introduced by
    the later replacements are not escaped a second time.
    """
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
76
class identifier:
    """Record describing one named symbol found while parsing.

    Stores the symbol name, the header and module it was seen in, its kind
    (``type``), free-form ``info``/``extra`` payloads, the line number, a
    ``static`` flag and a private copy of the preprocessor conditionals that
    were active when it was recorded.
    """

    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
                 info=None, extra=None, conditionals = None):
        self.name = name
        self.header = header
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        # Same normalisation as the setter: empty/None -> None, else a copy.
        self.set_conditionals(conditionals)
        if self.name == debugsym:
            print("=> define %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % (self.module)
        if self.info is not None:
            r = r + " " +  repr(self.info)
        if self.extra is not None:
            r = r + " " + repr(self.extra)
        if self.conditionals is not None:
            r = r + " " + repr(self.conditionals)
        return r

    def set_header(self, header):
        self.header = header
    def set_module(self, module):
        self.module = module
    def set_type(self, type):
        self.type = type
    def set_info(self, info):
        self.info = info
    def set_extra(self, extra):
        self.extra = extra
    def set_lineno(self, lineno):
        self.lineno = lineno
    def set_static(self, static):
        self.static = static
    def set_conditionals(self, conditionals):
        """Store a copy of *conditionals*; None or an empty list becomes None."""
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name
    def get_header(self):
        # Previously returned self.module, so the header was unreachable
        # through the accessor.
        return self.header
    def get_module(self):
        return self.module
    def get_type(self):
        return self.type
    def get_info(self):
        return self.info
    def get_lineno(self):
        return self.lineno
    def get_extra(self):
        return self.extra
    def get_static(self):
        return self.static
    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type = None, info = None, extra=None,
               conditionals=None):
        """Merge information from a later sighting of the same symbol.

        header and type are only filled in when still unset; module is
        replaced when unset or when it merely mirrors the header; info, extra
        and conditionals are overwritten whenever a non-None value is given.
        """
        if self.name == debugsym:
            print("=> update %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))
        if header is not None and self.header is None:
            # Previously stored `module` here; the callers in this file pass
            # the same value for both, so their results are unchanged.
            self.set_header(header)
        if module is not None and (self.module is None or self.header == self.module):
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
        if conditionals is not None:
            self.set_conditionals(conditionals)
167
class index:
    """Symbol table: one dictionary per kind of symbol plus a global one.

    ``identifiers`` maps every known name to its record; ``functions``,
    ``variables``, ``includes``, ``structs``, ``enums``, ``typedefs`` and
    ``macros`` hold the same records grouped by kind; ``references`` holds
    records registered through add_ref(); ``info`` is a free-form dictionary.
    """

    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}
        self.info = {}

    def _register(self, name, header, module, static, type, lineno, info,
                  extra, conditionals):
        """Create or update the record for *name* and return it."""
        if name in self.identifiers:
            entry = self.identifiers[name]
            entry.update(header, module, type, info, extra, conditionals)
        else:
            entry = identifier(name, header, module, type, lineno, info,
                               extra, conditionals)
            self.identifiers[name] = entry
        if static == 1:
            entry.set_static(1)
        return entry

    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a reference to *name*; names starting with '__' are ignored.

        Returns the record, or None when the name was ignored.
        """
        if name[0:2] == '__':
            return None
        entry = self._register(name, header, module, static, type, lineno,
                               info, extra, conditionals)
        if type is not None:
            self.references[name] = entry

        if name == debugsym:
            print("New ref: %s" % (entry))

        return entry

    def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a definition of *name* in the table matching *type*.

        Names starting with '__' are ignored (None is returned). An unknown
        *type* is reported on stdout; the record is still kept in
        ``identifiers`` and returned.
        """
        if name[0:2] == '__':
            return None
        entry = self._register(name, header, module, static, type, lineno,
                               info, extra, conditionals)

        if type is not None:
            tables = {
                "function": self.functions,
                "functype": self.functions,
                "variable": self.variables,
                "include": self.includes,
                "struct": self.structs,
                "enum": self.enums,
                "typedef": self.typedefs,
                "macro": self.macros,
            }
            if type in tables:
                tables[type][name] = entry
            else:
                print("Unable to register type ", type)

        if name == debugsym:
            print("New symbol: %s" % (entry))

        return entry

    def _merge_table(self, mine, theirs, message, overrides_macro = False):
        """Copy entries of *theirs* missing from *mine*; report the others.

        *message* is a %-format taking (name, our header, their header).
        With *overrides_macro*, a macro of the same name is dropped first,
        since a macro might be used to override functions or variables
        definitions.
        """
        for key in list(theirs.keys()):
            if overrides_macro and key in self.macros:
                del self.macros[key]
            if key in mine:
                print(message % (key, mine[key].header, theirs[key].header))
            else:
                mine[key] = theirs[key]
                self.identifiers[key] = theirs[key]

    def merge(self, idx):
        """Fold every symbol of *idx* into this index, reporting duplicates."""
        # The order matters: macros are filtered against functions, variables
        # and enums as they stand at that point, and enums are merged last.
        self._merge_table(self.functions, idx.functions,
                          "function %s from %s redeclared in %s", True)
        self._merge_table(self.variables, idx.variables,
                          "variable %s from %s redeclared in %s", True)
        self._merge_table(self.structs, idx.structs,
                          "struct %s from %s redeclared in %s")
        self._merge_table(self.typedefs, idx.typedefs,
                          "typedef %s from %s redeclared in %s")
        for key in list(idx.macros.keys()):
            # A macro sharing its name with a variable, function or enum is
            # silently dropped.
            if key in self.variables or key in self.functions \
               or key in self.enums:
                continue
            if key in self.macros:
                print("macro %s from %s redeclared in %s" % (
                   key, self.macros[key].header, idx.macros[key].header))
            else:
                self.macros[key] = idx.macros[key]
                self.identifiers[key] = idx.macros[key]
        self._merge_table(self.enums, idx.enums,
                          "enum %s from %s redeclared in %s")

    def merge_public(self, idx):
        """Update functions/variables already known here with data from *idx*.

        Symbols of *idx* that are not already present are ignored. For
        functions, differing conditionals are reported on stdout.
        """
        for key in list(idx.functions.keys()):
            if key in self.functions:
                # check that function condition agrees with header
                if idx.functions[key].conditionals != \
                   self.functions[key].conditionals:
                    print("Header condition differs from Function for %s:" \
                       % key)
                    print("  H: %s" % self.functions[key].conditionals)
                    print("  C: %s" % idx.functions[key].conditionals)
                up = idx.functions[key]
                self.functions[key].update(None, up.module, up.type, up.info, up.extra)

        for key in list(idx.variables.keys()):
            if key in self.variables:
                # TODO: the equivalent condition check for variables produces
                # many false positives, so it is not performed.
                up = idx.variables[key]
                self.variables[key].update(None, up.module, up.type, up.info, up.extra)

    def analyze_dict(self, type, dict):
        """Print how many entries of *dict* exist and how many are non-static.

        Nothing is printed for an empty dictionary.
        """
        count = len(dict)
        public = sum(1 for entry in dict.values() if entry.static == 0)
        if count != public:
            print("  %d %s , %d public" % (count, type, public))
        elif count != 0:
            print("  %d public %s" % (count, type))

    def analyze(self):
        """Print the per-kind statistics for the main symbol tables."""
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)
358
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line.

    Tokens are (kind, text) tuples whose kind is one of 'preproc', 'string',
    'comment', 'name', 'sep' or 'op'. token() returns None once the input is
    exhausted."""

    # Characters emitted as one-character 'sep' tokens.
    _SEPARATORS = "(){}:;,[]"
    # Characters that can start an 'op' token.
    _OPERATOR_START = "+-*><=/%&!|."
    # Characters that can follow to make a two-character 'op' token.
    _OPERATOR_NEXT = "+-*><=/%&!|"
    # Characters that end a 'name' token.
    _NAME_END = " \t(){}:;,+-*/%&!|[]=><"

    def __init__(self, input):
        self.input = input
        self.tokens = []
        self.line = ""
        self.lineno = 0
        # Set up front so debug() works before the first token is produced.
        self.last = None

    def getline(self):
        """Return the next non-blank logical line, stripped, or None at EOF.

        Physical lines ending in a backslash are joined with the following
        line(s); joining stops at the first blank continuation line.
        """
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.lstrip()
            line = line.rstrip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.lstrip()
                n = n.rstrip()
                if not n:
                    break
                else:
                    line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Put *token* back so that it is the next one returned by token()."""
        self.tokens.insert(0, token)

    def debug(self):
        print("Last token: ", self.last)
        print("Token queue: ", self.tokens)
        print("Line %d end: " % (self.lineno), self.line)

    def token(self):
        """Return the next token, or None when the input is exhausted."""
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                # Resume with the unprocessed remainder of the current line.
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                # A preprocessor line is split on whitespace only.
                self.tokens = [('preproc', word) for word in line.split()]
                break
            l = len(line)
            if line[0] == '"' or line[0] == "'":
                # String or character literal, possibly spanning lines.
                end = line[0]
                line = line[1:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    l = len(line)
                    while i < l:
                        if line[i] == end:
                            self.line = line[i+1:]
                            line = line[:i]
                            found = 1
                            break
                        if line[i] == '\\':
                            # Skip the escaped character.
                            i = i + 1
                        i = i + 1
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line is None:
                            return None
                self.last = ('string', tok)
                return self.last

            if l >= 2 and line[0] == '/' and line[1] == '*':
                # Block comment; its lines are joined with '\n'.
                line = line[2:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    l = len(line)
                    while i < l:
                        if line[i] == '*' and i+1 < l and line[i+1] == '/':
                            self.line = line[i+2:]
                            # NOTE(review): the slice stops at i-1, not i, so
                            # the character just before '*/' is dropped too.
                            # Kept unchanged to preserve the comment text that
                            # existing callers receive.
                            line = line[:i-1]
                            found = 1
                            break
                        i = i + 1
                    if tok != "":
                        tok = tok + "\n"
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line is None:
                            return None
                self.last = ('comment', tok)
                return self.last
            if l >= 2 and line[0] == '/' and line[1] == '/':
                line = line[2:]
                self.last = ('comment', line)
                return self.last

            # Cut the line at the first comment or literal; the remainder is
            # handled by a later pass of the outer loop.
            i = 0
            while i < l:
                if line[i] == '/' and i+1 < l and line[i+1] == '/':
                    self.line = line[i:]
                    line = line[:i]
                    break
                if line[i] == '/' and i+1 < l and line[i+1] == '*':
                    self.line = line[i:]
                    line = line[:i]
                    break
                if line[i] == '"' or line[i] == "'":
                    self.line = line[i:]
                    line = line[:i]
                    break
                i = i + 1

            l = len(line)
            i = 0
            while i < l:
                ch = line[i]
                if ch == ' ' or ch == '\t':
                    i = i + 1
                    continue
                if ch in self._SEPARATORS:
                    self.tokens.append(('sep', ch))
                    i = i + 1
                    continue
                if ch in self._OPERATOR_START:
                    if ch == '.' and i + 2 < l and \
                       line[i+1] == '.' and line[i+2] == '.':
                        # The varargs ellipsis is reported as a name.
                        self.tokens.append(('name', '...'))
                        i = i + 3
                        continue

                    j = i + 1
                    if j < l and line[j] in self._OPERATOR_NEXT:
                        self.tokens.append(('op', line[i:j+1]))
                        i = j + 1
                    else:
                        self.tokens.append(('op', ch))
                        i = i + 1
                    continue
                # Anything else starts a name that runs up to the next
                # blank, separator or operator character. ASCII letters and
                # digits are in none of the sets tested above, so they always
                # reach this point.
                s = i
                while i < l and line[i] not in self._NAME_END:
                    i = i + 1
                self.tokens.append(('name', line[s:i]))

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
564
565class CParser:
566    """The C module parser"""
567    def __init__(self, filename, idx = None):
568        self.filename = filename
569        if len(filename) > 2 and filename[-2:] == '.h':
570            self.is_header = 1
571        else:
572            self.is_header = 0
573        self.input = open(filename)
574        self.lexer = CLexer(self.input)
575        if idx == None:
576            self.index = index()
577        else:
578            self.index = idx
579        self.top_comment = ""
580        self.last_comment = ""
581        self.comment = None
582        self.collect_ref = 0
583        self.doc_disable = 0
584        self.conditionals = []
585        self.defines = []
586
587    def collect_references(self):
588        self.collect_ref = 1
589
590    def disable(self):
591        self.doc_disable = 1
592
593    def enable(self):
594        self.doc_disable = 0
595
596    def lineno(self):
597        return self.lexer.getlineno()
598
599    def index_add(self, name, module, static, type, info=None, extra = None):
600        if self.doc_disable:
601            return
602        if self.is_header == 1:
603            self.index.add(name, module, module, static, type, self.lineno(),
604                           info, extra, self.conditionals)
605        else:
606            self.index.add(name, None, module, static, type, self.lineno(),
607                           info, extra, self.conditionals)
608
609    def index_add_ref(self, name, module, static, type, info=None,
610                      extra = None):
611        if self.is_header == 1:
612            self.index.add_ref(name, module, module, static, type,
613                               self.lineno(), info, extra, self.conditionals)
614        else:
615            self.index.add_ref(name, None, module, static, type, self.lineno(),
616                               info, extra, self.conditionals)
617
618    def warning(self, msg):
619        if self.doc_disable:
620            return
621        print(msg)
622
623    def error(self, msg, token=-1):
624        if self.doc_disable:
625            return
626
627        print("Parse Error: " + msg)
628        if token != -1:
629            print("Got token ", token)
630        self.lexer.debug()
631        sys.exit(1)
632
633    def debug(self, msg, token=-1):
634        print("Debug: " + msg)
635        if token != -1:
636            print("Got token ", token)
637        self.lexer.debug()
638
639    def parseTopComment(self, comment):
640        res = {}
641        lines = comment.split("\n")
642        item = None
643        for line in lines:
644            while line != "" and (line[0] == ' ' or line[0] == '\t'):
645                line = line[1:]
646            while line != "" and line[0] == '*':
647                line = line[1:]
648            while line != "" and (line[0] == ' ' or line[0] == '\t'):
649                line = line[1:]
650            try:
651                (it, line) = line.split(":", 1)
652                item = it
653                while line != "" and (line[0] == ' ' or line[0] == '\t'):
654                    line = line[1:]
655                if item in res:
656                    res[item] = res[item] + " " + line
657                else:
658                    res[item] = line
659            except:
660                if item != None:
661                    if item in res:
662                        res[item] = res[item] + " " + line
663                    else:
664                        res[item] = line
665        self.index.info = res
666
667    def parseComment(self, token):
668        if self.top_comment == "":
669            self.top_comment = token[1]
670        if self.comment == None or token[1][0] == '*':
671            self.comment = token[1];
672        else:
673            self.comment = self.comment + token[1]
674        token = self.lexer.token()
675
676        if self.comment.find("DOC_DISABLE") != -1:
677            self.disable()
678
679        if self.comment.find("DOC_ENABLE") != -1:
680            self.enable()
681
682        return token
683
684    #
685    # Parse a simple comment block for typedefs or global variables
686    #
687    def parseSimpleComment(self, name, quiet = False):
688        if name[0:2] == '__':
689            quiet = 1
690
691        args = []
692        desc = ""
693
694        if self.comment == None:
695            if not quiet:
696                self.warning("Missing comment for %s" % (name))
697            return(None)
698        if self.comment[0] != '*':
699            if not quiet:
700                self.warning("Missing * in comment for %s" % (name))
701            return(None)
702        lines = self.comment.split('\n')
703        if lines[0] == '*':
704            del lines[0]
705        if lines[0] != "* %s:" % (name):
706            if not quiet:
707                self.warning("Misformatted comment for %s" % (name))
708                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
709            return(None)
710        del lines[0]
711        while len(lines) > 0 and lines[0] == '*':
712            del lines[0]
713        desc = ""
714        while len(lines) > 0:
715            l = lines[0]
716            while len(l) > 0 and l[0] == '*':
717                l = l[1:]
718            l = l.strip()
719            desc = desc + " " + l
720            del lines[0]
721
722        desc = desc.strip()
723
724        if quiet == 0:
725            if desc == "":
726                self.warning("Comment for %s lacks description" % (name))
727
728        return(desc)
729    #
730    # Parse a comment block associate to a macro
731    #
732    def parseMacroComment(self, name, quiet = 0):
733        if name[0:2] == '__':
734            quiet = 1
735
736        args = []
737        desc = ""
738
739        if self.comment == None:
740            if not quiet:
741                self.warning("Missing comment for macro %s" % (name))
742            return((args, desc))
743        if self.comment[0] != '*':
744            if not quiet:
745                self.warning("Missing * in macro comment for %s" % (name))
746            return((args, desc))
747        lines = self.comment.split('\n')
748        if lines[0] == '*':
749            del lines[0]
750        if lines[0] != "* %s:" % (name):
751            if not quiet:
752                self.warning("Misformatted macro comment for %s" % (name))
753                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
754            return((args, desc))
755        del lines[0]
756        while lines[0] == '*':
757            del lines[0]
758        while len(lines) > 0 and lines[0][0:3] == '* @':
759            l = lines[0][3:]
760            try:
761                (arg, desc) = l.split(':', 1)
762                desc=desc.strip()
763                arg=arg.strip()
764            except:
765                if not quiet:
766                    self.warning("Misformatted macro comment for %s" % (name))
767                    self.warning("  problem with '%s'" % (lines[0]))
768                del lines[0]
769                continue
770            del lines[0]
771            l = lines[0].strip()
772            while len(l) > 2 and l[0:3] != '* @':
773                while l[0] == '*':
774                    l = l[1:]
775                desc = desc + ' ' + l.strip()
776                del lines[0]
777                if len(lines) == 0:
778                    break
779                l = lines[0]
780            args.append((arg, desc))
781        while len(lines) > 0 and lines[0] == '*':
782            del lines[0]
783        desc = ""
784        while len(lines) > 0:
785            l = lines[0]
786            while len(l) > 0 and l[0] == '*':
787                l = l[1:]
788            l = l.strip()
789            desc = desc + " " + l
790            del lines[0]
791
792        desc = desc.strip()
793
794        if quiet == 0:
795            if desc == "":
796                self.warning("Macro comment for %s lack description of the macro" % (name))
797
798        return((args, desc))
799
800     #
801     # Parse a comment block and merge the information found in the
802     # parameters descriptions, finally returns a block as complete
803     # as possible
804     #
805    def mergeFunctionComment(self, name, description, quiet = 0):
806        if name == 'main':
807            quiet = 1
808        if name[0:2] == '__':
809            quiet = 1
810
811        (ret, args) = description
812        desc = ""
813        retdesc = ""
814
815        if self.comment == None:
816            if not quiet:
817                self.warning("Missing comment for function %s" % (name))
818            return(((ret[0], retdesc), args, desc))
819        if self.comment[0] != '*':
820            if not quiet:
821                self.warning("Missing * in function comment for %s" % (name))
822            return(((ret[0], retdesc), args, desc))
823        lines = self.comment.split('\n')
824        if lines[0] == '*':
825            del lines[0]
826        if lines[0] != "* %s:" % (name):
827            if not quiet:
828                self.warning("Misformatted function comment for %s" % (name))
829                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
830            return(((ret[0], retdesc), args, desc))
831        del lines[0]
832        while lines[0] == '*':
833            del lines[0]
834        nbargs = len(args)
835        while len(lines) > 0 and lines[0][0:3] == '* @':
836            l = lines[0][3:]
837            try:
838                (arg, desc) = l.split(':', 1)
839                desc=desc.strip()
840                arg=arg.strip()
841            except:
842                if not quiet:
843                    self.warning("Misformatted function comment for %s" % (name))
844                    self.warning("  problem with '%s'" % (lines[0]))
845                del lines[0]
846                continue
847            del lines[0]
848            l = lines[0].strip()
849            while len(l) > 2 and l[0:3] != '* @':
850                while l[0] == '*':
851                    l = l[1:]
852                desc = desc + ' ' + l.strip()
853                del lines[0]
854                if len(lines) == 0:
855                    break
856                l = lines[0]
857            i = 0
858            while i < nbargs:
859                if args[i][1] == arg:
860                    args[i] = (args[i][0], arg, desc)
861                    break;
862                i = i + 1
863            if i >= nbargs:
864                if not quiet:
865                    self.warning("Unable to find arg %s from function comment for %s" % (
866                       arg, name))
867        while len(lines) > 0 and lines[0] == '*':
868            del lines[0]
869        desc = ""
870        while len(lines) > 0:
871            l = lines[0]
872            while len(l) > 0 and l[0] == '*':
873                l = l[1:]
874            l = l.strip()
875            if len(l) >= 6 and  l[0:6] == "return" or l[0:6] == "Return":
876                try:
877                    l = l.split(' ', 1)[1]
878                except:
879                    l = ""
880                retdesc = l.strip()
881                del lines[0]
882                while len(lines) > 0:
883                    l = lines[0]
884                    while len(l) > 0 and l[0] == '*':
885                        l = l[1:]
886                    l = l.strip()
887                    retdesc = retdesc + " " + l
888                    del lines[0]
889            else:
890                desc = desc + " " + l
891                del lines[0]
892
893        retdesc = retdesc.strip()
894        desc = desc.strip()
895
896        if quiet == 0:
897             #
898             # report missing comments
899             #
900            i = 0
901            while i < nbargs:
902                if args[i][2] == None and args[i][0] != "void" and \
903                   ((args[i][1] != None) or (args[i][1] == '')):
904                    self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
905                i = i + 1
906            if retdesc == "" and ret[0] != "void":
907                self.warning("Function comment for %s lacks description of return value" % (name))
908            if desc == "":
909                self.warning("Function comment for %s lacks description of the function" % (name))
910
911        return(((ret[0], retdesc), args, desc))
912
913    def parsePreproc(self, token):
914        if debug:
915            print("=> preproc ", token, self.lexer.tokens)
916        name = token[1]
917        if name == "#include":
918            token = self.lexer.token()
919            if token == None:
920                return None
921            if token[0] == 'preproc':
922                self.index_add(token[1], self.filename, not self.is_header,
923                                "include")
924                return self.lexer.token()
925            return token
926        if name == "#define":
927            token = self.lexer.token()
928            if token == None:
929                return None
930            if token[0] == 'preproc':
931                 # TODO macros with arguments
932                name = token[1]
933                lst = []
934                token = self.lexer.token()
935                while token != None and token[0] == 'preproc' and \
936                      token[1][0] != '#':
937                    lst.append(token[1])
938                    token = self.lexer.token()
939                try:
940                    name = name.split('(') [0]
941                except:
942                    pass
943                info = self.parseMacroComment(name, True)
944                self.index_add(name, self.filename, not self.is_header,
945                                "macro", info)
946                return token
947
948        #
949        # Processing of conditionals modified by Bill 1/1/05
950        #
951        # We process conditionals (i.e. tokens from #ifdef, #ifndef,
952        # #if, #else and #endif) for headers and mainline code,
953        # store the ones from the header in libxml2-api.xml, and later
954        # (in the routine merge_public) verify that the two (header and
955        # mainline code) agree.
956        #
957        # There is a small problem with processing the headers. Some of
958        # the variables are not concerned with enabling / disabling of
959        # library functions (e.g. '__XML_PARSER_H__'), and we don't want
960        # them to be included in libxml2-api.xml, or involved in
961        # the check between the header and the mainline code.  To
962        # accomplish this, we ignore any conditional which doesn't include
963        # the string 'ENABLED'
964        #
965        if name == "#ifdef":
966            apstr = self.lexer.tokens[0][1]
967            try:
968                self.defines.append(apstr)
969                if apstr.find('ENABLED') != -1:
970                    self.conditionals.append("defined(%s)" % apstr)
971            except:
972                pass
973        elif name == "#ifndef":
974            apstr = self.lexer.tokens[0][1]
975            try:
976                self.defines.append(apstr)
977                if apstr.find('ENABLED') != -1:
978                    self.conditionals.append("!defined(%s)" % apstr)
979            except:
980                pass
981        elif name == "#if":
982            apstr = ""
983            for tok in self.lexer.tokens:
984                if apstr != "":
985                    apstr = apstr + " "
986                apstr = apstr + tok[1]
987            try:
988                self.defines.append(apstr)
989                if apstr.find('ENABLED') != -1:
990                    self.conditionals.append(apstr)
991            except:
992                pass
993        elif name == "#else":
994            if self.conditionals != [] and \
995               self.defines[-1].find('ENABLED') != -1:
996                self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
997        elif name == "#endif":
998            if self.conditionals != [] and \
999               self.defines[-1].find('ENABLED') != -1:
1000                self.conditionals = self.conditionals[:-1]
1001            self.defines = self.defines[:-1]
1002        token = self.lexer.token()
1003        while token != None and token[0] == 'preproc' and \
1004            token[1][0] != '#':
1005            token = self.lexer.token()
1006        return token
1007
1008     #
1009     # token acquisition on top of the lexer, it handle internally
1010     # preprocessor and comments since they are logically not part of
1011     # the program structure.
1012     #
1013    def token(self):
1014        global ignored_words
1015
1016        token = self.lexer.token()
1017        while token != None:
1018            if token[0] == 'comment':
1019                token = self.parseComment(token)
1020                continue
1021            elif token[0] == 'preproc':
1022                token = self.parsePreproc(token)
1023                continue
1024            elif token[0] == "name" and token[1] == "__const":
1025                token = ("name", "const")
1026                return token
1027            elif token[0] == "name" and token[1] == "__attribute":
1028                token = self.lexer.token()
1029                while token != None and token[1] != ";":
1030                    token = self.lexer.token()
1031                return token
1032            elif token[0] == "name" and token[1] in ignored_words:
1033                (n, info) = ignored_words[token[1]]
1034                i = 0
1035                while i < n:
1036                    token = self.lexer.token()
1037                    i = i + 1
1038                token = self.lexer.token()
1039                continue
1040            else:
1041                if debug:
1042                    print("=> ", token)
1043                return token
1044        return None
1045
1046     #
1047     # Parse a typedef, it records the type and its name.
1048     #
1049    def parseTypedef(self, token):
1050        if token == None:
1051            return None
1052        token = self.parseType(token)
1053        if token == None:
1054            self.error("parsing typedef")
1055            return None
1056        base_type = self.type
1057        type = base_type
1058         #self.debug("end typedef type", token)
1059        while token != None:
1060            if token[0] == "name":
1061                name = token[1]
1062                signature = self.signature
1063                if signature != None:
1064                    type = type.split('(')[0]
1065                    d = self.mergeFunctionComment(name,
1066                            ((type, None), signature), 1)
1067                    self.index_add(name, self.filename, not self.is_header,
1068                                    "functype", d)
1069                else:
1070                    if base_type == "struct":
1071                        self.index_add(name, self.filename, not self.is_header,
1072                                        "struct", type)
1073                        base_type = "struct " + name
1074                    else:
1075                        # TODO report missing or misformatted comments
1076                        info = self.parseSimpleComment(name, True)
1077                        self.index_add(name, self.filename, not self.is_header,
1078                                    "typedef", type, info)
1079                token = self.token()
1080            else:
1081                self.error("parsing typedef: expecting a name")
1082                return token
1083             #self.debug("end typedef", token)
1084            if token != None and token[0] == 'sep' and token[1] == ',':
1085                type = base_type
1086                token = self.token()
1087                while token != None and token[0] == "op":
1088                    type = type + token[1]
1089                    token = self.token()
1090            elif token != None and token[0] == 'sep' and token[1] == ';':
1091                break;
1092            elif token != None and token[0] == 'name':
1093                type = base_type
1094                continue;
1095            else:
1096                self.error("parsing typedef: expecting ';'", token)
1097                return token
1098        token = self.token()
1099        return token
1100
1101     #
1102     # Parse a C code block, used for functions it parse till
1103     # the balancing } included
1104     #
1105    def parseBlock(self, token):
1106        while token != None:
1107            if token[0] == "sep" and token[1] == "{":
1108                token = self.token()
1109                token = self.parseBlock(token)
1110            elif token[0] == "sep" and token[1] == "}":
1111                token = self.token()
1112                return token
1113            else:
1114                if self.collect_ref == 1:
1115                    oldtok = token
1116                    token = self.token()
1117                    if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
1118                        if token[0] == "sep" and token[1] == "(":
1119                            self.index_add_ref(oldtok[1], self.filename,
1120                                                0, "function")
1121                            token = self.token()
1122                        elif token[0] == "name":
1123                            token = self.token()
1124                            if token[0] == "sep" and (token[1] == ";" or
1125                               token[1] == "," or token[1] == "="):
1126                                self.index_add_ref(oldtok[1], self.filename,
1127                                                    0, "type")
1128                    elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
1129                        self.index_add_ref(oldtok[1], self.filename,
1130                                            0, "typedef")
1131                    elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
1132                        self.index_add_ref(oldtok[1], self.filename,
1133                                            0, "typedef")
1134
1135                else:
1136                    token = self.token()
1137        return token
1138
1139     #
1140     # Parse a C struct definition till the balancing }
1141     #
1142    def parseStruct(self, token):
1143        fields = []
1144         #self.debug("start parseStruct", token)
1145        while token != None:
1146            if token[0] == "sep" and token[1] == "{":
1147                token = self.token()
1148                token = self.parseTypeBlock(token)
1149            elif token[0] == "sep" and token[1] == "}":
1150                self.struct_fields = fields
1151                 #self.debug("end parseStruct", token)
1152                 #print fields
1153                token = self.token()
1154                return token
1155            else:
1156                base_type = self.type
1157                 #self.debug("before parseType", token)
1158                token = self.parseType(token)
1159                 #self.debug("after parseType", token)
1160                if token != None and token[0] == "name":
1161                    fname = token[1]
1162                    token = self.token()
1163                    if token[0] == "sep" and token[1] == ";":
1164                        self.comment = None
1165                        token = self.token()
1166                        fields.append((self.type, fname, self.comment))
1167                        self.comment = None
1168                    else:
1169                        self.error("parseStruct: expecting ;", token)
1170                elif token != None and token[0] == "sep" and token[1] == "{":
1171                    token = self.token()
1172                    token = self.parseTypeBlock(token)
1173                    if token != None and token[0] == "name":
1174                        token = self.token()
1175                    if token != None and token[0] == "sep" and token[1] == ";":
1176                        token = self.token()
1177                    else:
1178                        self.error("parseStruct: expecting ;", token)
1179                else:
1180                    self.error("parseStruct: name", token)
1181                    token = self.token()
1182                self.type = base_type;
1183        self.struct_fields = fields
1184         #self.debug("end parseStruct", token)
1185         #print fields
1186        return token
1187
1188     #
1189     # Parse a C enum block, parse till the balancing }
1190     #
1191    def parseEnumBlock(self, token):
1192        self.enums = []
1193        name = None
1194        self.comment = None
1195        comment = ""
1196        value = "0"
1197        while token != None:
1198            if token[0] == "sep" and token[1] == "{":
1199                token = self.token()
1200                token = self.parseTypeBlock(token)
1201            elif token[0] == "sep" and token[1] == "}":
1202                if name != None:
1203                    if self.comment != None:
1204                        comment = self.comment
1205                        self.comment = None
1206                    self.enums.append((name, value, comment))
1207                token = self.token()
1208                return token
1209            elif token[0] == "name":
1210                    if name != None:
1211                        if self.comment != None:
1212                            comment = self.comment.strip()
1213                            self.comment = None
1214                        self.enums.append((name, value, comment))
1215                    name = token[1]
1216                    comment = ""
1217                    token = self.token()
1218                    if token[0] == "op" and token[1][0] == "=":
1219                        value = ""
1220                        if len(token[1]) > 1:
1221                            value = token[1][1:]
1222                        token = self.token()
1223                        while token[0] != "sep" or (token[1] != ',' and
1224                              token[1] != '}'):
1225                            value = value + token[1]
1226                            token = self.token()
1227                    else:
1228                        try:
1229                            value = "%d" % (int(value) + 1)
1230                        except:
1231                            self.warning("Failed to compute value of enum %s" % (name))
1232                            value=""
1233                    if token[0] == "sep" and token[1] == ",":
1234                        token = self.token()
1235            else:
1236                token = self.token()
1237        return token
1238
1239     #
1240     # Parse a C definition block, used for structs it parse till
1241     # the balancing }
1242     #
1243    def parseTypeBlock(self, token):
1244        while token != None:
1245            if token[0] == "sep" and token[1] == "{":
1246                token = self.token()
1247                token = self.parseTypeBlock(token)
1248            elif token[0] == "sep" and token[1] == "}":
1249                token = self.token()
1250                return token
1251            else:
1252                token = self.token()
1253        return token
1254
1255     #
1256     # Parse a type: the fact that the type name can either occur after
1257     #    the definition or within the definition makes it a little harder
1258     #    if inside, the name token is pushed back before returning
1259     #
1260    def parseType(self, token):
1261        self.type = ""
1262        self.struct_fields = []
1263        self.signature = None
1264        if token == None:
1265            return token
1266
1267        while token[0] == "name" and (
1268              token[1] == "const" or \
1269              token[1] == "unsigned" or \
1270              token[1] == "signed"):
1271            if self.type == "":
1272                self.type = token[1]
1273            else:
1274                self.type = self.type + " " + token[1]
1275            token = self.token()
1276
1277        if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
1278            if self.type == "":
1279                self.type = token[1]
1280            else:
1281                self.type = self.type + " " + token[1]
1282            if token[0] == "name" and token[1] == "int":
1283                if self.type == "":
1284                    self.type = tmp[1]
1285                else:
1286                    self.type = self.type + " " + tmp[1]
1287
1288        elif token[0] == "name" and token[1] == "struct":
1289            if self.type == "":
1290                self.type = token[1]
1291            else:
1292                self.type = self.type + " " + token[1]
1293            token = self.token()
1294            nametok = None
1295            if token[0] == "name":
1296                nametok = token
1297                token = self.token()
1298            if token != None and token[0] == "sep" and token[1] == "{":
1299                token = self.token()
1300                token = self.parseStruct(token)
1301            elif token != None and token[0] == "op" and token[1] == "*":
1302                self.type = self.type + " " + nametok[1] + " *"
1303                token = self.token()
1304                while token != None and token[0] == "op" and token[1] == "*":
1305                    self.type = self.type + " *"
1306                    token = self.token()
1307                if token[0] == "name":
1308                    nametok = token
1309                    token = self.token()
1310                else:
1311                    self.error("struct : expecting name", token)
1312                    return token
1313            elif token != None and token[0] == "name" and nametok != None:
1314                self.type = self.type + " " + nametok[1]
1315                return token
1316
1317            if nametok != None:
1318                self.lexer.push(token)
1319                token = nametok
1320            return token
1321
1322        elif token[0] == "name" and token[1] == "enum":
1323            if self.type == "":
1324                self.type = token[1]
1325            else:
1326                self.type = self.type + " " + token[1]
1327            self.enums = []
1328            token = self.token()
1329            if token != None and token[0] == "sep" and token[1] == "{":
1330                token = self.token()
1331                token = self.parseEnumBlock(token)
1332            else:
1333                self.error("parsing enum: expecting '{'", token)
1334            enum_type = None
1335            if token != None and token[0] != "name":
1336                self.lexer.push(token)
1337                token = ("name", "enum")
1338            else:
1339                enum_type = token[1]
1340            for enum in self.enums:
1341                self.index_add(enum[0], self.filename,
1342                               not self.is_header, "enum",
1343                               (enum[1], enum[2], enum_type))
1344            return token
1345
1346        elif token[0] == "name":
1347            if self.type == "":
1348                self.type = token[1]
1349            else:
1350                self.type = self.type + " " + token[1]
1351        else:
1352            self.error("parsing type %s: expecting a name" % (self.type),
1353                       token)
1354            return token
1355        token = self.token()
1356        while token != None and (token[0] == "op" or
1357              token[0] == "name" and token[1] == "const"):
1358            self.type = self.type + " " + token[1]
1359            token = self.token()
1360
1361         #
1362         # if there is a parenthesis here, this means a function type
1363         #
1364        if token != None and token[0] == "sep" and token[1] == '(':
1365            self.type = self.type + token[1]
1366            token = self.token()
1367            while token != None and token[0] == "op" and token[1] == '*':
1368                self.type = self.type + token[1]
1369                token = self.token()
1370            if token == None or token[0] != "name" :
1371                self.error("parsing function type, name expected", token);
1372                return token
1373            self.type = self.type + token[1]
1374            nametok = token
1375            token = self.token()
1376            if token != None and token[0] == "sep" and token[1] == ')':
1377                self.type = self.type + token[1]
1378                token = self.token()
1379                if token != None and token[0] == "sep" and token[1] == '(':
1380                    token = self.token()
1381                    type = self.type;
1382                    token = self.parseSignature(token);
1383                    self.type = type;
1384                else:
1385                    self.error("parsing function type, '(' expected", token);
1386                    return token
1387            else:
1388                self.error("parsing function type, ')' expected", token);
1389                return token
1390            self.lexer.push(token)
1391            token = nametok
1392            return token
1393
1394         #
1395         # do some lookahead for arrays
1396         #
1397        if token != None and token[0] == "name":
1398            nametok = token
1399            token = self.token()
1400            if token != None and token[0] == "sep" and token[1] == '[':
1401                self.type = self.type + nametok[1]
1402                while token != None and token[0] == "sep" and token[1] == '[':
1403                    self.type = self.type + token[1]
1404                    token = self.token()
1405                    while token != None and token[0] != 'sep' and \
1406                          token[1] != ']' and token[1] != ';':
1407                        self.type = self.type + token[1]
1408                        token = self.token()
1409                if token != None and token[0] == 'sep' and token[1] == ']':
1410                    self.type = self.type + token[1]
1411                    token = self.token()
1412                else:
1413                    self.error("parsing array type, ']' expected", token);
1414                    return token
1415            elif token != None and token[0] == "sep" and token[1] == ':':
1416                 # remove :12 in case it's a limited int size
1417                token = self.token()
1418                token = self.token()
1419            self.lexer.push(token)
1420            token = nametok
1421
1422        return token
1423
1424     #
1425     # Parse a signature: '(' has been parsed and we scan the type definition
1426     #    up to the ')' included
1427    def parseSignature(self, token):
1428        signature = []
1429        if token != None and token[0] == "sep" and token[1] == ')':
1430            self.signature = []
1431            token = self.token()
1432            return token
1433        while token != None:
1434            token = self.parseType(token)
1435            if token != None and token[0] == "name":
1436                signature.append((self.type, token[1], None))
1437                token = self.token()
1438            elif token != None and token[0] == "sep" and token[1] == ',':
1439                token = self.token()
1440                continue
1441            elif token != None and token[0] == "sep" and token[1] == ')':
1442                 # only the type was provided
1443                if self.type == "...":
1444                    signature.append((self.type, "...", None))
1445                else:
1446                    signature.append((self.type, None, None))
1447            if token != None and token[0] == "sep":
1448                if token[1] == ',':
1449                    token = self.token()
1450                    continue
1451                elif token[1] == ')':
1452                    token = self.token()
1453                    break
1454        self.signature = signature
1455        return token
1456
1457     #
1458     # Parse a global definition, be it a type, variable or function
1459     # the extern "C" blocks are a bit nasty and require it to recurse.
1460     #
1461    def parseGlobal(self, token):
1462        static = 0
1463        if token[1] == 'extern':
1464            token = self.token()
1465            if token == None:
1466                return token
1467            if token[0] == 'string':
1468                if token[1] == 'C':
1469                    token = self.token()
1470                    if token == None:
1471                        return token
1472                    if token[0] == 'sep' and token[1] == "{":
1473                        token = self.token()
1474#                         print 'Entering extern "C line ', self.lineno()
1475                        while token != None and (token[0] != 'sep' or
1476                              token[1] != "}"):
1477                            if token[0] == 'name':
1478                                token = self.parseGlobal(token)
1479                            else:
1480                                self.error(
1481                                 "token %s %s unexpected at the top level" % (
1482                                        token[0], token[1]))
1483                                token = self.parseGlobal(token)
1484#                         print 'Exiting extern "C" line', self.lineno()
1485                        token = self.token()
1486                        return token
1487                else:
1488                    return token
1489        elif token[1] == 'static':
1490            static = 1
1491            token = self.token()
1492            if token == None or  token[0] != 'name':
1493                return token
1494
1495        if token[1] == 'typedef':
1496            token = self.token()
1497            return self.parseTypedef(token)
1498        else:
1499            token = self.parseType(token)
1500            type_orig = self.type
1501        if token == None or token[0] != "name":
1502            return token
1503        type = type_orig
1504        self.name = token[1]
1505        token = self.token()
1506        while token != None and (token[0] == "sep" or token[0] == "op"):
1507            if token[0] == "sep":
1508                if token[1] == "[":
1509                    type = type + token[1]
1510                    token = self.token()
1511                    while token != None and (token[0] != "sep" or \
1512                          token[1] != ";"):
1513                        type = type + token[1]
1514                        token = self.token()
1515
1516            if token != None and token[0] == "op" and token[1] == "=":
1517                 #
1518                 # Skip the initialization of the variable
1519                 #
1520                token = self.token()
1521                if token[0] == 'sep' and token[1] == '{':
1522                    token = self.token()
1523                    token = self.parseBlock(token)
1524                else:
1525                    while token != None and (token[0] != "sep" or \
1526                          (token[1] != ';' and token[1] != ',')):
1527                            token = self.token()
1528                if token == None or token[0] != "sep" or (token[1] != ';' and
1529                   token[1] != ','):
1530                    self.error("missing ';' or ',' after value")
1531
1532            if token != None and token[0] == "sep":
1533                if token[1] == ";":
1534                    if type == "struct":
1535                        self.index_add(self.name, self.filename,
1536                             not self.is_header, "struct", self.struct_fields)
1537                    else:
1538                        info = self.parseSimpleComment(self.name, True)
1539                        self.index_add(self.name, self.filename,
1540                             not self.is_header, "variable", type, info)
1541                    self.comment = None
1542                    token = self.token()
1543                    break
1544                elif token[1] == "(":
1545                    token = self.token()
1546                    token = self.parseSignature(token)
1547                    if token == None:
1548                        return None
1549                    if token[0] == "sep" and token[1] == ";":
1550                        d = self.mergeFunctionComment(self.name,
1551                                ((type, None), self.signature), 1)
1552                        self.index_add(self.name, self.filename, static,
1553                                        "function", d)
1554                        self.comment = None
1555                        token = self.token()
1556                    elif token[0] == "sep" and token[1] == "{":
1557                        d = self.mergeFunctionComment(self.name,
1558                                ((type, None), self.signature), static)
1559                        self.index_add(self.name, self.filename, static,
1560                                        "function", d)
1561                        self.comment = None
1562                        token = self.token()
1563                        token = self.parseBlock(token);
1564                elif token[1] == ',':
1565                    self.index_add(self.name, self.filename, static,
1566                                    "variable", type)
1567                    self.comment = None
1568                    type = type_orig
1569                    token = self.token()
1570                    while token != None and token[0] == "sep":
1571                        type = type + token[1]
1572                        token = self.token()
1573                    if token != None and token[0] == "name":
1574                        self.name = token[1]
1575                        token = self.token()
1576                else:
1577                    break
1578
1579        return token
1580
1581    def parse(self):
1582        self.warning("Parsing %s" % (self.filename))
1583        token = self.token()
1584        while token != None:
1585            if token[0] == 'name':
1586                token = self.parseGlobal(token)
1587            else:
1588                self.error("token %s %s unexpected at the top level" % (
1589                       token[0], token[1]))
1590                token = self.parseGlobal(token)
1591                return
1592        self.parseTopComment(self.top_comment)
1593        return self.index
1594
1595
class docBuilder:
    """Collect C sources and headers, parse them, and write an XML API file.

    Typical use is scan(), then analyze(), then serialize().  scan() fills
    self.headers and self.modules (file path -> index returned by
    CParser.parse()) and merges every per-file index into self.idx;
    serialize() writes "<name>-api.xml" in the current directory.
    """

    def __init__(self, name, directories=None, excludes=None):
        """Create a builder for project `name`.

        directories -- directories searched by scan(); defaults to ['.'].
        excludes    -- substrings; any file whose path contains one of them
                       is skipped.  The keys of the module-level
                       ignored_files table are always appended.
        """
        # None sentinels instead of mutable default arguments.
        if directories is None:
            directories = ['.']
        if excludes is None:
            excludes = []
        self.name = name
        self.directories = directories
        self.excludes = list(excludes) + list(ignored_files.keys())
        self.modules = {}
        self.headers = {}
        self.idx = index()
        self.index = {}
        self.basename = 'libxml' if name == 'libxml2' else name

    def analyze(self):
        """Print a one-line project summary and run the merged index analysis."""
        print("Project %s : %d headers, %d modules" % (
            self.name, len(self.headers), len(self.modules)))
        self.idx.analyze()

    def scanHeaders(self):
        """Parse every registered header and merge its index into self.idx."""
        # Snapshot the keys: the values are rebound while looping.
        for header in list(self.headers):
            idx = CParser(header).parse()
            self.headers[header] = idx
            self.idx.merge(idx)

    def scanModules(self):
        """Parse every registered C module and merge it with merge_public()."""
        for module in list(self.modules):
            idx = CParser(module).parse()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def _collect_files(self, directory, pattern, found):
        """Register in `found` the files matching directory+pattern.

        A file whose path contains any entry of self.excludes is reported
        with a "Skipping" message and left out.
        """
        for path in glob.glob(directory + pattern):
            if any(excl in path for excl in self.excludes):
                print("Skipping %s" % path)
            else:
                found[path] = None

    def scan(self):
        """Locate *.c and *.h files in self.directories, then parse them all.

        Headers are parsed before modules.
        """
        for directory in self.directories:
            self._collect_files(directory, "/*.c", self.modules)
            self._collect_files(directory, "/*.h", self.headers)
        self.scanHeaders()
        self.scanModules()

    def modulename_file(self, file):
        """Return the base name of `file` with a trailing '.h' or '.c' removed."""
        module = os.path.basename(file)
        if module.endswith(('.h', '.c')):
            module = module[:-2]
        return module

    @staticmethod
    def _enum_value(expr):
        """Evaluate an enum value expression, falling back to the raw text.

        NOTE(security): the expression comes from the parsed sources and is
        still passed to eval().  Builtins and the caller's names are
        withheld so that an identifier in the expression can no longer
        resolve to a Python builtin or to a local variable of this class;
        such expressions now fall back to their source text.
        """
        try:
            return eval(expr, {"__builtins__": {}}, {})
        except Exception:
            return expr

    def serialize_enum(self, output, name):
        """Write the <enum/> element for enum value `name` to `output`.

        The entry's info is indexed as (value expression, description, type);
        empty or None members are omitted from the element.
        """
        entry = self.idx.enums[name]
        output.write("    <enum name='%s' file='%s'" % (
            name, self.modulename_file(entry.header)))
        info = entry.info
        if info is not None:
            if info[0] is not None and info[0] != '':
                # One-element tuple so a tuple-valued result cannot break
                # the % formatting.
                output.write(" value='%s'" % (self._enum_value(info[0]),))
            if info[2] is not None and info[2] != '':
                output.write(" type='%s'" % info[2])
            if info[1] is not None and info[1] != '':
                output.write(" info='%s'" % escape(info[1]))
        output.write("/>\n")

    def serialize_macro(self, output, name):
        """Write the <macro> element for `name`, with its info and arguments."""
        entry = self.idx.macros[name]
        output.write("    <macro name='%s' file='%s'>\n" % (
            name, self.modulename_file(entry.header)))
        if entry.info is not None:
            try:
                (args, desc) = entry.info
                if desc is not None and desc != "":
                    output.write("      <info>%s</info>\n" % (escape(desc)))
                for (arg_name, arg_desc) in args:
                    if arg_desc is not None and arg_desc != "":
                        output.write("      <arg name='%s' info='%s'/>\n" % (
                                     arg_name, escape(arg_desc)))
                    else:
                        output.write("      <arg name='%s'/>\n" % (arg_name,))
            except Exception:
                # Deliberately best effort: info that does not have the
                # (args, description) layout is left out silently.
                pass
        output.write("    </macro>\n")

    def serialize_typedef(self, output, name):
        """Write a <struct> or <typedef> element for typedef `name`.

        A typedef whose type text starts with 'struct ' is written as a
        <struct>; its fields are listed when the struct is known to the
        merged index with a tuple or list of fields.  Any other typedef is
        written as a <typedef>, with an <info> child when a description is
        available.
        """
        entry = self.idx.typedefs[name]
        if entry.info[0:7] == 'struct ':
            output.write("    <struct name='%s' file='%s' type='%s'" % (
                     name, self.modulename_file(entry.header), entry.info))
            struct_name = entry.info[7:]
            if struct_name in self.idx.structs and isinstance(
                    self.idx.structs[struct_name].info, (tuple, list)):
                output.write(">\n")
                try:
                    for field in self.idx.structs[struct_name].info:
                        desc = field[2]
                        desc = '' if desc is None else escape(desc)
                        output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1], field[0], desc))
                except Exception:
                    print("Failed to serialize struct %s" % (struct_name))
                output.write("    </struct>\n")
            else:
                output.write("/>\n")
        else:
            output.write("    <typedef name='%s' file='%s' type='%s'" % (
                         name, self.modulename_file(entry.header), entry.info))
            # Work out the description before writing anything, so that a
            # failure here cannot leave a half-written element behind.
            try:
                desc = entry.extra
                if desc is not None and desc != "":
                    info_xml = escape(desc)
                else:
                    info_xml = None
            except Exception:
                info_xml = None
            if info_xml is None:
                output.write("/>\n")
            else:
                output.write(">\n      <info>%s</info>\n" % (info_xml,))
                output.write("    </typedef>\n")

    def serialize_variable(self, output, name):
        """Write the <variable> element for `name`, with optional type and info."""
        entry = self.idx.variables[name]
        if entry.info is not None:
            output.write("    <variable name='%s' file='%s' type='%s'" % (
                    name, self.modulename_file(entry.header), entry.info))
        else:
            output.write("    <variable name='%s' file='%s'" % (
                    name, self.modulename_file(entry.header)))
        desc = entry.extra
        if desc is not None and desc != "":
            output.write(">\n      <info>%s</info>\n" % (escape(desc)))
            output.write("    </variable>\n")
        else:
            output.write("/>\n")

    def serialize_function(self, output, name):
        """Write the element describing function `name`.

        The element tag is the entry's type.  Conditionals, description,
        return value and arguments are written when available; a message
        is printed when the description is empty (except for xmlThrDef*
        and xmlDllMain) or when the info cannot be serialized.
        """
        entry = self.idx.functions[name]
        if name == debugsym:
            print("=>", entry)

        output.write("    <%s name='%s' file='%s' module='%s'>\n" % (entry.type,
                     name, self.modulename_file(entry.header),
                     self.modulename_file(entry.module)))
        #
        # Processing of conditionals modified by Bill 1/1/05
        #
        if entry.conditionals is not None:
            output.write("      <cond>%s</cond>\n" % (
                " &amp;&amp; ".join(entry.conditionals),))
        try:
            (ret, params, desc) = entry.info
            if (desc is None or desc == '') and \
               name[0:9] != "xmlThrDef" and name != "xmlDllMain":
                print("%s %s from %s has no description" % (entry.type, name,
                       self.modulename_file(entry.module)))

            output.write("      <info>%s</info>\n" % (escape(desc)))
            if ret[0] is not None:
                if ret[0] == "void":
                    output.write("      <return type='void'/>\n")
                else:
                    output.write("      <return type='%s' info='%s'/>\n" % (
                             ret[0], escape(ret[1])))
            for param in params:
                if param[0] == 'void':
                    continue
                if param[2] is None:
                    output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
                else:
                    output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
        except Exception:
            print("Failed to save function %s info: " % name, repr(entry.info))
        output.write("    </%s>\n" % (entry.type))

    def serialize_exports(self, output, file):
        """Write the <file> element listing the symbols exported by header `file`.

        Symbols are written sorted by name within each kind, in the order
        macro, enum, typedef, struct, variable, function.
        """
        module = self.modulename_file(file)
        output.write("    <file name='%s'>\n" % (module))
        header_idx = self.headers[file]
        if header_idx.info is not None:
            for data in ('Summary', 'Description', 'Author'):
                try:
                    output.write("     <%s>%s</%s>\n" % (
                                 data.lower(),
                                 escape(header_idx.info[data]),
                                 data.lower()))
                except Exception:
                    print("Header %s lacks a %s description" % (module, data))
            if 'Description' in header_idx.info:
                desc = header_idx.info['Description']
                if desc.find("DEPRECATED") != -1:
                    output.write("     <deprecated/>\n")

        # Macros are sometime used to masquerade other types.
        masked_by = (header_idx.functions, header_idx.variables,
                     header_idx.typedefs, header_idx.structs,
                     header_idx.enums)
        for symbol in sorted(header_idx.macros):
            if any(symbol in table for table in masked_by):
                continue
            output.write("     <exports symbol='%s' type='macro'/>\n" % (symbol))
        for kind, table in (('enum', header_idx.enums),
                            ('typedef', header_idx.typedefs),
                            ('struct', header_idx.structs),
                            ('variable', header_idx.variables),
                            ('function', header_idx.functions)):
            for symbol in sorted(table):
                output.write("     <exports symbol='%s' type='%s'/>\n" % (
                             symbol, kind))
        output.write("    </file>\n")

    def serialize(self):
        """Write the whole API description to "<name>-api.xml".

        The file is encoded as ISO-8859-1, matching the XML declaration it
        starts with; characters outside that encoding are written as XML
        character references.
        """
        filename = "%s-api.xml" % self.name
        print("Saving XML description %s" % (filename))
        # The context manager closes the file even when a serializer raises.
        with open(filename, "w", encoding="ISO-8859-1",
                  errors="xmlcharrefreplace") as output:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<api name='%s'>\n" % self.name)
            output.write("  <files>\n")
            for file in sorted(self.headers):
                self.serialize_exports(output, file)
            output.write("  </files>\n")
            output.write("  <symbols>\n")
            for table, emit in ((self.idx.macros, self.serialize_macro),
                                (self.idx.enums, self.serialize_enum),
                                (self.idx.typedefs, self.serialize_typedef),
                                (self.idx.variables, self.serialize_variable),
                                (self.idx.functions, self.serialize_function)):
                for symbol in sorted(table):
                    emit(output, symbol)
            output.write("  </symbols>\n")
            output.write("</api>\n")
1886
1887
def rebuild():
    """Guess the project from the files around the current directory and
    regenerate its XML API description.

    The first probe file that exists selects the project name, the
    directories to scan and the extra exclusions.  If none exists a failure
    message is printed and None is returned.  Otherwise the selected project
    is scanned, analyzed and serialized; when ../libexslt/exslt.c is present
    libexslt is processed the same way afterwards.  The docBuilder of the
    main project is returned.
    """
    layouts = (
        ("parser.c",
         "Rebuilding API description for libxml2",
         "libxml2", [".", "."], ["tst.c"]),
        ("../parser.c",
         "Rebuilding API description for libxml2",
         "libxml2", ["..", "../include/libxml"], ["tst.c"]),
        ("../libxslt/transform.c",
         "Rebuilding API description for libxslt",
         "libxslt", ["../libxslt"], ["win32config.h", "libxslt.h", "tst.c"]),
    )
    for probe, banner, project, where, skipped in layouts:
        if glob.glob(probe):
            print(banner)
            builder = docBuilder(project, where, skipped)
            break
    else:
        print("rebuild() failed, unable to guess the module")
        return None

    builder.scan()
    builder.analyze()
    builder.serialize()

    if glob.glob("../libexslt/exslt.c"):
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder
1914
1915#
1916# for debugging the parser
1917#
def parse(filename):
    """Run a CParser over `filename` and return the result of its parse()."""
    return CParser(filename).parse()
1922
if __name__ == "__main__":
    # Without arguments the whole API description is rebuilt; with one, only
    # that file is parsed, with the module-level debug flag switched on.
    if len(sys.argv) <= 1:
        rebuild()
    else:
        debug = 1
        parse(sys.argv[1])
1929